Diffstat (limited to 'doc/gawktexi.in')
-rw-r--r--  doc/gawktexi.in  41021
1 file changed, 41021 insertions, 0 deletions
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
new file mode 100644
index 00000000..34c47270
--- /dev/null
+++ b/doc/gawktexi.in
@@ -0,0 +1,41021 @@
+\input texinfo @c -*-texinfo-*-
+@c vim: filetype=texinfo
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename gawk.info
+@settitle The GNU Awk User's Guide
+@c %**end of header (This is for running Texinfo on a region.)
+
+@dircategory Text creation and manipulation
+@direntry
+* Gawk: (gawk). A text scanning and processing language.
+@end direntry
+@dircategory Individual utilities
+@direntry
+* awk: (gawk)Invoking gawk. Text scanning and processing.
+@end direntry
+
+@ifset FOR_PRINT
+@tex
+\gdef\xrefprintnodename#1{``#1''}
+@end tex
+@end ifset
+
+@ifclear FOR_PRINT
+@c With early 2014 texinfo.tex, restore PDF links and colors
+@tex
+\gdef\linkcolor{0.5 0.09 0.12} % Dark Red
+\gdef\urlcolor{0.5 0.09 0.12} % Also
+\global\urefurlonlylinktrue
+@end tex
+@end ifclear
+
+@ifnotdocbook
+@set BULLET @bullet{}
+@set MINUS @minus{}
+@end ifnotdocbook
+
+@ifdocbook
+@set BULLET
+@set MINUS
+@end ifdocbook
+
+@set xref-automatic-section-title
+
+@c The following information should be updated here only!
+@c This sets the edition of the document, the version of gawk it
+@c applies to and all the info about who's publishing this edition
+
+@c These apply across the board.
+@set UPDATE-MONTH September, 2014
+@set VERSION 4.1
+@set PATCHLEVEL 2
+
+@ifset FOR_PRINT
+@set TITLE Effective awk Programming
+@end ifset
+@ifclear FOR_PRINT
+@set TITLE GAWK: Effective AWK Programming
+@end ifclear
+@set SUBTITLE A User's Guide for GNU Awk
+@set EDITION 4.1
+
+@iftex
+@set DOCUMENT book
+@set CHAPTER chapter
+@set APPENDIX appendix
+@set SECTION section
+@set SUBSECTION subsection
+@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
+@set COMMONEXT (c.e.)
+@set PAGE page
+@end iftex
+@ifinfo
+@set DOCUMENT Info file
+@set CHAPTER major node
+@set APPENDIX major node
+@set SECTION minor node
+@set SUBSECTION node
+@set DARKCORNER (d.c.)
+@set COMMONEXT (c.e.)
+@set PAGE screen
+@end ifinfo
+@ifhtml
+@set DOCUMENT Web page
+@set CHAPTER chapter
+@set APPENDIX appendix
+@set SECTION section
+@set SUBSECTION subsection
+@set DARKCORNER (d.c.)
+@set COMMONEXT (c.e.)
+@set PAGE screen
+@end ifhtml
+@ifdocbook
+@set DOCUMENT book
+@set CHAPTER chapter
+@set APPENDIX appendix
+@set SECTION section
+@set SUBSECTION subsection
+@set DARKCORNER (d.c.)
+@set COMMONEXT (c.e.)
+@set PAGE page
+@end ifdocbook
+@ifxml
+@set DOCUMENT book
+@set CHAPTER chapter
+@set APPENDIX appendix
+@set SECTION section
+@set SUBSECTION subsection
+@set DARKCORNER (d.c.)
+@set COMMONEXT (c.e.)
+@set PAGE page
+@end ifxml
+@ifplaintext
+@set DOCUMENT book
+@set CHAPTER chapter
+@set APPENDIX appendix
+@set SECTION section
+@set SUBSECTION subsection
+@set DARKCORNER (d.c.)
+@set COMMONEXT (c.e.)
+@set PAGE page
+@end ifplaintext
+
+@ifdocbook
+@c empty on purpose
+@set PART1
+@set PART2
+@set PART3
+@set PART4
+@end ifdocbook
+
+@ifnotdocbook
+@set PART1 Part I:@*
+@set PART2 Part II:@*
+@set PART3 Part III:@*
+@set PART4 Part IV:@*
+@end ifnotdocbook
+
+@c some special symbols
+@iftex
+@set LEQ @math{@leq}
+@set PI @math{@pi}
+@end iftex
+@ifdocbook
+@set LEQ @inlineraw{docbook, ≤}
+@set PI @inlineraw{docbook, &pgr;}
+@end ifdocbook
+@ifnottex
+@ifnotdocbook
+@set LEQ <=
+@set PI @i{pi}
+@end ifnotdocbook
+@end ifnottex
+
+@ifnottex
+@ifnotdocbook
+@macro ii{text}
+@i{\text\}
+@end macro
+@end ifnotdocbook
+@end ifnottex
+
+@ifdocbook
+@macro ii{text}
+@inlineraw{docbook,<lineannotation>\text\</lineannotation>}
+@end macro
+@end ifdocbook
+
+@c hack for docbook, where comma shouldn't always follow an @ref{}
+@ifdocbook
+@macro DBREF{text}
+@ref{\text\}
+@end macro
+@macro DBXREF{text}
+@xref{\text\}
+@end macro
+@macro DBPXREF{text}
+@pxref{\text\}
+@end macro
+@end ifdocbook
+
+@ifnotdocbook
+@macro DBREF{text}
+@ref{\text\},
+@end macro
+@macro DBXREF{text}
+@xref{\text\},
+@end macro
+@macro DBPXREF{text}
+@pxref{\text\},
+@end macro
+@end ifnotdocbook
+
+@ifclear FOR_PRINT
+@set FN file name
+@set FFN File Name
+@set DF data file
+@set DDF Data File
+@set PVERSION version
+@end ifclear
+@ifset FOR_PRINT
+@set FN filename
+@set FFN Filename
+@set DF datafile
+@set DDF Datafile
+@set PVERSION version
+@end ifset
+
+@c For HTML, spell out email addresses, to avoid problems with
+@c address harvesters for spammers.
+@ifhtml
+@macro EMAIL{real,spelled}
+``\spelled\''
+@end macro
+@end ifhtml
+@ifnothtml
+@macro EMAIL{real,spelled}
+@email{\real\}
+@end macro
+@end ifnothtml
+
+@c Indexing macros
+@ifinfo
+
+@macro cindexawkfunc{name}
+@cindex @code{\name\}
+@end macro
+
+@macro cindexgawkfunc{name}
+@cindex @code{\name\}
+@end macro
+
+@end ifinfo
+
+@ifnotinfo
+
+@macro cindexawkfunc{name}
+@cindex @code{\name\()} function
+@end macro
+
+@macro cindexgawkfunc{name}
+@cindex @code{\name\()} function (@command{gawk})
+@end macro
+@end ifnotinfo
+
+@ignore
+Some comments on the layout for TeX.
+1. Use at least texinfo.tex 2014-01-30.15
+2. When using @docbook, if the last line is part of a paragraph, end
+it with a space and @c so that the lines won't run together. This is a
+quirk of the language / makeinfo, and isn't going to change.
+@end ignore
+
+@c merge the function and variable indexes into the concept index
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+@ifxml
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end ifxml
+@ifdocbook
+@synindex fn cp
+@synindex vr cp
+@end ifdocbook
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@iftex
+@finalout
+@end iftex
+
+@copying
+@docbook
+<para>
+&ldquo;To boldly go where no man has gone before&rdquo; is a
+Registered Trademark of Paramount Pictures Corporation.</para>
+
+<para>Published by:</para>
+
+<literallayout class="normal">Free Software Foundation
+51 Franklin Street, Fifth Floor
+Boston, MA 02110-1301 USA
+Phone: +1-617-542-5942
+Fax: +1-617-542-2652
+Email: <email>gnu@@gnu.org</email>
+URL: <ulink url="http://www.gnu.org">http://www.gnu.org/</ulink></literallayout>
+
+<literallayout class="normal">Copyright &copy; 1989, 1991, 1992, 1993, 1996&ndash;2005, 2007, 2009&ndash;2014
+Free Software Foundation, Inc.
+All Rights Reserved.</literallayout>
+@end docbook
+
+@ifnotdocbook
+Copyright @copyright{} 1989, 1991, 1992, 1993, 1996--2005, 2007, 2009--2015 @*
+Free Software Foundation, Inc.
+@end ifnotdocbook
+@sp 2
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}: @value{SUBTITLE}},
+for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU
+implementation of AWK.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being ``GNU General Public License'', with the
+Front-Cover Texts being ``A GNU Manual'', and with the Back-Cover Texts
+as in (a) below.
+@ifclear FOR_PRINT
+A copy of the license is included in the section entitled
+``GNU Free Documentation License''.
+@end ifclear
+@ifset FOR_PRINT
+A copy of the license
+may be found on the Internet at
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+the GNU Project's website}.
+@end ifset
+
+@enumerate a
+@item
+The FSF's Back-Cover Text is: ``You have the freedom to
+copy and modify this GNU manual.''
+@end enumerate
+@end copying
+
+@c Comment out the "smallbook" for technical review. Saves
+@c considerable paper. Remember to turn it back on *before*
+@c starting the page-breaking work.
+
+@c 4/2002: Karl Berry recommends commenting out this and the
+@c `@setchapternewpage odd', and letting users use `texi2dvi -t'
+@c if they want to waste paper.
+@c @smallbook
+
+
+@c Uncomment this for the release. Leaving it off saves paper
+@c during editing and review.
+@setchapternewpage odd
+
+@shorttitlepage GNU Awk
+@titlepage
+@title @value{TITLE}
+@subtitle @value{SUBTITLE}
+@subtitle Edition @value{EDITION}
+@subtitle @value{UPDATE-MONTH}
+@author Arnold D. Robbins
+
+@ifnotdocbook
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off. Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+``To boldly go where no man has gone before'' is a
+Registered Trademark of Paramount Pictures Corporation. @*
+@c sorry, i couldn't resist
+@sp 3
+Published by:
+@sp 1
+
+Free Software Foundation @*
+51 Franklin Street, Fifth Floor @*
+Boston, MA 02110-1301 USA @*
+Phone: +1-617-542-5942 @*
+Fax: +1-617-542-2652 @*
+Email: @email{gnu@@gnu.org} @*
+URL: @uref{http://www.gnu.org/} @*
+
+@c This one is correct for gawk 3.1.0 from the FSF
+ISBN 1-882114-28-0 @*
+@sp 2
+@insertcopying
+@end ifnotdocbook
+@end titlepage
+
+@c Thanks to Bob Chassell for directions on doing dedications.
+@iftex
+@headings off
+@page
+@w{ }
+@sp 9
+@center @i{To my parents, for their love, and for the wonderful example they set for me.}
+@sp 1
+@center @i{To my wife, Miriam, for making me complete.
+Thank you for building your life together with me.}
+@sp 1
+@center @i{To our children, Chana, Rivka, Nachum, and Malka, for enriching our lives in innumerable ways.}
+@sp 1
+@w{ }
+@page
+@w{ }
+@page
+@headings on
+@end iftex
+
+@docbook
+<dedication>
+<para>To my parents, for their love, and for the wonderful
+example they set for me.</para>
+<para>To my wife Miriam, for making me complete.
+Thank you for building your life together with me.</para>
+<para>To our children Chana, Rivka, Nachum and Malka,
+for enriching our lives in innumerable ways.</para>
+</dedication>
+@end docbook
+
+@iftex
+@headings off
+@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @|
+@oddheading @| @| @strong{@thischapter}@ @ @ @thispage
+@end iftex
+
+@ifnottex
+@ifnotxml
+@ifnotdocbook
+@node Top
+@top General Introduction
+@c Preface node should come right after the Top
+@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
+@c Licensing nodes are appendices, they're not central to AWK.
+
+This file documents @command{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+@insertcopying
+
+@end ifnotdocbook
+@end ifnotxml
+@end ifnottex
+
+@menu
+* Foreword3:: Some nice words about this
+ @value{DOCUMENT}.
+* Foreword4:: More nice words.
+* Preface:: What this @value{DOCUMENT} is about; brief
+ history and acknowledgments.
+* Getting Started:: A basic introduction to using
+ @command{awk}. How to run an @command{awk}
+ program. Command-line syntax.
+* Invoking Gawk:: How to run @command{gawk}.
+* Regexp:: All about matching things using regular
+ expressions.
+* Reading Files:: How to read files and manipulate fields.
+* Printing:: How to print using @command{awk}. Describes
+ the @code{print} and @code{printf}
+ statements. Also describes redirection of
+ output.
+* Expressions:: Expressions are the basic building blocks
+ of statements.
+* Patterns and Actions:: Overviews of patterns and actions.
+* Arrays:: The description and use of arrays. Also
+ includes array-oriented control statements.
+* Functions:: Built-in and user-defined functions.
+* Library Functions:: A Library of @command{awk} Functions.
+* Sample Programs:: Many @command{awk} programs with complete
+ explanations.
+* Advanced Features:: Stuff for advanced users, specific to
+ @command{gawk}.
+* Internationalization:: Getting @command{gawk} to speak your
+ language.
+* Debugger::                       The @command{gawk} debugger.
+* Arbitrary Precision Arithmetic:: Arbitrary precision arithmetic with
+ @command{gawk}.
+* Dynamic Extensions:: Adding new built-in functions to
+ @command{gawk}.
+* Language History:: The evolution of the @command{awk}
+ language.
+* Installation:: Installing @command{gawk} under various
+ operating systems.
+* Notes:: Notes about adding things to @command{gawk}
+ and possible future work.
+* Basic Concepts:: A very quick introduction to programming
+ concepts.
+* Glossary:: An explanation of some unfamiliar terms.
+* Copying:: Your right to copy and distribute
+ @command{gawk}.
+* GNU Free Documentation License:: The license for this @value{DOCUMENT}.
+* Index:: Concept and Variable Index.
+
+@detailmenu
+* History:: The history of @command{gawk} and
+ @command{awk}.
+* Names:: What name to use to find
+ @command{awk}.
+* This Manual:: Using this @value{DOCUMENT}. Includes
+ sample input files that you can use.
+* Conventions:: Typographical Conventions.
+* Manual History:: Brief history of the GNU project and
+ this @value{DOCUMENT}.
+* How To Contribute:: Helping to save the world.
+* Acknowledgments:: Acknowledgments.
+* Running gawk:: How to run @command{gawk} programs;
+ includes command-line syntax.
+* One-shot:: Running a short throwaway
+ @command{awk} program.
+* Read Terminal:: Using no input files (input from the
+ keyboard instead).
+* Long:: Putting permanent @command{awk}
+ programs in files.
+* Executable Scripts:: Making self-contained @command{awk}
+ programs.
+* Comments:: Adding documentation to @command{gawk}
+ programs.
+* Quoting:: More discussion of shell quoting
+ issues.
+* DOS Quoting:: Quoting in Windows Batch Files.
+* Sample Data Files:: Sample data files for use in the
+ @command{awk} programs illustrated in
+ this @value{DOCUMENT}.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example using
+ two rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements
+ into lines.
+* Other Features:: Other Features of @command{awk}.
+* When:: When to use @command{gawk} and when to
+ use other things.
+* Intro Summary:: Summary of the introduction.
+* Command Line:: How to run @command{awk}.
+* Options:: Command-line options and their
+ meanings.
+* Other Arguments:: Input file names and variable
+ assignments.
+* Naming Standard Input:: How to specify standard input with
+ other files.
+* Environment Variables:: The environment variables
+ @command{gawk} uses.
+* AWKPATH Variable:: Searching directories for
+ @command{awk} programs.
+* AWKLIBPATH Variable:: Searching directories for
+ @command{awk} shared libraries.
+* Other Environment Variables::           Other environment variables.
+* Exit Status:: @command{gawk}'s exit status.
+* Include Files:: Including other files into your
+ program.
+* Loading Shared Libraries:: Loading shared libraries into your
+ program.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write nonprinting characters.
+* Regexp Operators:: Regular Expression Operators.
+* Bracket Expressions:: What can go between @samp{[...]}.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
+* Records:: Controlling how data is split into
+ records.
+* awk split records:: How standard @command{awk} splits
+ records.
+* gawk split records:: How @command{gawk} splits records.
+* Fields:: An introduction to fields.
+* Nonconstant Fields:: Nonconstant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change
+ it.
+* Default Field Splitting:: How fields are normally separated.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate
+ field.
+* Command Line Field Separator:: Setting @code{FS} from the command
+ line.
+* Full Line Fields:: Making the full line be a single
+ field.
+* Field Splitting Summary:: Some final points and a summary table.
+* Constant Size:: Reading constant width data.
+* Splitting By Content::                  Defining Fields By Content.
+* Multiple Line:: Reading multiline records.
+* Getline:: Reading files under explicit program
+ control using the @code{getline}
+ function.
+* Plain Getline:: Using @code{getline} with no
+ arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable
+ from a file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable
+ from a pipe.
+* Getline/Coprocess:: Using @code{getline} from a coprocess.
+* Getline/Variable/Coprocess:: Using @code{getline} into a variable
+ from a coprocess.
+* Getline Notes:: Important things to know about
+ @code{getline}.
+* Getline Summary:: Summary of @code{getline} Variants.
+* Read Timeout:: Reading input with a timeout.
+* Command-line directories:: What happens if you put a directory on
+ the command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print}
+ statements.
+* Output Separators:: The output separators and how to
+ change them.
+* OFMT:: Controlling Numeric Output With
+ @code{print}.
+* Printf:: The @code{printf} statement.
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+* Redirection:: How to redirect output to multiple
+ files and pipes.
+* Special FD:: Special files for I/O.
+* Special Files:: File name interpretation in
+ @command{gawk}. @command{gawk} allows
+ access to inherited file descriptors.
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
+* Special Network:: Special files for network
+ communications.
+* Special Caveats:: Things to watch out for.
+* Close Files And Pipes:: Closing Input and Output Files and
+ Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
+* Values:: Constants, Variables, and Regular
+ Expressions.
+* Constants:: String, numeric and regexp constants.
+* Scalar Constants:: Numeric and string constants.
+* Nondecimal-numbers:: What are octal and hex numbers.
+* Regexp Constants:: Regular Expression constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for
+ later use.
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line
+ and a summary of command-line syntax.
+ This is an advanced method of input.
+* Conversion:: The conversion of strings to numbers
+ and vice versa.
+* Strings And Numbers:: How @command{awk} Converts Between
+ Strings And Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
+* All Operators:: @command{gawk}'s operators.
+* Arithmetic Ops:: Arithmetic operations (@samp{+},
+ @samp{-}, etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a
+ field.
+* Increment Ops:: Incrementing the numeric value of a
+ variable.
+* Truth Values and Conditions:: Testing for true and false.
+* Truth Values:: What is ``true'' and what is
+ ``false''.
+* Typing and Comparison:: How variables acquire types and how
+ this affects comparison of numbers and
+ strings with @samp{<}, etc.
+* Variable Typing:: String type versus numeric type.
+* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.
+* Boolean Ops:: Combining comparison expressions using
+ boolean operators @samp{||} (``or''),
+ @samp{&&} (``and'') and @samp{!}
+ (``not'').
+* Conditional Exp:: Conditional expressions select between
+ two subexpressions under control of a
+ third subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
+* Pattern Overview:: What goes into a pattern.
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a
+ pattern.
+* Ranges:: Pairs of patterns specify record
+ ranges.
+* BEGIN/END:: Specifying initialization and cleanup
+ rules.
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+* BEGINFILE/ENDFILE:: Two special patterns for advanced
+ control.
+* Empty:: The empty pattern, which matches every
+ record.
+* Using Shell Variables:: How to use shell variables with
+ @command{awk}.
+* Action Overview:: What goes into an action.
+* Statements:: Describes the various control
+ statements in detail.
+* If Statement:: Conditionally execute some
+ @command{awk} statements.
+* While Statement:: Loop until some condition is
+ satisfied.
+* Do Statement:: Do specified action while looping
+ until some condition is satisfied.
+* For Statement:: Another looping statement, that
+ provides initialization and increment
+ clauses.
+* Switch Statement:: Switch/case evaluation for conditional
+ execution of statements based on a
+ value.
+* Break Statement:: Immediately exit the innermost
+ enclosing loop.
+* Continue Statement:: Skip to the end of the innermost
+ enclosing loop.
+* Next Statement:: Stop processing the current input
+ record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @command{awk}.
+* Built-in Variables:: Summarizes the predefined variables.
+* User-modified:: Built-in variables that you change to
+ control @command{awk}.
+* Auto-set:: Built-in variables where @command{awk}
+ gives you information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and
+ @code{ARGV}.
+* Pattern Action Summary:: Patterns and Actions summary.
+* Array Basics:: The basics of arrays.
+* Array Intro::                           Introduction to Arrays.
+* Reference to Elements:: How to examine one element of an
+ array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example::                         Basic Example of an Array.
+* Scanning an Array:: A variation of the @code{for}
+ statement. It loops through the
+ indices of an array's existing
+ elements.
+* Controlling Scanning:: Controlling the order in which arrays
+ are scanned.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @command{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as
+ subscripts.
+* Delete:: The @code{delete} statement removes an
+ element from an array.
+* Multidimensional:: Emulating multidimensional arrays in
+ @command{awk}.
+* Multiscanning:: Scanning multidimensional arrays.
+* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
+* Built-in:: Summarizes the built-in functions.
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers,
+ including @code{int()}, @code{sin()}
+ and @code{rand()}.
+* String Functions:: Functions for string manipulation,
+ such as @code{split()}, @code{match()}
+ and @code{sprintf()}.
+* Gory Details:: More than you want to know about
+ @samp{\} and @samp{&} with
+ @code{sub()}, @code{gsub()}, and
+ @code{gensub()}.
+* I/O Functions:: Functions for files and shell
+ commands.
+* Time Functions:: Functions for dealing with timestamps.
+* Bitwise Functions:: Functions for bitwise operations.
+* Type Functions:: Functions for type information.
+* I18N Functions:: Functions for string translation.
+* User-defined:: Describes User-defined functions in
+ detail.
+* Definition Syntax:: How to write definitions and what they
+ mean.
+* Function Example:: An example function definition and
+ what it does.
+* Function Caveats:: Things to watch out for.
+* Calling A Function:: Don't use spaces.
+* Variable Scope:: Controlling variable scope.
+* Pass By Value/Reference:: Passing parameters.
+* Return Statement:: Specifying the value a function
+ returns.
+* Dynamic Typing:: How variable types can change at
+ runtime.
+* Indirect Calls:: Choosing the function to call at
+ runtime.
+* Functions Summary:: Summary of functions.
+* Library Names:: How to best name private global
+ variables in library functions.
+* General Functions:: Functions that are of general use.
+* Strtonum Function:: A replacement for the built-in
+ @code{strtonum()} function.
+* Assert Function:: A function for assertions in
+ @command{awk} programs.
+* Round Function:: A function for rounding if
+ @code{sprintf()} does not do it
+ correctly.
+* Cliff Random Function:: The Cliff Random Number Generator.
+* Ordinal Functions:: Functions for using characters as
+ numbers and vice versa.
+* Join Function:: A function to join an array into a
+ string.
+* Getlocaltime Function:: A function to get formatted times.
+* Readfile Function:: A function to read an entire file at
+ once.
+* Shell Quoting:: A function to quote strings for the
+ shell.
+* Data File Management:: Functions for managing command-line
+ data files.
+* Filetrans Function:: A function for handling data file
+ transitions.
+* Rewind Function:: A function for rereading the current
+ file.
+* File Checking:: Checking that data files are readable.
+* Empty Files:: Checking for zero-length files.
+* Ignoring Assigns:: Treating assignments as file names.
+* Getopt Function:: A function for processing command-line
+ arguments.
+* Passwd Functions:: Functions for getting user
+ information.
+* Group Functions:: Functions for getting group
+ information.
+* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
+* Running Examples:: How to run these examples.
+* Clones:: Clones of common utilities.
+* Cut Program:: The @command{cut} utility.
+* Egrep Program:: The @command{egrep} utility.
+* Id Program:: The @command{id} utility.
+* Split Program:: The @command{split} utility.
+* Tee Program:: The @command{tee} utility.
+* Uniq Program:: The @command{uniq} utility.
+* Wc Program:: The @command{wc} utility.
+* Miscellaneous Programs:: Some interesting @command{awk}
+ programs.
+* Dupword Program:: Finding duplicated words in a
+ document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @command{tr}
+ utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage
+ count.
+* History Sorting:: Eliminating duplicate entries from a
+ history file.
+* Extract Program:: Pulling out programs from Texinfo
+ source files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @command{awk} that
+ includes files.
+* Anagram Program:: Finding anagrams from a dictionary.
+* Signature Program:: People do amazing things with too much
+ time on their hands.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
+* Nondecimal Data:: Allowing nondecimal input data.
+* Array Sorting:: Facilities for controlling array
+ traversal and sorting arrays.
+* Controlling Array Traversal:: How to use PROCINFO["sorted_in"].
+* Array Sorting Functions:: How to use @code{asort()} and
+ @code{asorti()}.
+* Two-way I/O:: Two-way communications with another
+ process.
+* TCP/IP Networking:: Using @command{gawk} for network
+ programming.
+* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
+* I18N and L10N:: Internationalization and Localization.
+* Explaining gettext:: How GNU @command{gettext} works.
+* Programmer i18n:: Features for the programmer.
+* Translator i18n:: Features for the translator.
+* String Extraction:: Extracting marked strings.
+* Printf Ordering:: Rearranging @code{printf} arguments.
+* I18N Portability:: @command{awk}-level portability
+ issues.
+* I18N Example:: A simple i18n example.
+* Gawk I18N:: @command{gawk} is also
+ internationalized.
+* I18N Summary:: Summary of I18N stuff.
+* Debugging:: Introduction to @command{gawk}
+ debugger.
+* Debugging Concepts:: Debugging in General.
+* Debugging Terms:: Additional Debugging Concepts.
+* Awk Debugging:: Awk Debugging.
+* Sample Debugging Session:: Sample debugging session.
+* Debugger Invocation:: How to Start the Debugger.
+* Finding The Bug:: Finding the Bug.
+* List of Debugger Commands:: Main debugger commands.
+* Breakpoint Control:: Control of Breakpoints.
+* Debugger Execution Control:: Control of Execution.
+* Viewing And Changing Data:: Viewing and Changing Data.
+* Execution Stack:: Dealing with the Stack.
+* Debugger Info:: Obtaining Information about the
+ Program and the Debugger State.
+* Miscellaneous Debugger Commands:: Miscellaneous Commands.
+* Readline Support:: Readline support.
+* Limitations:: Limitations and future plans.
+* Debugging Summary:: Debugging summary.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Inexactness of computations:: Floating point math is not exact.
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
+* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic
+ with @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
+* Extension Intro:: What is an extension.
+* Plugin License:: A note about licensing.
+* Extension Mechanism Outline:: An outline of how it works.
+* Extension API Description:: A full description of the API.
+* Extension API Functions Introduction:: Introduction to the API functions.
+* General Data Types:: The data types.
+* Memory Allocation Functions:: Functions for allocating memory.
+* Constructor Functions:: Functions for creating values.
+* Registration Functions:: Functions to register things with
+ @command{gawk}.
+* Extension Functions:: Registering extension functions.
+* Exit Callback Functions:: Registering an exit callback.
+* Extension Version String:: Registering a version string.
+* Input Parsers:: Registering an input parser.
+* Output Wrappers:: Registering an output wrapper.
+* Two-way processors:: Registering a two-way processor.
+* Printing Messages:: Functions for printing messages.
+* Updating @code{ERRNO}:: Functions for updating @code{ERRNO}.
+* Requesting Values:: How to get a value.
+* Accessing Parameters:: Functions for accessing parameters.
+* Symbol Table Access:: Functions for accessing global
+ variables.
+* Symbol table by name:: Accessing variables by name.
+* Symbol table by cookie:: Accessing variables by ``cookie''.
+* Cached values:: Creating and using cached values.
+* Array Manipulation:: Functions for working with arrays.
+* Array Data Types:: Data types for working with arrays.
+* Array Functions:: Functions for working with arrays.
+* Flattening Arrays:: How to flatten arrays.
+* Creating Arrays:: How to create and populate arrays.
+* Extension API Variables:: Variables provided by the API.
+* Extension Versioning:: API Version information.
+* Extension API Informational Variables:: Variables providing information about
+ @command{gawk}'s invocation.
+* Extension API Boilerplate:: Boilerplate code for using the API.
+* Finding Extensions:: How @command{gawk} finds compiled
+ extensions.
+* Extension Example:: Example C code for an extension.
+* Internal File Description:: What the new functions will do.
+* Internal File Ops:: The code for internal file operations.
+* Using Internal File Ops:: How to use an external extension.
+* Extension Samples:: The sample extensions that ship with
+                                          @command{gawk}.
+* Extension Sample File Functions:: The file functions sample.
+* Extension Sample Fnmatch:: An interface to @code{fnmatch()}.
+* Extension Sample Fork:: An interface to @code{fork()} and
+ other process functions.
+* Extension Sample Inplace:: Enabling in-place file editing.
+* Extension Sample Ord:: Character to value to character
+ conversions.
+* Extension Sample Readdir:: An interface to @code{readdir()}.
+* Extension Sample Revout:: Reversing output sample output
+ wrapper.
+* Extension Sample Rev2way:: Reversing data sample two-way
+ processor.
+* Extension Sample Read write array:: Serializing an array to a file.
+* Extension Sample Readfile:: Reading an entire file into a string.
+* Extension Sample Time:: An interface to @code{gettimeofday()}
+ and @code{sleep()}.
+* Extension Sample API Tests:: Tests for the API.
+* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
+* V7/SVR3.1:: The major changes between V7 and
+ System V Release 3.1.
+* SVR4:: Minor changes between System V
+ Releases 3.1 and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from Brian Kernighan's
+ version of @command{awk}.
+* POSIX/GNU:: The extensions in @command{gawk} not
+ in POSIX @command{awk}.
+* Feature History:: The history of the features in
+ @command{gawk}.
+* Common Extensions:: Common Extensions Summary.
+* Ranges and Locales:: How locales used to affect regexp
+ ranges.
+* Contributors:: The major contributors to
+ @command{gawk}.
+* History summary:: History summary.
+* Gawk Distribution:: What is in the @command{gawk}
+ distribution.
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+* Unix Installation:: Installing @command{gawk} under
+ various versions of Unix.
+* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
+* Additional Configuration Options:: Other compile-time options.
+* Configuration Philosophy:: How it's all supposed to work.
+* Non-Unix Installation:: Installation on Other Operating
+ Systems.
+* PC Installation:: Installing and Compiling
+ @command{gawk} on MS-DOS and OS/2.
+* PC Binary Installation:: Installing a prepared distribution.
+* PC Compiling:: Compiling @command{gawk} for MS-DOS,
+ Windows32, and OS/2.
+* PC Testing:: Testing @command{gawk} on PC systems.
+* PC Using:: Running @command{gawk} on MS-DOS,
+ Windows32 and OS/2.
+* Cygwin:: Building and running @command{gawk}
+ for Cygwin.
+* MSYS:: Using @command{gawk} In The MSYS
+ Environment.
+* VMS Installation:: Installing @command{gawk} on VMS.
+* VMS Compilation:: How to compile @command{gawk} under
+ VMS.
+* VMS Dynamic Extensions:: Compiling @command{gawk} dynamic
+ extensions on VMS.
+* VMS Installation Details:: How to install @command{gawk} under
+ VMS.
+* VMS Running:: How to run @command{gawk} under VMS.
+* VMS GNV:: The VMS GNV Project.
+* VMS Old Gawk:: An old version comes with some VMS
+ systems.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @command{awk}
+ implementations.
+* Installation summary:: Summary of installation.
+* Compatibility Mode:: How to disable certain @command{gawk}
+ extensions.
+* Additions:: Making Additions To @command{gawk}.
+* Accessing The Source:: Accessing the Git repository.
+* Adding Code:: Adding code to the main body of
+ @command{gawk}.
+* New Ports:: Porting @command{gawk} to a new
+ operating system.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
+* Future Extensions:: New features that may be implemented
+ one day.
+* Implementation Limitations:: Some limitations of the
+ implementation.
+* Extension Design:: Design notes about the extension API.
+* Old Extension Problems:: Problems with the old mechanism.
+* Extension New Mechanism Goals:: Goals for the new mechanism.
+* Extension Other Design Decisions:: Some other design decisions.
+* Extension Future Growth:: Some room for future growth.
+* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
+* Basic High Level:: The high level view.
+* Basic Data Typing:: A very quick intro to data types.
+@end detailmenu
+@end menu
+
+@c dedication for Info file
+@ifinfo
+To my parents, for their love, and for the wonderful
+example they set for me.
+@sp 1
+To my wife Miriam, for making me complete.
+Thank you for building your life together with me.
+@sp 1
+To our children Chana, Rivka, Nachum and Malka,
+for enriching our lives in innumerable ways.
+@end ifinfo
+
+@summarycontents
+@contents
+
+@node Foreword3
+@unnumbered Foreword to the Third Edition
+
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Michael</firstname>
+ <surname>Brennan</surname>
+ <!-- can't put mawk into command tags. sigh. -->
+ <affiliation><jobtitle>Author of mawk</jobtitle></affiliation>
+ </author>
+ <date>March 2001</date>
+ </prefaceinfo>
+@end docbook
+
+Arnold Robbins and I are good friends. We were introduced
+@c 11 years ago
+in 1990
+by circumstances---and our favorite programming language, AWK.
+The circumstances started a couple of years
+earlier. I was working at a new job and noticed an unplugged
+Unix computer sitting in the corner. No one knew how to use it,
+and neither did I. However,
+a couple of days later, it was running, and
+I was @code{root} and the one-and-only user.
+That day, I began the transition from statistician to Unix programmer.
+
+On one of many trips to the library or bookstore in search of
+books on Unix, I found the gray AWK book, a.k.a.@:
+Alfred V.@: Aho, Brian W.@: Kernighan, and
+Peter J.@: Weinberger's @cite{The AWK Programming Language} (Addison-Wesley,
+1988). @command{awk}'s simple programming paradigm---find a pattern in the
+input and then perform an action---often reduced complex or tedious
+data manipulations to a few lines of code. I was excited to try my
+hand at programming in AWK.
+
+Alas, the @command{awk} on my computer was a limited version of the
+language described in the gray book. I discovered that my computer
+had ``old @command{awk}'' and the book described
+``new @command{awk}.''
+I learned that this was typical; the old version refused to step
+aside or relinquish its name. If a system had a new @command{awk}, it was
+invariably called @command{nawk}, and few systems had it.
+The best way to get a new @command{awk} was to @command{ftp} the source code for
+@command{gawk} from @code{prep.ai.mit.edu}. @command{gawk} was a version of
+new @command{awk} written by David Trueman and Arnold, and available under
+the GNU General Public License.
+
+(Incidentally,
+it's no longer difficult to find a new @command{awk}. @command{gawk} ships with
+GNU/Linux, and you can download binaries or source code for almost
+any system; my wife uses @command{gawk} on her VMS box.)
+
+My Unix system started out unplugged from the wall; it certainly was not
+plugged into a network. So, oblivious to the existence of @command{gawk}
+and the Unix community in general, and desiring a new @command{awk}, I wrote
+my own, called @command{mawk}.
+Before I was finished, I knew about @command{gawk},
+but it was too late to stop, so I eventually posted
+to a @code{comp.sources} newsgroup.
+
+A few days after my posting, I got a friendly email
+from Arnold introducing
+himself. He suggested we share design and algorithms and
+attached a draft of the POSIX standard so
+that I could update @command{mawk} to support language extensions added
+after publication of @cite{The AWK Programming Language}.
+
+Frankly, if our roles had
+been reversed, I would not have been so open and we probably would
+have never met. I'm glad we did meet.
+He is an AWK expert's AWK expert and a genuinely nice person.
+Arnold contributes significant amounts of his
+expertise and time to the Free Software Foundation.
+
+This book is the @command{gawk} reference manual, but at its core it
+is a book about AWK programming that
+will appeal to a wide audience.
+It is a definitive reference to the AWK language as defined by the
+1987 Bell Laboratories release and codified in the 1992 POSIX Utilities
+standard.
+
+On the other hand, the novice AWK programmer can study
+a wealth of practical programs that emphasize
+the power of AWK's basic idioms:
+data-driven control flow, pattern matching with regular expressions,
+and associative arrays.
+Those looking for something new can try out @command{gawk}'s
+interface to network protocols via special @file{/inet} files.
+
+The programs in this book make clear that an AWK program is
+typically much smaller and faster to develop than
+a counterpart written in C.
+Consequently, there is often a payoff to prototyping an
+algorithm or design in AWK to get it running quickly and expose
+problems early. Often, the interpreted performance is adequate
+and the AWK prototype becomes the product.
+
+The new @command{pgawk} (profiling @command{gawk}) produces
+program execution counts.
+I recently experimented with an algorithm that for
+@ifnotdocbook
+@math{n}
+@end ifnotdocbook
+@ifdocbook
+@i{n}
+@end ifdocbook
+lines of input, exhibited
+@tex
+$\sim\! Cn^2$
+@end tex
+@ifnottex
+@ifnotdocbook
+~ C n^2
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>&sim; Cn<superscript>2</superscript></emphasis> @c
+@end docbook
+performance, while
+theory predicted
+@tex
+$\sim\! Cn\log n$
+@end tex
+@ifnottex
+@ifnotdocbook
+~ C n log n
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>&sim; Cn log n</emphasis> @c
+@end docbook
+behavior. A few minutes poring
+over the @file{awkprof.out} profile pinpointed the problem to
+a single line of code. @command{pgawk} is a welcome addition to
+my programmer's toolbox.
+
+Arnold has distilled over a decade of experience writing and
+using AWK programs, and developing @command{gawk}, into this book. If you use
+AWK or want to learn how, then read this book.
+
+@ifnotdocbook
+@cindex Brennan, Michael
+@display
+Michael Brennan
+Author of @command{mawk}
+March 2001
+@end display
+@end ifnotdocbook
+
+@node Foreword4
+@unnumbered Foreword to the Fourth Edition
+
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Michael</firstname>
+ <surname>Brennan</surname>
+ <!-- can't put mawk into command tags. sigh. -->
+ <affiliation><jobtitle>Author of mawk</jobtitle></affiliation>
+ </author>
+ <date>October 2014</date>
+ </prefaceinfo>
+@end docbook
+
+Some things don't change. Thirteen years ago I wrote:
+``If you use AWK or want to learn how, then read this book.''
+True then, and still true today.
+
+Learning to use a programming language is about more than mastering the
+syntax. One needs to acquire an understanding of how to use the
+features of the language to solve practical programming problems.
+A focus of this book is the many examples that show how to use AWK.
+
+Some things do change. Our computers are much faster and have more memory.
+Consequently, speed and storage inefficiencies of a high-level language
+matter less. Prototyping in AWK and then rewriting in C for performance
+reasons happens less, because more often the prototype is fast enough.
+
+Of course, there are computing operations that are best done in C or C++.
+With @command{gawk} 4.1 and later, you do not have to choose between writing
+your program in AWK or in C/C++. You can write most of your
+program in AWK, write the aspects that require C/C++ capabilities
+in C/C++, and then glue the pieces together by having @command{gawk} load
+the C/C++ code as a dynamic plug-in.
+@c Chapter 16
+@ref{Dynamic Extensions},
+has all the
+details, and, as expected, many examples to help you learn the ins and outs.
+
+I enjoy programming in AWK and had fun (re)reading this book.
+I think you will too.
+
+@ifnotdocbook
+@cindex Brennan, Michael
+@display
+Michael Brennan
+Author of @command{mawk}
+October 2014
+@end display
+@end ifnotdocbook
+
+@node Preface
+@unnumbered Preface
+@c I saw a comment somewhere that the preface should describe the book itself,
+@c and the introduction should describe what the book covers.
+@c
+@c 12/2000: Chuck wants the preface & intro combined.
+
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Arnold</firstname>
+ <surname>Robbins</surname>
+ <affiliation><jobtitle>Nof Ayalon</jobtitle></affiliation>
+ <affiliation><jobtitle>Israel</jobtitle></affiliation>
+ </author>
+ <date>December 2014</date>
+ </prefaceinfo>
+@end docbook
+
+Several kinds of tasks occur repeatedly when working with text files.
+You might want to extract certain lines and discard the rest. Or you
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with @command{awk}.
+The @command{awk} utility interprets a special-purpose programming
+language that makes it easy to handle simple data-reformatting jobs.
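+
+For example, each of the following one-line programs handles one of those
+jobs: the first prints only the lines that match a pattern, and the second
+changes text wherever another pattern appears. (This is just a minimal
+sketch; the file names are placeholders.)
+
+@example
+$ @kbd{awk '/error/' log.txt}
+$ @kbd{awk '@{ gsub(/colour/, "color"); print @}' draft.txt}
+@end example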
+
+The GNU implementation of @command{awk} is called @command{gawk}; if you
+invoke it with the proper options or environment variables,
+it is fully compatible with
+the POSIX@footnote{The 2008 POSIX standard is accessible online at
+@w{@url{http://www.opengroup.org/onlinepubs/9699919799/}.}}
+specification of the @command{awk} language
+and with the Unix version of @command{awk} maintained
+by Brian Kernighan.
+This means that all
+properly written @command{awk} programs should work with @command{gawk}.
+So most of the time, we don't distinguish between @command{gawk} and other
+@command{awk} implementations.
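+
+For example, either of the following invocations runs @command{gawk} in
+POSIX-compatibility mode (a minimal sketch; @file{program.awk} and
+@file{data} are just placeholder names):
+
+@example
+$ @kbd{gawk --posix -f program.awk data}
+$ @kbd{POSIXLY_CORRECT=true gawk -f program.awk data}
+@end example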
+
+@cindex @command{awk}, POSIX and, See Also POSIX @command{awk}
+@cindex @command{awk}, POSIX and
+@cindex POSIX, @command{awk} and
+@cindex @command{gawk}, @command{awk} and
+@cindex @command{awk}, @command{gawk} and
+@cindex @command{awk}, uses for
+Using @command{awk} you can:
+
+@itemize @value{BULLET}
+@item
+Manage small, personal databases
+
+@item
+Generate reports
+
+@item
+Validate data
+
+@item
+Produce indexes and perform other document-preparation tasks
+
+@item
+Experiment with algorithms that you can adapt later to other computer
+languages
+@end itemize
+
+@cindex @command{awk}, See Also @command{gawk}
+@cindex @command{gawk}, See Also @command{awk}
+@cindex @command{gawk}, uses for
+In addition,
+@command{gawk}
+provides facilities that make it easy to:
+
+@itemize @value{BULLET}
+@item
+Extract bits and pieces of data for processing
+
+@item
+Sort data
+
+@item
+Perform simple network communications
+
+@item
+Profile and debug @command{awk} programs
+
+@item
+Extend the language with functions written in C or C++
+@end itemize
+
+This @value{DOCUMENT} teaches you about the @command{awk} language and
+how you can use it effectively. You should already be familiar with basic
+system commands, such as @command{cat} and @command{ls},@footnote{These utilities
+are available on POSIX-compliant systems, as well as on traditional
+Unix-based systems. If you are using some other operating system, you still need to
+be familiar with the ideas of I/O redirection and pipes.} as well as basic shell
+facilities, such as input/output (I/O) redirection and pipes.
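+
+For instance, @command{awk} often sits at the end of a pipeline, as in the
+following command, which totals the size field of a long directory listing
+(a minimal illustration; the field that holds the size may differ on some
+systems):
+
+@example
+$ @kbd{ls -l | awk '@{ sum += $5 @} END @{ print sum @}'}
+@end example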
+
+@cindex GNU @command{awk}, See @command{gawk}
+Implementations of the @command{awk} language are available for many
+different computing environments. This @value{DOCUMENT}, while describing
+the @command{awk} language in general, also describes the particular
+implementation of @command{awk} called @command{gawk} (which stands for
+``GNU @command{awk}''). @command{gawk} runs on a broad range of Unix systems,
+ranging from Intel-architecture PC-based computers
+up through large-scale systems.
+@command{gawk} has also been ported to Mac OS X,
+Microsoft Windows
+@ifset FOR_PRINT
+(all versions),
+@end ifset
+@ifclear FOR_PRINT
+(all versions) and OS/2 PCs,
+@end ifclear
+and OpenVMS.@footnote{Some other, obsolete systems to which @command{gawk}
+was once ported are no longer supported and the code for those systems
+has been removed.}
+
+@menu
+* History:: The history of @command{gawk} and
+ @command{awk}.
+* Names:: What name to use to find @command{awk}.
+* This Manual:: Using this @value{DOCUMENT}. Includes sample
+ input files that you can use.
+* Conventions:: Typographical Conventions.
+* Manual History:: Brief history of the GNU project and this
+ @value{DOCUMENT}.
+* How To Contribute:: Helping to save the world.
+* Acknowledgments:: Acknowledgments.
+@end menu
+
+@node History
+@unnumberedsec History of @command{awk} and @command{gawk}
+@cindex recipe for a programming language
+@cindex programming language, recipe for
+@sidebar Recipe for a Programming Language
+
+@multitable {2 parts} {1 part @code{egrep}} {1 part @code{snobol}}
+@item @tab 1 part @code{egrep} @tab 1 part @code{snobol}
+@item @tab 2 parts @code{ed} @tab 3 parts C
+@end multitable
+
+Blend all parts well using @code{lex} and @code{yacc}.
+Document minimally and release.
+
+After eight years, add another part @code{egrep} and two
+more parts C. Document very well and release.
+@end sidebar
+
+@cindex Aho, Alfred
+@cindex Weinberger, Peter
+@cindex Kernighan, Brian
+@cindex @command{awk}, history of
+The name @command{awk} comes from the initials of its designers: Alfred V.@:
+Aho, Peter J.@: Weinberger, and Brian W.@: Kernighan. The original version of
+@command{awk} was written in 1977 at AT&T Bell Laboratories.
+In 1985, a new version made the programming
+language more powerful, introducing user-defined functions, multiple input
+streams, and computed regular expressions.
+This new version became widely available with Unix System V
+Release 3.1 (1987).
+The version in System V Release 4 (1989) added some new features and cleaned
+up the behavior in some of the ``dark corners'' of the language.
+The specification for @command{awk} in the POSIX Command Language
+and Utilities standard further clarified the language.
+Both the @command{gawk} designers and the original @command{awk} designers at Bell Laboratories
+provided feedback for the POSIX specification.
+
+@cindex Rubin, Paul
+@cindex Fenlason, Jay
+@cindex Trueman, David
+Paul Rubin wrote @command{gawk} in 1986.
+Jay Fenlason completed it, with advice from Richard Stallman. John Woods
+contributed parts of the code as well. In 1988 and 1989, David Trueman, with
+help from me, thoroughly reworked @command{gawk} for compatibility
+with the newer @command{awk}.
+Circa 1994, I became the primary maintainer.
+Current development focuses on bug fixes,
+performance improvements, standards compliance, and, occasionally, new features.
+
+In May 1997, J@"urgen Kahrs felt the need for network access
+from @command{awk}, and with a little help from me, set about adding
+features to do this for @command{gawk}. At that time, he also
+wrote the bulk of
+@cite{TCP/IP Internetworking with @command{gawk}}
+(a separate document, available as part of the @command{gawk} distribution).
+His code finally became part of the main @command{gawk} distribution
+with @command{gawk} @value{PVERSION} 3.1.
+
+John Haque rewrote the @command{gawk} internals, in the process providing
+an @command{awk}-level debugger. This version became available as
+@command{gawk} @value{PVERSION} 4.0 in 2011.
+
+@DBXREF{Contributors}
+for a full list of those who have made important contributions to @command{gawk}.
+
+@node Names
+@unnumberedsec A Rose by Any Other Name
+
+@cindex @command{awk}, new vs.@: old
+The @command{awk} language has evolved over the years. Full details are
+provided in @ref{Language History}.
+The language described in this @value{DOCUMENT}
+is often referred to as ``new @command{awk}.''
+By analogy, the original version of @command{awk} is
+referred to as ``old @command{awk}.''
+
+Today, on most systems, when you run the @command{awk} utility
+you get some version of new @command{awk}.@footnote{Only
+Solaris systems still use an old @command{awk} for the
+default @command{awk} utility. A more modern @command{awk} lives in
+@file{/usr/xpg6/bin} on these systems.} If your system's standard
+@command{awk} is the old one, you will see something like this
+if you try the test program:
+
+@example
+$ @kbd{awk 1 /dev/null}
+@error{} awk: syntax error near line 1
+@error{} awk: bailing out near line 1
+@end example
+
+@noindent
+In this case, you should find a version of new @command{awk},
+or just install @command{gawk}!
+
+Throughout this @value{DOCUMENT}, whenever we refer to a language feature
+that should be available in any complete implementation of POSIX @command{awk},
+we simply use the term @command{awk}. When referring to a feature that is
+specific to the GNU implementation, we use the term @command{gawk}.
+
+@node This Manual
+@unnumberedsec Using This Book
+@cindex @command{awk}, terms describing
+
+The term @command{awk} refers to a particular program as well as to the language you
+use to tell this program what to do. When we need to be careful, we call
+the language ``the @command{awk} language,''
+and the program ``the @command{awk} utility.''
+This @value{DOCUMENT} explains
+both how to write programs in the @command{awk} language and how to
+run the @command{awk} utility.
+The term ``@command{awk} program'' refers to a program written by you in
+the @command{awk} programming language.
+
+@cindex @command{gawk}, @command{awk} and
+@cindex @command{awk}, @command{gawk} and
+@cindex POSIX @command{awk}
+Primarily, this @value{DOCUMENT} explains the features of @command{awk}
+as defined in the POSIX standard. It does so in the context of the
+@command{gawk} implementation. While doing so, it also
+attempts to describe important differences between @command{gawk}
+and other @command{awk}
+@ifclear FOR_PRINT
+implementations.@footnote{All such differences
+appear in the index under the
+entry ``differences in @command{awk} and @command{gawk}.''}
+@end ifclear
+@ifset FOR_PRINT
+implementations.
+@end ifset
+Finally, it notes any @command{gawk} features that are not in
+the POSIX standard for @command{awk}.
+
+@ifnotinfo
+This @value{DOCUMENT} has the difficult task of being both a tutorial and a reference.
+If you are a novice, feel free to skip over details that seem too complex.
+You should also ignore the many cross-references; they are for the
+expert user and for the Info and
+@uref{http://www.gnu.org/software/gawk/manual/, HTML}
+versions of the @value{DOCUMENT}.
+@end ifnotinfo
+
+There are sidebars
+scattered throughout the @value{DOCUMENT}.
+They add a more complete explanation of points that are relevant, but not likely
+to be of interest on first reading.
+@ifclear FOR_PRINT
+All appear in the index, under the heading ``sidebar.''
+@end ifclear
+
+Most of the time, the examples use complete @command{awk} programs.
+Some of the more advanced sections show only the part of the @command{awk}
+program that illustrates the concept being described.
+
+Although this @value{DOCUMENT} is aimed principally at people who have not been
+exposed
+to @command{awk}, there is a lot of information here that even the @command{awk}
+expert should find useful. In particular, the description of POSIX
+@command{awk} and the example programs in
+@ref{Library Functions}, and
+@ifnotdocbook
+in
+@end ifnotdocbook
+@ref{Sample Programs},
+should be of interest.
+
+This @value{DOCUMENT} is split into several parts, as follows:
+
+@c FULLXREF ON
+
+@itemize @value{BULLET}
+@item
+Part I describes the @command{awk} language and the @command{gawk} program in detail.
+It starts with the basics, and continues through all of the features of @command{awk}.
+It contains the following chapters:
+
+@c nested
+@itemize @value{MINUS}
+@item
+@ref{Getting Started},
+provides the essentials you need to know to begin using @command{awk}.
+
+@item
+@ref{Invoking Gawk},
+describes how to run @command{gawk}, the meaning of its
+command-line options, and how it finds @command{awk}
+program source files.
+
+@item
+@ref{Regexp},
+introduces regular expressions in general, and in particular the flavors
+supported by POSIX @command{awk} and @command{gawk}.
+
+@item
+@ref{Reading Files},
+describes how @command{awk} reads your data.
+It introduces the concepts of records and fields, as well
+as the @code{getline} command.
+I/O redirection is first described here.
+Network I/O is also briefly introduced here.
+
+@item
+@ref{Printing},
+describes how @command{awk} programs can produce output with
+@code{print} and @code{printf}.
+
+@item
+@ref{Expressions},
+describes expressions, which are the basic building blocks
+for getting most things done in a program.
+
+@item
+@ref{Patterns and Actions},
+describes how to write patterns for matching records, actions for
+doing something when a record is matched, and the predefined variables
+@command{awk} and @command{gawk} use.
+
+@item
+@ref{Arrays},
+covers @command{awk}'s one-and-only data structure: the associative array.
+Deleting array elements and whole arrays is described, as well as
+sorting arrays in @command{gawk}. The @value{CHAPTER} also describes how
+@command{gawk} provides arrays of arrays.
+
+@item
+@ref{Functions},
+describes the built-in functions @command{awk} and @command{gawk} provide,
+as well as how to define your own functions. It also discusses how
+@command{gawk} lets you call functions indirectly.
+@end itemize
+
+@item
+Part II shows how to use @command{awk} and @command{gawk} for problem solving.
+There is lots of code here for you to read and learn from.
+This part contains the following chapters:
+
+@c nested
+@itemize @value{MINUS}
+@item
+@ref{Library Functions}, provides a number of functions meant to
+be used from main @command{awk} programs.
+
+@item
+@ref{Sample Programs},
+provides many sample @command{awk} programs.
+@end itemize
+
+Reading these two chapters allows you to see @command{awk}
+solving real problems.
+
+@item
+Part III focuses on features specific to @command{gawk}.
+It contains the following chapters:
+
+@c nested
+@itemize @value{MINUS}
+@item
+@ref{Advanced Features},
+describes a number of advanced features.
+Of particular note
+are the abilities to control the order of array traversal,
+have two-way communications with another process,
+perform TCP/IP networking, and
+profile your @command{awk} programs.
+
+@item
+@ref{Internationalization},
+describes special features for translating program
+messages into different languages at runtime.
+
+@item
+@ref{Debugger}, describes the @command{gawk} debugger.
+
+@item
+@ref{Arbitrary Precision Arithmetic},
+describes advanced arithmetic facilities.
+
+@item
+@ref{Dynamic Extensions}, describes how to add new variables and
+functions to @command{gawk} by writing extensions in C or C++.
+@end itemize
+
+@item
+@ifclear FOR_PRINT
+Part IV provides the appendices, the Glossary, and two licenses that cover
+the @command{gawk} source code and this @value{DOCUMENT}, respectively.
+It contains the following appendices:
+@end ifclear
+@ifset FOR_PRINT
+Part IV provides the following appendices,
+including the GNU General Public License:
+@end ifset
+
+@itemize @value{MINUS}
+@item
+@ref{Language History},
+describes how the @command{awk} language has evolved since
+its first release to the present. It also describes how @command{gawk}
+has acquired features over time.
+
+@item
+@ref{Installation},
+describes how to get @command{gawk}, how to compile it
+on POSIX-compatible systems,
+and how to compile and use it on different
+non-POSIX systems. It also describes how to report bugs
+in @command{gawk} and where to get other freely
+available @command{awk} implementations.
+@end itemize
+
+@ifset FOR_PRINT
+@itemize @value{MINUS}
+@item
+@ref{Copying},
+presents the license that covers the @command{gawk} source code.
+@end itemize
+
+The version of this @value{DOCUMENT} distributed with @command{gawk}
+contains additional appendices and other end material.
+To save space, we have omitted them from the
+printed edition. You may find them online, as follows:
+
+@itemize @value{BULLET}
+@item
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Notes.html,
+The appendix on implementation notes}
+describes how to disable @command{gawk}'s extensions, how to contribute
+new code to @command{gawk}, where to find information on some possible
+future directions for @command{gawk} development, and the design decisions
+behind the extension API.
+
+@item
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Basic-Concepts.html,
+The appendix on basic concepts}
+provides some very cursory background material for those who
+are completely unfamiliar with computer programming.
+
+@item
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Glossary.html,
+The Glossary}
+defines most, if not all, of the significant terms used
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
+try looking them up here.
+
+@item
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+The GNU FDL}
+is the license that covers this @value{DOCUMENT}.
+@end itemize
+
+Some of the chapters have exercise sections; these have also been
+omitted from the print edition but are available online.
+@end ifset
+
+@ifclear FOR_PRINT
+@itemize @value{MINUS}
+@item
+@ref{Notes},
+describes how to disable @command{gawk}'s extensions, as
+well as how to contribute new code to @command{gawk},
+and some possible future directions for @command{gawk} development.
+
+@item
+@ref{Basic Concepts},
+provides some very cursory background material for those who
+are completely unfamiliar with computer programming.
+
+The @ref{Glossary}, defines most, if not all, of the significant terms used
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
+try looking them up here.
+
+@item
+@ref{Copying}, and
+@ref{GNU Free Documentation License},
+present the licenses that cover the @command{gawk} source code
+and this @value{DOCUMENT}, respectively.
+@end itemize
+@end ifclear
+@end itemize
+
+@c FULLXREF OFF
+
+@node Conventions
+@unnumberedsec Typographical Conventions
+
+@cindex Texinfo
+This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo},
+the GNU documentation formatting language.
+A single Texinfo source file is used to produce both the printed and online
+versions of the documentation.
+@ifnotinfo
+Because of this, the typographical conventions
+are slightly different from those in other books you may have read.
+@end ifnotinfo
+@ifinfo
+This @value{SECTION} briefly documents the typographical conventions used in Texinfo.
+@end ifinfo
+
+Examples you would type at the command line are preceded by the common
+shell primary and secondary prompts, @samp{$} and @samp{>}.
+Input that you type is shown @kbd{like this}.
+@c 8/2014: @print{} is stripped from the texi to make docbook.
+@ifclear FOR_PRINT
+Output from the command is preceded by the glyph ``@print{}''.
+This typically represents the command's standard output.
+@end ifclear
+@ifset FOR_PRINT
+Output from the command, usually its standard output, appears
+@code{like this}.
+@end ifset
+Error messages and other output on the command's standard error are preceded
+by the glyph ``@error{}''. For example:
+
+@example
+$ @kbd{echo hi on stdout}
+@print{} hi on stdout
+$ @kbd{echo hello on stderr 1>&2}
+@error{} hello on stderr
+@end example
+
+@ifnotinfo
+In the text, command names appear in @code{this font}, while code segments
+appear in the same font and quoted, @samp{like this}.
+Options look like this: @option{-f}.
+Some things are
+emphasized @emph{like this}, and if a point needs to be made
+strongly, it is done @strong{like this}. The first occurrence of
+a new term is usually its @dfn{definition} and appears in the same
+font as the previous occurrence of ``definition'' in this sentence.
+Finally, @value{FN}s are indicated like this: @file{/path/to/ourfile}.
+@end ifnotinfo
+
+Characters that you type at the keyboard look @kbd{like this}. In particular,
+there are special characters called ``control characters.'' These are
+characters that you type by holding down the @kbd{CONTROL} key while
+pressing another key. For example, a @kbd{Ctrl-d} is typed
+by first pressing and holding the @kbd{CONTROL} key, next
+pressing the @kbd{d} key, and finally releasing both keys.
+
+For the sake of brevity, throughout this @value{DOCUMENT}, we refer to
+Brian Kernighan's version of @command{awk} as ``BWK @command{awk}.''
+(@DBXREF{Other Versions} for information on his and other versions.)
+
+@ifset FOR_PRINT
+@quotation NOTE
+Notes of interest look like this.
+@end quotation
+
+@quotation CAUTION
+Cautionary or warning notes look like this.
+@end quotation
+@end ifset
+
+@c fakenode --- for prepinfo
+@unnumberedsubsec Dark Corners
+@cindex Kernighan, Brian
+@quotation
+@i{Dark corners are basically fractal---no matter how much
+you illuminate, there's always a smaller but darker one.}
+@author Brian Kernighan
+@end quotation
+
+@cindex d.c., See dark corner
+@cindex dark corner
+Until the POSIX standard (and @cite{@value{TITLE}}),
+many features of @command{awk} were either poorly documented or not
+documented at all. Descriptions of such features
+(often called ``dark corners'') are noted in this @value{DOCUMENT} with
+@iftex
+the picture of a flashlight in the margin, as shown here.
+@value{DARKCORNER}
+@end iftex
+@ifnottex
+``(d.c.).''
+@end ifnottex
+@ifclear FOR_PRINT
+They also appear in the index under the heading ``dark corner.''
+@end ifclear
+
+But, as noted by the opening quote, any coverage of dark
+corners is by definition incomplete.
+
+@cindex c.e., See common extensions
+Extensions to the standard @command{awk} language that are supported by
+more than one @command{awk} implementation are marked
+@ifclear FOR_PRINT
+``@value{COMMONEXT},'' and listed in the index under ``common extensions''
+and ``extensions, common.''
+@end ifclear
+@ifset FOR_PRINT
+``@value{COMMONEXT}'' for ``common extension.''
+@end ifset
+
+@node Manual History
+@unnumberedsec The GNU Project and This Book
+
+@cindex FSF (Free Software Foundation)
+@cindex Free Software Foundation (FSF)
+@cindex Stallman, Richard
+The Free Software Foundation (FSF) is a nonprofit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@cindex GNU Project
+@cindex GPL (General Public License)
+@cindex General Public License, See GPL
+@cindex documentation, online
+The GNU@footnote{GNU stands for ``GNU's Not Unix.''}
+Project is an ongoing effort on the part of the Free Software
+Foundation to create a complete, freely distributable, POSIX-compliant
+computing environment.
+The FSF uses the GNU General Public License (GPL) to ensure that
+its software's
+source code is always available to the end user.
+@ifclear FOR_PRINT
+A copy of the GPL is included
+@ifnotinfo
+in this @value{DOCUMENT}
+@end ifnotinfo
+for your reference
+(@pxref{Copying}).
+@end ifclear
+The GPL applies to the C language source code for @command{gawk}.
+To find out more about the FSF and the GNU Project online,
+see @uref{http://www.gnu.org, the GNU Project's home page}.
+This @value{DOCUMENT} may also be read from
+@uref{http://www.gnu.org/software/gawk/manual/, GNU's website}.
+
+@ifclear FOR_PRINT
+A shell, an editor (Emacs), highly portable optimizing C, C++, and
+Objective-C compilers, a symbolic debugger, and dozens of large and
+small utilities (such as @command{gawk}) have all been completed and are
+freely available. The GNU operating
+system kernel (the HURD) has been released but remains in an early
+stage of development.
+
+@cindex Linux
+@cindex GNU/Linux
+@cindex operating systems, BSD-based
+Until the GNU operating system is more fully developed, you should
+consider using GNU/Linux, a freely distributable, Unix-like operating
+system for Intel,
+Power Architecture,
+Sun SPARC, IBM S/390, and other
+systems.@footnote{The terminology ``GNU/Linux'' is explained
+in the @ref{Glossary}.}
+Many GNU/Linux distributions are
+available for download from the Internet.
+@end ifclear
+
+@ifnotinfo
+The @value{DOCUMENT} you are reading is actually free---at least, the
+information in it is free to anyone. The machine-readable
+source code for the @value{DOCUMENT} comes with @command{gawk}.
+@ifclear FOR_PRINT
+(Take a moment to check the Free Documentation
+License in @ref{GNU Free Documentation License}.)
+@end ifclear
+@end ifnotinfo
+
+@cindex Close, Diane
+The @value{DOCUMENT} itself has gone through multiple previous editions.
+Paul Rubin wrote the very first draft of @cite{The GAWK Manual};
+it was around 40 pages long.
+Diane Close and Richard Stallman improved it, yielding a
+version that was
+around 90 pages and barely described the original, ``old''
+version of @command{awk}.
+
+I started working with that version in the fall of 1988.
+As work on it progressed,
+the FSF published several preliminary versions (numbered 0.@var{x}).
+In 1996, edition 1.0 was released with @command{gawk} 3.0.0.
+The FSF published the first two editions under
+the title @cite{The GNU Awk User's Guide}.
+@ifset FOR_PRINT
+SSC published two editions of the @value{DOCUMENT} under the
+title @cite{Effective awk Programming}, and O'Reilly published
+the third edition in 2001.
+@end ifset
+
+This edition maintains the basic structure of the previous editions.
+For FSF edition 4.0, the content was thoroughly reviewed and updated. All
+references to @command{gawk} versions prior to 4.0 were removed.
+Of significant note for that edition was the addition of @ref{Debugger}.
+
+For FSF edition
+@ifclear FOR_PRINT
+@value{EDITION},
+@end ifclear
+@ifset FOR_PRINT
+@value{EDITION}
+(the fourth edition as published by O'Reilly),
+@end ifset
+the content has been reorganized into parts,
+and the major new additions are @ref{Arbitrary Precision Arithmetic},
+and @ref{Dynamic Extensions}.
+
+This @value{DOCUMENT} will undoubtedly continue to evolve. If you
+find an error in the @value{DOCUMENT}, please report it! @DBXREF{Bugs}
+for information on submitting problem reports electronically.
+
+@ifset FOR_PRINT
+@c fakenode --- for prepinfo
+@unnumberedsec How to Stay Current
+
+You may have a newer version of @command{gawk} than the
+one described here. To find out what has changed,
+you should first look at the @file{NEWS} file in the @command{gawk}
+distribution, which provides a high-level summary of the changes in
+each release.
+
+You can then look at the @uref{http://www.gnu.org/software/gawk/manual/,
+online version} of this @value{DOCUMENT} to read about any new features.
+@end ifset
+
+@ifclear FOR_PRINT
+@node How To Contribute
+@unnumberedsec How to Contribute
+
+As the maintainer of GNU @command{awk}, I once thought that I would be
+able to manage a collection of publicly available @command{awk} programs,
+and I even solicited contributions. Making things available on the Internet
+helps keep the @command{gawk} distribution down to a manageable size.
+
+The initial collection of material, such as it is, is still available
+at @uref{ftp://ftp.freefriends.org/arnold/Awkstuff}. In the hope of
+doing something broader, I acquired the @code{awk.info} domain.
+
+However, I found that I could not dedicate enough time to managing
+contributed code: the archive did not grow and the domain went unused
+for several years.
+
+Late in 2008, a volunteer took on the task of setting up
+an @command{awk}-related website---@uref{http://awk.info}---and did a very
+nice job.
+
+If you have written an interesting @command{awk} program, or have written
+a @command{gawk} extension that you would like to share with the rest
+of the world, please see @uref{http://awk.info/?contribute} for how to
+contribute it to the website.
+
+@ignore
+As of this writing, this website is in search of a maintainer; please
+contact me if you are interested.
+@end ignore
+
+@ignore
+Other links:
+
+http://www.reddit.com/r/linux/comments/dtect/composing_music_in_awk/
+@end ignore
+@end ifclear
+
+@node Acknowledgments
+@unnumberedsec Acknowledgments
+
+The initial draft of @cite{The GAWK Manual} had the following acknowledgments:
+
+@quotation
+Many people need to be thanked for their assistance in producing this
+manual. Jay Fenlason contributed many ideas and sample programs. Richard
+Mlynarik and Robert Chassell gave helpful comments on drafts of this
+manual. The paper @cite{A Supplemental Document for AWK} by John W.@:
+Pierce of the Chemistry Department at UC San Diego, pinpointed several
+issues relevant both to @command{awk} implementation and to this manual, that
+would otherwise have escaped us.
+@end quotation
+
+@cindex Stallman, Richard
+I would like to acknowledge Richard M.@: Stallman, for his vision of a
+better world and for his courage in founding the FSF and starting the
+GNU Project.
+
+@ifclear FOR_PRINT
+Earlier editions of this @value{DOCUMENT} had the following acknowledgments:
+@end ifclear
+@ifset FOR_PRINT
+The previous edition of this @value{DOCUMENT} had
+the following acknowledgments:
+@end ifset
+
+@quotation
+The following people (in alphabetical order)
+provided helpful comments on various
+versions of this book:
+Rick Adams,
+Dr.@: Nelson H.F. Beebe,
+Karl Berry,
+Dr.@: Michael Brennan,
+Rich Burridge,
+Claire Cloutier,
+Diane Close,
+Scott Deifik,
+Christopher (``Topher'') Eliot,
+Jeffrey Friedl,
+Dr.@: Darrel Hankerson,
+Michal Jaegermann,
+Dr.@: Richard J.@: LeBlanc,
+Michael Lijewski,
+Pat Rankin,
+Miriam Robbins,
+Mary Sheehan,
+and
+Chuck Toporek.
+
+@cindex Berry, Karl
+@cindex Chassell, Robert J.@:
+@c @cindex Texinfo
+Robert J.@: Chassell provided much valuable advice on
+the use of Texinfo.
+He also deserves special thanks for
+convincing me @emph{not} to title this @value{DOCUMENT}
+@cite{How to Gawk Politely}.
+Karl Berry helped significantly with the @TeX{} part of Texinfo.
+
+@cindex Hartholz, Marshall
+@cindex Hartholz, Elaine
+@cindex Schreiber, Bert
+@cindex Schreiber, Rita
+I would like to thank Marshall and Elaine Hartholz of Seattle and
+Dr.@: Bert and Rita Schreiber of Detroit for large amounts of quiet vacation
+time in their homes, which allowed me to make significant progress on
+this @value{DOCUMENT} and on @command{gawk} itself.
+
+@cindex Hughes, Phil
+Phil Hughes of SSC
+contributed in a very important way by loaning me his laptop GNU/Linux
+system, not once, but twice, which allowed me to do a lot of work while
+away from home.
+
+@cindex Trueman, David
+David Trueman deserves special credit; he has done a yeoman job
+of evolving @command{gawk} so that it performs well and without bugs.
+Although he is no longer involved with @command{gawk},
+working with him on this project was a significant pleasure.
+
+@cindex Drepper, Ulrich
+@cindex GNITS mailing list
+@cindex mailing list, GNITS
+The intrepid members of the GNITS mailing list, and most notably Ulrich
+Drepper, provided invaluable help and feedback for the design of the
+internationalization features.
+
+Chuck Toporek, Mary Sheehan, and Claire Cloutier of O'Reilly & Associates contributed
+significant editorial help for this @value{DOCUMENT} for the
+3.1 release of @command{gawk}.
+@end quotation
+
+@cindex Beebe, Nelson H.F.@:
+@cindex Buening, Andreas
+@cindex Collado, Manuel
+@cindex Colombo, Antonio
+@cindex Davies, Stephen
+@cindex Deifik, Scott
+@cindex Demaille, Akim
+@cindex Hankerson, Darrel
+@cindex Jaegermann, Michal
+@cindex Kahrs, J@"urgen
+@cindex Kasal, Stepan
+@cindex Malmberg, John
+@cindex Pitts, Dave
+@cindex Ramey, Chet
+@cindex Rankin, Pat
+@cindex Schorr, Andrew
+@cindex Vinschen, Corinna
+@cindex Zaretskii, Eli
+
+Dr.@: Nelson Beebe,
+Andreas Buening,
+Dr.@: Manuel Collado,
+Antonio Colombo,
+Stephen Davies,
+Scott Deifik,
+Akim Demaille,
+Darrel Hankerson,
+Michal Jaegermann,
+J@"urgen Kahrs,
+Stepan Kasal,
+John Malmberg,
+Dave Pitts,
+Chet Ramey,
+Pat Rankin,
+Andrew Schorr,
+Corinna Vinschen,
+and Eli Zaretskii
+(in alphabetical order)
+make up the current @command{gawk} ``crack portability team.'' Without
+their hard work and help, @command{gawk} would not be nearly the robust,
+portable program it is today. It has been and continues to be a pleasure
+working with this team of fine people.
+
+Notable code and documentation contributions were made by
+a number of people. @DBXREF{Contributors} for the full list.
+
+@ifset FOR_PRINT
+@cindex Oram, Andy
+Thanks to Andy Oram of O'Reilly Media for initiating
+the fourth edition and for his support during the work.
+Thanks to Jasmine Kwityn for her copyediting work.
+@end ifset
+
+Thanks to Michael Brennan for the Forewords.
+
+@cindex Dumas, Patrice
+@cindex Berry, Karl
+Thanks to Patrice Dumas for the new @command{makeinfo} program.
+Thanks to Karl Berry, who continues to work to keep
+the Texinfo markup language sane.
+
+@cindex Kernighan, Brian
+@cindex Brennan, Michael
+@cindex Day, Robert P.J.@:
+Robert P.J.@: Day, Michael Brennan, and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this @value{DOCUMENT}. Their feedback
+helped improve the final work.
+
+I would also like to thank Brian Kernighan for his invaluable assistance during the
+testing and debugging of @command{gawk}, and for his ongoing
+help and advice in clarifying numerous points about the language.
+We could not have done nearly as good a job on either @command{gawk}
+or its documentation without his help.
+
+Brian is in a class by himself as a programmer and technical
+author. I have to thank him (yet again) for his ongoing friendship
+and for being a role model to me for close to 30 years!
+Having him as a reviewer is an exciting privilege. It has also
+been extremely humbling@enddots{}
+
+@cindex Robbins, Miriam
+@cindex Robbins, Jean
+@cindex Robbins, Harry
+@cindex G-d
+I must thank my wonderful wife, Miriam, for her patience through
+the many versions of this project, for her proofreading,
+and for sharing me with the computer.
+I would like to thank my parents for their love, and for the grace with
+which they raised and educated me.
+Finally, I also must acknowledge my gratitude to G-d, for the many opportunities
+He has sent my way, as well as for the gifts He has given me with which to
+take advantage of those opportunities.
+@iftex
+@sp 2
+@noindent
+Arnold Robbins @*
+Nof Ayalon @*
+Israel @*
+December 2014
+@end iftex
+
+@ifnotinfo
+@part @value{PART1}The @command{awk} Language
+@end ifnotinfo
+
+@ifdocbook
+
+Part I describes the @command{awk} language and the @command{gawk} program
+in detail. It starts with the basics, and continues through all of
+the features of @command{awk}. Included also are many, but not all,
+of the features of @command{gawk}. This part contains the
+following chapters:
+
+@itemize @value{BULLET}
+@item
+@ref{Getting Started}
+
+@item
+@ref{Invoking Gawk}
+
+@item
+@ref{Regexp}
+
+@item
+@ref{Reading Files}
+
+@item
+@ref{Printing}
+
+@item
+@ref{Expressions}
+
+@item
+@ref{Patterns and Actions}
+
+@item
+@ref{Arrays}
+
+@item
+@ref{Functions}
+@end itemize
+@end ifdocbook
+
+@node Getting Started
+@chapter Getting Started with @command{awk}
+@c @cindex script, definition of
+@c @cindex rule, definition of
+@c @cindex program, definition of
+@c @cindex basic function of @command{awk}
+@cindex @command{awk}, function of
+
+The basic function of @command{awk} is to search files for lines (or other
+units of text) that contain certain patterns. When a line matches one
+of the patterns, @command{awk} performs specified actions on that line.
+@command{awk} continues to process input lines in this way until it reaches
+the end of the input files.
+
+@cindex @command{awk}, uses for
+@cindex programming languages@comma{} data-driven vs.@: procedural
+@cindex @command{awk} programs
+Programs in @command{awk} are different from programs in most other languages,
+because @command{awk} programs are @dfn{data driven} (i.e., you describe
+the data you want to work with and then what to do when you find it).
+Most other languages are @dfn{procedural}; you have to describe, in great
+detail, every step the program should take. When working with procedural
+languages, it is usually much
+harder to clearly describe the data your program will process.
+For this reason, @command{awk} programs are often refreshingly easy to
+read and write.
+
+@cindex program, definition of
+@cindex rule, definition of
+When you run @command{awk}, you specify an @command{awk} @dfn{program} that
+tells @command{awk} what to do. The program consists of a series of
+@dfn{rules} (it may also contain @dfn{function definitions},
+an advanced feature that we will ignore for now;
+@pxref{User-defined}). Each rule specifies one
+pattern to search for and one action to perform
+upon finding the pattern.
+
+Syntactically, a rule consists of a @dfn{pattern} followed by an
+@dfn{action}. The action is enclosed in braces to separate it from the
+pattern. Newlines usually separate rules. Therefore, an @command{awk}
+program looks like this:
+
+@example
+@var{pattern} @{ @var{action} @}
+@var{pattern} @{ @var{action} @}
+@dots{}
+@end example
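+
+To make that shape concrete, here is a tiny two-rule sketch of our own
+(the patterns @samp{widget} and @samp{gadget} and the messages are
+invented purely for illustration; real rules are developed step by step
+in the sections that follow):
+
+@example
+/widget/  @{ print "found a widget:", $0 @}
+/gadget/  @{ print "found a gadget:", $0 @}
+@end example
+
+@noindent
+Each line here is one complete rule: a pattern, followed by an action
+enclosed in braces.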
+
+@menu
+* Running gawk:: How to run @command{gawk} programs; includes
+ command-line syntax.
+* Sample Data Files:: Sample data files for use in the @command{awk}
+ programs illustrated in this @value{DOCUMENT}.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example using two
+ rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements into
+ lines.
+* Other Features:: Other Features of @command{awk}.
+* When:: When to use @command{gawk} and when to use
+ other things.
+* Intro Summary:: Summary of the introduction.
+@end menu
+
+@node Running gawk
+@section How to Run @command{awk} Programs
+
+@cindex @command{awk} programs, running
+There are several ways to run an @command{awk} program. If the program is
+short, it is easiest to include it in the command that runs @command{awk},
+like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@cindex command line, formats
+When the program is long, it is usually more convenient to put it in a file
+and run it with a command like this:
+
+@example
+awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+This @value{SECTION} discusses both mechanisms, along with several
+variations of each.
+
+@menu
+* One-shot:: Running a short throwaway @command{awk}
+ program.
+* Read Terminal:: Using no input files (input from the keyboard
+ instead).
+* Long:: Putting permanent @command{awk} programs in
+ files.
+* Executable Scripts:: Making self-contained @command{awk} programs.
+* Comments:: Adding documentation to @command{gawk}
+ programs.
+* Quoting:: More discussion of shell quoting issues.
+@end menu
+
+@node One-shot
+@subsection One-Shot Throwaway @command{awk} Programs
+
+Once you are familiar with @command{awk}, you will often type in simple
+programs the moment you want to use them. Then you can write the
+program as the first argument of the @command{awk} command, like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of patterns and
+actions, as described earlier.
+
+@cindex single quote (@code{'})
+@cindex @code{'} (single quote)
+This command format instructs the @dfn{shell}, or command interpreter,
+to start @command{awk} and use the @var{program} to process records in the
+input file(s). There are single quotes around @var{program} so
+the shell won't interpret any @command{awk} characters as special shell
+characters. The quotes also cause the shell to treat all of @var{program} as
+a single argument for @command{awk}, and allow @var{program} to be more
+than one line long.
+
+@cindex shells, scripts
+@cindex @command{awk} programs, running, from shell scripts
+This format is also useful for running short or medium-sized @command{awk}
+programs from shell scripts, because it avoids the need for a separate
+file for the @command{awk} program. A self-contained shell script is more
+reliable because there are no other files to misplace.
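+
+As a minimal sketch of what such a script might look like (the script
+name @file{todo-list} and the pattern @samp{TODO} are our own inventions,
+used only for illustration):
+
+@example
+#! /bin/sh
+# todo-list --- print lines containing "TODO" from the named files
+awk '/TODO/ @{ print @}' "$@@"
+@end example
+
+@noindent
+The entire @command{awk} program lives inside the single-quoted string,
+so the shell script is the only file you need to keep track of.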
+
+Later in this chapter, in
+@ifdocbook
+the section
+@end ifdocbook
+@ref{Very Simple},
+we'll see examples of several short,
+self-contained programs.
+
+@node Read Terminal
+@subsection Running @command{awk} Without Input Files
+
+@cindex standard input
+@cindex input, standard
+@cindex input files, running @command{awk} without
+You can also run @command{awk} without any input files. If you type the
+following command line:
+
+@example
+awk '@var{program}'
+@end example
+
+@noindent
+@command{awk} applies the @var{program} to the @dfn{standard input},
+which usually means whatever you type on the keyboard. This continues
+until you indicate end-of-file by typing @kbd{Ctrl-d}.
+@ifset FOR_PRINT
+(On non-POSIX operating systems, the end-of-file character may be different.)
+@end ifset
+@ifclear FOR_PRINT
+(On non-POSIX operating systems, the end-of-file character may be different.
+For example, on OS/2, it is @kbd{Ctrl-z}.)
+@end ifclear
+
+@cindex files, input, See input files
+@cindex input files, running @command{awk} without
+@cindex @command{awk} programs, running, without input files
+As an example, the following program prints a friendly piece of advice
+(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
+to keep you from worrying about the complexities of computer
+programming:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
+@print{} Don't Panic!
+@end example
+
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way (explained later) of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
+
+@quotation NOTE
+If you use Bash as your shell, you should execute the
+command @samp{set +H} before running this program interactively, to
+disable C shell-style history expansion, which treats @samp{!} as a
+special character. We recommend putting this command into your personal
+startup file.
+@end quotation
+
+This next simple @command{awk} program
+emulates the @command{cat} utility; it copies whatever you type on the
+keyboard to its standard output (why this works is explained shortly):
+
+@example
+$ @kbd{awk '@{ print @}'}
+@kbd{Now is the time for all good men}
+@print{} Now is the time for all good men
+@kbd{to come to the aid of their country.}
+@print{} to come to the aid of their country.
+@kbd{Four score and seven years ago, ...}
+@print{} Four score and seven years ago, ...
+@kbd{What, me worry?}
+@print{} What, me worry?
+@kbd{Ctrl-d}
+@end example
+
+@node Long
+@subsection Running Long Programs
+
+@cindex @command{awk} programs, running
+@cindex @command{awk} programs, lengthy
+@cindex files, @command{awk} programs in
+Sometimes @command{awk} programs are very long. In these cases, it is
+more convenient to put the program into a separate file. In order to tell
+@command{awk} to use that file for its program, you type:
+
+@example
+awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@cindex @option{-f} option
+@cindex command line, option @option{-f}
+The @option{-f} instructs the @command{awk} utility to get the
+@command{awk} program from the file @var{source-file} (@pxref{Options}).
+Any @value{FN} can be used for @var{source-file}. For example, you
+could put the program:
+
+@example
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+into the file @file{advice}. Then this command:
+
+@example
+awk -f advice
+@end example
+
+@noindent
+does the same thing as this one:
+
+@example
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
+@end example
+
+@cindex quoting in @command{gawk} command lines
+@noindent
+This was explained earlier
+(@pxref{Read Terminal}).
+Note that you don't usually need single quotes around the @value{FN} that you
+specify with @option{-f}, because most @value{FN}s don't contain any of the shell's
+special characters. Notice that in @file{advice}, the @command{awk}
+program did not have single quotes around it. The quotes are only needed
+for programs that are provided on the @command{awk} command line.
+(Also, placing the program in a file allows us to use a literal single quote in the program
+text, instead of the magic @samp{\47}.)
+
+@cindex single quote (@code{'}) in @command{gawk} command lines
+@cindex @code{'} (single quote) in @command{gawk} command lines
+If you want to clearly identify an @command{awk} program file as such,
+you can add the extension @file{.awk} to the @value{FN}. This doesn't
+affect the execution of the @command{awk} program but it does make
+``housekeeping'' easier.
+
+@node Executable Scripts
+@subsection Executable @command{awk} Programs
+@cindex @command{awk} programs
+@cindex @code{#} (number sign), @code{#!} (executable scripts)
+@cindex Unix, @command{awk} scripts and
+@cindex number sign (@code{#}), @code{#!} (executable scripts)
+
+Once you have learned @command{awk}, you may want to write self-contained
+@command{awk} scripts, using the @samp{#!} script mechanism. You can do
+this on many systems.@footnote{The @samp{#!} mechanism works on
+GNU/Linux systems, BSD-based systems, and commercial Unix systems.}
+For example, you could update the file @file{advice} to look like this:
+
+@example
+#! /bin/awk -f
+
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+After making this file executable (with the @command{chmod} utility),
+simply type @samp{advice}
+at the shell and the system arranges to run @command{awk} as if you had
+typed @samp{awk -f advice}:
+
+@example
+$ @kbd{chmod +x advice}
+$ @kbd{advice}
+@print{} Don't Panic!
+@end example
+
+@noindent
+(We assume you have the current directory in your shell's search
+path variable [typically @code{$PATH}]. If not, you may need
+to type @samp{./advice} at the shell.)
+
+Self-contained @command{awk} scripts are useful when you want to write a
+program that users can invoke without their having to know that the program is
+written in @command{awk}.
+
+@sidebar Understanding @samp{#!}
+@cindex portability, @code{#!} (executable scripts)
+
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+processor.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. (Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}.)
+
+Some systems limit the length of the interpreter name to 32 characters.
+Often, this can be dealt with by using a symbolic link.
+
+You should not put more than one argument on the @samp{#!}
+line after the path to @command{awk}. It does not work. The operating system
+treats the rest of the line as a single argument and passes it to @command{awk}.
+Doing this leads to confusing behavior---most likely a usage diagnostic
+of some sort from @command{awk}.
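+
+For example, a script header such as the following (a made-up line,
+shown only to illustrate the pitfall) does @emph{not} do what it
+appears to:
+
+@example
+#! /bin/awk -f -v PAT=42
+@end example
+
+@noindent
+On most systems, @command{awk} receives the single argument
+@samp{-f -v PAT=42}, and the usual result is a confusing diagnostic
+(typically about a source file with a strange name), not the variable
+assignment you intended.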
+
+@cindex @code{ARGC}/@code{ARGV} variables, portability and
+@cindex portability, @code{ARGV} variable
+Finally, the value of @code{ARGV[0]}
+(@pxref{Built-in Variables})
+varies depending upon your operating system.
+Some systems put @samp{awk} there, some put the full pathname
+of @command{awk} (such as @file{/bin/awk}), and some put the name
+of your script (@samp{advice}). @value{DARKCORNER}
+Don't rely on the value of @code{ARGV[0]}
+to provide your script name.
+@end sidebar
+
+@node Comments
+@subsection Comments in @command{awk} Programs
+@cindex @code{#} (number sign), commenting
+@cindex number sign (@code{#}), commenting
+@cindex commenting
+@cindex @command{awk} programs, documenting
+
+A @dfn{comment} is some text that is included in a program for the sake
+of human readers; it is not really an executable part of the program. Comments
+can explain what the program does and how it works. Nearly all
+programming languages have provisions for comments, as programs are
+typically hard to understand without them.
+
+In the @command{awk} language, a comment starts with the number sign
+character (@samp{#}) and continues to the end of the line.
+The @samp{#} does not have to be the first character on the line. The
+@command{awk} language ignores the rest of a line following a number sign.
+For example, we could have put the following into @file{advice}:
+
+@example
+# This program prints a nice, friendly message. It helps
+# keep novice users from being afraid of the computer.
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+You can put comment lines into keyboard-composed throwaway @command{awk}
+programs, but this usually isn't very useful; the purpose of a
+comment is to help you or another person understand the program
+when reading it at a later time.
+
+@cindex quoting, for small awk programs
+@cindex single quote (@code{'}), vs.@: apostrophe
+@cindex @code{'} (single quote), vs.@: apostrophe
+@quotation CAUTION
+As mentioned in
+@ref{One-shot},
+you can enclose short to medium-sized programs in single quotes,
+in order to keep
+your shell scripts self-contained. When doing so, @emph{don't} put
+an apostrophe (i.e., a single quote) into a comment (or anywhere else
+in your program). The shell interprets the quote as the closing
+quote for the entire program. As a result, usually the shell
+prints a message about mismatched quotes, and if @command{awk} actually
+runs, it will probably print strange messages about syntax errors.
+For example, look at the following:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "hello" @} # let's be cute'}
+>
+@end example
+
+The shell sees that the first two quotes match, and that
+a new quoted object begins at the end of the command line.
+It therefore prompts with the secondary prompt, waiting for more input.
+With Unix @command{awk}, closing the quoted string produces this result:
+
+@example
+$ @kbd{awk '@{ print "hello" @} # let's be cute'}
+> @kbd{'}
+@error{} awk: can't open file be
+@error{} source line number 1
+@end example
+
+@cindex @code{\} (backslash)
+@cindex backslash (@code{\})
+Putting a backslash before the single quote in @samp{let's} wouldn't help,
+because backslashes are not special inside single quotes.
+The next @value{SUBSECTION} describes the shell's quoting rules.
+@end quotation
+
+@node Quoting
+@subsection Shell Quoting Issues
+@cindex shell quoting, rules for
+
+@menu
+* DOS Quoting:: Quoting in Windows Batch Files.
+@end menu
+
+For short to medium-length @command{awk} programs, it is most convenient
+to enter the program on the @command{awk} command line.
+This is best done by enclosing the entire program in single quotes.
+This is true whether you are entering the program interactively at
+the shell prompt, or writing it as part of a larger shell script:
+
+@example
+awk '@var{program text}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@cindex shells, quoting, rules for
+@cindex Bourne shell, quoting rules for
+Once you are working with the shell, it is helpful to have a basic
+knowledge of shell quoting rules. The following rules apply only to
+POSIX-compliant, Bourne-style shells (such as Bash, the GNU Bourne-Again
+Shell). If you use the C shell, you're on your own.
+
+Before diving into the rules, we introduce a concept that appears
+throughout this @value{DOCUMENT}, which is that of the @dfn{null},
+or empty, string.
+
+The null string is character data that has no value.
+In other words, it is empty. It is written in @command{awk} programs
+like this: @code{""}. In the shell, it can be written using single
+or double quotes: @code{""} or @code{''}. Although the null string has
+no characters in it, it does exist. For example, consider this command:
+
+@example
+$ @kbd{echo ""}
+@end example
+
+@noindent
+Here, the @command{echo} utility receives a single argument, even
+though that argument has no characters in it. In the rest of this
+@value{DOCUMENT}, we use the terms @dfn{null string} and @dfn{empty string}
+interchangeably. Now, on to the quoting rules:
+
+@itemize @value{BULLET}
+@item
+Quoted items can be concatenated with nonquoted items as well as with other
+quoted items. The shell turns everything into one argument for
+the command.
+
+@item
+Preceding any single character with a backslash (@samp{\}) quotes
+that character. The shell removes the backslash and passes the quoted
+character on to the command. (A short illustration appears just after
+this list.)
+
+@item
+@cindex @code{\} (backslash), in shell commands
+@cindex backslash (@code{\}), in shell commands
+@cindex single quote (@code{'}), in shell commands
+@cindex @code{'} (single quote), in shell commands
+Single quotes protect everything between the opening and closing quotes.
+The shell does no interpretation of the quoted text, passing it on verbatim
+to the command.
+It is @emph{impossible} to embed a single quote inside single-quoted text.
+Refer back to
+@DBREF{Comments}
+for an example of what happens if you try.
+
+@item
+@cindex double quote (@code{"}), in shell commands
+@cindex @code{"} (double quote), in shell commands
+Double quotes protect most things between the opening and closing quotes.
+The shell does at least variable and command substitution on the quoted text.
+Different shells may do additional kinds of processing on double-quoted text.
+
+Because certain characters within double-quoted text are processed by the shell,
+they must be @dfn{escaped} within the text. Of note are the characters
+@samp{$}, @samp{`}, @samp{\}, and @samp{"}, all of which must be preceded by
+a backslash within double-quoted text if they are to be passed on literally
+to the program. (The leading backslash is stripped first.)
+Thus, the example seen
+@ifnotinfo
+previously
+@end ifnotinfo
+in @ref{Read Terminal}:
+
+@example
+awk 'BEGIN @{ print "Don\47t Panic!" @}'
+@end example
+
+@noindent
+could instead be written this way:
+
+@example
+$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+@print{} Don't Panic!
+@end example
+
+@cindex single quote (@code{'}), with double quotes
+@cindex @code{'} (single quote), with double quotes
+Note that the single quote is not special within double quotes.
+
+@item
+Null strings are removed when they occur as part of a non-null
+command-line argument, while explicit null objects are kept.
+For example, to specify that the field separator @code{FS} should
+be set to the null string, use:
+
+@example
+awk -F "" '@var{program}' @var{files} # correct
+@end example
+
+@noindent
+@cindex null strings in @command{gawk} arguments, quoting and
+Don't use this:
+
+@example
+awk -F"" '@var{program}' @var{files} # wrong!
+@end example
+
+@noindent
+In the second case, @command{awk} attempts to use the text of the program
+as the value of @code{FS}, and the first @value{FN} as the text of the program!
+This results in syntax errors at best, and confusing behavior at worst.
+@end itemize
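+
+Here is the promised short illustration of backslash quoting (the text
+being echoed is, of course, just an example):
+
+@example
+$ @kbd{echo Don\'t Panic}
+@print{} Don't Panic
+@end example
+
+@noindent
+The backslash keeps the shell from treating the @samp{'} as the start of
+a quoted string; the shell removes the backslash and passes the literal
+single quote on to @command{echo}.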
+
+@cindex quoting in @command{gawk} command lines, tricks for
+Mixing single and double quotes is difficult. You have to resort
+to shell quoting tricks, like this:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "Here is a single quote <'"'"'>" @}'}
+@print{} Here is a single quote <'>
+@end example
+
+@noindent
+This program consists of three concatenated quoted strings. The first and the
+third are single-quoted, and the second is double-quoted.
+
+This can be ``simplified'' to:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "Here is a single quote <'\''>" @}'}
+@print{} Here is a single quote <'>
+@end example
+
+@noindent
+Judge for yourself which of these two is the more readable.
+
+Another option is to use double quotes, escaping the embedded, @command{awk}-level
+double quotes:
+
+@example
+$ @kbd{awk "BEGIN @{ print \"Here is a single quote <'>\" @}"}
+@print{} Here is a single quote <'>
+@end example
+
+@noindent
+This option is also painful, because double quotes, backslashes, and dollar signs
+are very common in more advanced @command{awk} programs.
+
+A third option is to use the octal escape sequence equivalents
+(@pxref{Escape Sequences})
+for the
+single- and double-quote characters, like so:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "Here is a single quote <\47>" @}'}
+@print{} Here is a single quote <'>
+$ @kbd{awk 'BEGIN @{ print "Here is a double quote <\42>" @}'}
+@print{} Here is a double quote <">
+@end example
+
+@noindent
+This works nicely, but you should comment clearly what the
+escapes mean.
+
+A fourth option is to use command-line variable assignment, like this:
+
+@example
+$ @kbd{awk -v sq="'" 'BEGIN @{ print "Here is a single quote <" sq ">" @}'}
+@print{} Here is a single quote <'>
+@end example
+
+(Here, the two string constants and the value of @code{sq} are concatenated
+into a single string that is printed by @code{print}.)
+
+If you really need both single and double quotes in your @command{awk}
+program, it is probably best to move it into a separate file, where
+the shell won't be part of the picture and you can say what you mean.
+
+@node DOS Quoting
+@subsubsection Quoting in MS-Windows Batch Files
+
+@ignore
+Date: Wed, 21 May 2008 09:58:43 +0200 (CEST)
+From: jeroen.brink@inter.NL.net
+Subject: (g)awk "contribution"
+To: arnold@skeeve.com
+Message-id: <42220.193.172.132.34.1211356723.squirrel@webmail.internl.net>
+
+Hello Arnold,
+
+maybe you can help me out. Found your email on the GNU/awk online manual
+pages.
+
+I've searched hard to figure out how, on Windows, to print double quotes.
+Couldn't find it in the Quotes area, nor on google or elsewhere. Finally i
+figured out how to do this myself.
+
+How to print all lines in a file surrounded by double quotes (on Windows):
+
+gawk "{ print \"\042\" $0 \"\042\" }" <file>
+
+Maybe this is a helpfull tip for other (Windows) gawk users. However, i
+don't have a clue as to where to "publish" this tip! Do you?
+
+Kind regards,
+
+Jeroen Brink
+@end ignore
+
+Although this @value{DOCUMENT} generally only worries about POSIX systems and the
+POSIX shell, the following issue arises often enough for many users that
+it is worth addressing.
+
+@cindex Brink, Jeroen
+The ``shells'' on Microsoft Windows systems use the double-quote
+character for quoting, and make it difficult or impossible to include an
+escaped double-quote character in a command-line script.
+The following example, courtesy of Jeroen Brink, shows
+how to print all lines in a file surrounded by double quotes:
+
+@example
+gawk "@{ print \"\042\" $0 \"\042\" @}" @var{file}
+@end example
+
+
+@node Sample Data Files
+@section @value{DDF}s for the Examples
+
+@cindex input files, examples
+@cindex @code{mail-list} file
+Many of the examples in this @value{DOCUMENT} take their input from two sample
+@value{DF}s. The first, @file{mail-list}, represents a list of people's names
+together with their email addresses and information about those people.
+The second @value{DF}, called @file{inventory-shipped}, contains
+information about monthly shipments. In both files,
+each line is considered to be one @dfn{record}.
+
+In @file{mail-list}, each record contains the name of a person,
+his/her phone number, his/her email address, and a code for his/her relationship
+with the author of the list.
+The columns are aligned using spaces.
+An @samp{A} in the last column
+means that the person is an acquaintance. An @samp{F} in the last
+column means that the person is a friend.
+An @samp{R} means that the person is a relative:
+
+@example
+@c system if test ! -d eg ; then mkdir eg ; fi
+@c system if test ! -d eg/lib ; then mkdir eg/lib ; fi
+@c system if test ! -d eg/data ; then mkdir eg/data ; fi
+@c system if test ! -d eg/prog ; then mkdir eg/prog ; fi
+@c system if test ! -d eg/misc ; then mkdir eg/misc ; fi
+@c file eg/data/mail-list
+Amelia 555-5553 amelia.zodiacusque@@gmail.com F
+Anthony 555-3412 anthony.asserturo@@hotmail.com A
+Becky 555-7685 becky.algebrarum@@gmail.com A
+Bill 555-1675 bill.drowning@@hotmail.com A
+Broderick 555-0542 broderick.aliquotiens@@yahoo.com R
+Camilla 555-2912 camilla.infusarum@@skynet.be R
+Fabius 555-1234 fabius.undevicesimus@@ucb.edu F
+Julie 555-6699 julie.perscrutabor@@skeeve.com F
+Martin 555-6480 martin.codicibus@@hotmail.com A
+Samuel 555-3430 samuel.lanceolis@@shu.edu A
+Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@c endfile
+@end example
+
+@cindex @code{inventory-shipped} file
+The @value{DF} @file{inventory-shipped} represents
+information about shipments during the year.
+Each record contains the month, the number
+of green crates shipped, the number of red boxes shipped, the number of
+orange bags shipped, and the number of blue packages shipped,
+respectively. There are 16 entries, covering the 12 months of last year
+and the first four months of the current year.
+An empty line separates the data for the two years:
+
+@example
+@c file eg/data/inventory-shipped
+Jan 13 25 15 115
+Feb 15 32 24 226
+Mar 15 24 34 228
+Apr 31 52 63 420
+May 16 34 29 208
+Jun 31 42 75 492
+Jul 24 34 67 436
+Aug 15 34 47 316
+Sep 13 55 37 277
+Oct 29 54 68 525
+Nov 20 87 82 577
+Dec 17 35 61 401
+
+Jan 21 36 64 620
+Feb 26 58 80 652
+Mar 24 75 70 495
+Apr 21 70 74 514
+@c endfile
+@end example
+
+The sample files are included in the @command{gawk} distribution,
+in the directory @file{awklib/eg/data}.
+
+@node Very Simple
+@section Some Simple Examples
+
+The following command runs a simple @command{awk} program that searches the
+input file @file{mail-list} for the character string @samp{li} (a
+grouping of characters is usually called a @dfn{string};
+the term @dfn{string} is based on similar usage in English, such
+as ``a string of pearls'' or ``a string of cars in a train''):
+
+@example
+awk '/li/ @{ print $0 @}' mail-list
+@end example
+
+@noindent
+When lines containing @samp{li} are found, they are printed because
+@w{@samp{print $0}} means print the current line. (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes (@samp{/}) surround the string @samp{li}
+in the @command{awk} program. The slashes indicate that @samp{li}
+is the pattern to search for. This type of pattern is called a
+@dfn{regular expression}, which is covered in more detail later
+(@pxref{Regexp}).
+The pattern is allowed to match parts of words.
+There are
+single quotes around the @command{awk} program so that the shell won't
+interpret any of it as special shell characters.
+
+Here is what this program prints:
+
+@example
+$ @kbd{awk '/li/ @{ print $0 @}' mail-list}
+@print{} Amelia 555-5553 amelia.zodiacusque@@gmail.com F
+@print{} Broderick 555-0542 broderick.aliquotiens@@yahoo.com R
+@print{} Julie 555-6699 julie.perscrutabor@@skeeve.com F
+@print{} Samuel 555-3430 samuel.lanceolis@@shu.edu A
+@end example
+
+@cindex actions, default
+@cindex patterns, default
+In an @command{awk} rule, either the pattern or the action can be omitted,
+but not both. If the pattern is omitted, then the action is performed
+for @emph{every} input line. If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+@cindex actions, empty
+Thus, we could leave out the action (the @code{print} statement and the
+braces) in the previous example and the result would be the same:
+@command{awk} prints all lines matching the pattern @samp{li}. By comparison,
+omitting the @code{print} statement but retaining the braces makes an
+empty action that does nothing (i.e., no lines are printed).
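+
+For instance, this variation on the previous command (our own, shown
+only to illustrate the point) still matches the same lines, but its
+empty action means that nothing is printed at all:
+
+@example
+awk '/li/ @{ @}' mail-list
+@end example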
+
+@cindex @command{awk} programs, one-line examples
+Many practical @command{awk} programs are just a line or two long. Following is a
+collection of useful, short programs to get you started. Some of these
+programs contain constructs that haven't been covered yet. (The description
+of the program will give you a good idea of what is going on, but you'll
+need to read the rest of the @value{DOCUMENT} to become an @command{awk} expert!)
+Most of the examples use a @value{DF} named @file{data}. This is just a
+placeholder; if you use these programs yourself, substitute
+your own @value{FN}s for @file{data}.
+For future reference, note that there is often more than
+one way to do things in @command{awk}. At some point, you may want
+to look back at these examples and see if
+you can come up with different ways to do the same things shown here:
+
+@itemize @value{BULLET}
+@item
+Print every line that is longer than 80 characters:
+
+@example
+awk 'length($0) > 80' data
+@end example
+
+The sole rule has a relational expression as its pattern and has no
+action---so it uses the default action, printing the record.
+
+@item
+Print the length of the longest input line:
+
+@example
+awk '@{ if (length($0) > max) max = length($0) @}
+ END @{ print max @}' data
+@end example
+
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
+
+@cindex @command{expand} utility
+@item
+Print the length of the longest line in @file{data}:
+
+@example
+expand data | awk '@{ if (x < length($0)) x = length($0) @}
+ END @{ print "maximum line length is " x @}'
+@end example
+
+This example differs slightly from the previous one:
+the input is processed by the @command{expand} utility to change TABs
+into spaces, so the widths compared are actually the right-margin columns,
+as opposed to the number of input characters on each line.
+
+@item
+Print every line that has at least one field:
+
+@example
+awk 'NF > 0' data
+@end example
+
+This is an easy way to delete blank lines from a file (or rather, to
+create a new file similar to the old file but from which the blank lines
+have been removed).
+
+@item
+Print seven random numbers from 0 to 100, inclusive:
+
+@example
+awk 'BEGIN @{ for (i = 1; i <= 7; i++)
+ print int(101 * rand()) @}'
+@end example
+
+@item
+Print the total number of bytes used by @var{files}:
+
+@example
+ls -l @var{files} | awk '@{ x += $5 @}
+ END @{ print "total bytes: " x @}'
+@end example
+
+@item
+Print the total number of kilobytes used by @var{files}:
+
+@c Don't use \ continuation, not discussed yet
+@c Remember that awk does floating point division,
+@c no need for (x+1023) / 1024
+@example
+ls -l @var{files} | awk '@{ x += $5 @}
+ END @{ print "total K-bytes:", x / 1024 @}'
+@end example
+
+@item
+Print a sorted list of the login names of all users:
+
+@example
+awk -F: '@{ print $1 @}' /etc/passwd | sort
+@end example
+
+@item
+Count the lines in a file:
+
+@example
+awk 'END @{ print NR @}' data
+@end example
+
+@item
+Print the even-numbered lines in the @value{DF}:
+
+@example
+awk 'NR % 2 == 0' data
+@end example
+
+If you used the expression @samp{NR % 2 == 1} instead,
+the program would print the odd-numbered lines.
+@end itemize
+
+@node Two Rules
+@section An Example with Two Rules
+@cindex @command{awk} programs
+
+The @command{awk} utility reads the input files one line at a
+time. For each line, @command{awk} tries the patterns of each rule.
+If several patterns match, then several actions execute in the order in
+which they appear in the @command{awk} program. If no patterns match, then
+no actions run.
+
+After processing all the rules that match the line (and perhaps there are none),
+@command{awk} reads the next line. (However,
+@DBPXREF{Next Statement}
+@ifdocbook
+and @DBREF{Nextfile Statement}.)
+@end ifdocbook
+@ifnotdocbook
+and also @pxref{Nextfile Statement}.)
+@end ifnotdocbook
+This continues until the program reaches the end of the file.
+For example, the following @command{awk} program contains two rules:
+
+@example
+/12/ @{ print $0 @}
+/21/ @{ print $0 @}
+@end example
+
+@noindent
+The first rule has the string @samp{12} as the
+pattern and @samp{print $0} as the action. The second rule has the
+string @samp{21} as the pattern and also has @samp{print $0} as the
+action. Each rule's action is enclosed in its own pair of braces.
+
+This program prints every line that contains the string
+@samp{12} @emph{or} the string @samp{21}. If a line contains both
+strings, it is printed twice, once by each rule.
+
+This is what happens if we run this program on our two sample @value{DF}s,
+@file{mail-list} and @file{inventory-shipped}:
+
+@example
+$ @kbd{awk '/12/ @{ print $0 @}}
+> @kbd{/21/ @{ print $0 @}' mail-list inventory-shipped}
+@print{} Anthony 555-3412 anthony.asserturo@@hotmail.com A
+@print{} Camilla 555-2912 camilla.infusarum@@skynet.be R
+@print{} Fabius 555-1234 fabius.undevicesimus@@ucb.edu F
+@print{} Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@print{} Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@print{} Jan 21 36 64 620
+@print{} Apr 21 70 74 514
+@end example
+
+@noindent
+Note how the line beginning with @samp{Jean-Paul}
+in @file{mail-list} was printed twice, once for each rule.
+
+@node More Complex
+@section A More Complex Example
+
+Now that we've mastered some simple tasks, let's look at
+what typical @command{awk}
+programs do. This example shows how @command{awk} can be used to
+summarize, select, and rearrange the output of another utility. It uses
+features that haven't been covered yet, so don't worry if you don't
+understand all the details:
+
+@example
+ls -l | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
+@end example
+
+@cindex @command{ls} utility
+This command prints the total number of bytes in all the files in the
+current directory that were last modified in November (of any year).
+The @w{@samp{ls -l}} part of this example is a system command that gives
+you a listing of the files in a directory, including each file's size and the date
+the file was last modified. Its output looks like this:
+
+@example
+-rw-r--r-- 1 arnold user 1933 Nov 7 13:05 Makefile
+-rw-r--r-- 1 arnold user 10809 Nov 7 13:03 awk.h
+-rw-r--r-- 1 arnold user 983 Apr 13 12:14 awk.tab.h
+-rw-r--r-- 1 arnold user 31869 Jun 15 12:20 awkgram.y
+-rw-r--r-- 1 arnold user 22414 Nov 7 13:03 awk1.c
+-rw-r--r-- 1 arnold user 37455 Nov 7 13:03 awk2.c
+-rw-r--r-- 1 arnold user 27511 Dec 9 13:07 awk3.c
+-rw-r--r-- 1 arnold user 7989 Nov 7 13:03 awk4.c
+@end example
+
+@noindent
+@cindex line continuations, with C shell
+The first field contains read-write permissions, the second field contains
+the number of links to the file, and the third field identifies the file's owner.
+The fourth field identifies the file's group.
+The fifth field contains the file's size in bytes. The
+sixth, seventh, and eighth fields contain the month, day, and time,
+respectively, that the file was last modified. Finally, the ninth field
+contains the @value{FN}.
+
+@c @cindex automatic initialization
+@cindex initialization, automatic
+The @samp{$6 == "Nov"} in our @command{awk} program is an expression that
+tests whether the sixth field of the output from @w{@samp{ls -l}}
+is equal to the string @samp{Nov}. Each time a line has the string
+@samp{Nov} for its sixth field, @command{awk} performs the action
+@samp{sum += $5}. This adds the fifth field (the file's size) to the variable
+@code{sum}. As a result, when @command{awk} has finished reading all the
+input lines, @code{sum} is the total of the sizes of the files whose
+lines matched the pattern. (This works because @command{awk} variables
+are automatically initialized to zero.)
+
+After the last line of output from @command{ls} has been processed, the
+@code{END} rule executes and prints the value of @code{sum}.
+In this example, the value of @code{sum} is 80600.
+
+These more advanced @command{awk} techniques are covered in later sections
+(@pxref{Action Overview}). Before you can move on to more
+advanced @command{awk} programming, you have to know how @command{awk} interprets
+your input and displays your output. By manipulating fields and using
+@code{print} statements, you can produce some very useful and
+impressive-looking reports.
+
+@node Statements/Lines
+@section @command{awk} Statements Versus Lines
+@cindex line breaks
+@cindex newlines
+
+Most often, each line in an @command{awk} program is a separate statement or
+separate rule, like this:
+
+@example
+awk '/12/ @{ print $0 @}
+ /21/ @{ print $0 @}' mail-list inventory-shipped
+@end example
+
+@cindex @command{gawk}, newlines in
+However, @command{gawk} ignores newlines after any of the following
+symbols and keywords:
+
+@example
+, @{ ? : || && do else
+@end example
+
+@noindent
+A newline at any other point is considered the end of the
+statement.@footnote{The @samp{?} and @samp{:} referred to here are the
+operators of the three-operand conditional expression described in
+@ref{Conditional Exp}.
+Splitting lines after @samp{?} and @samp{:} is a minor @command{gawk}
+extension; if @option{--posix} is specified
+(@pxref{Options}), then this extension is disabled.}
+
+@cindex @code{\} (backslash), continuing lines and
+@cindex backslash (@code{\}), continuing lines and
+If you would like to split a single statement into two lines at a point
+where a newline would terminate it, you can @dfn{continue} it by ending the
+first line with a backslash character (@samp{\}). The backslash must be
+the final character on the line in order to be recognized as a continuation
+character. A backslash is allowed anywhere in the statement, even
+in the middle of a string or regular expression. For example:
+
+@example
+awk '/This regular expression is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+@cindex portability, backslash continuation and
+We have generally not used backslash continuation in our sample programs.
+@command{gawk} places no limit on the
+length of a line, so backslash continuation is never strictly necessary;
+it just makes programs more readable. For this same reason, as well as
+for clarity, we have kept most statements short in the programs
+presented throughout the @value{DOCUMENT}. Backslash continuation is
+most useful when your @command{awk} program is in a separate source file
+instead of entered from the command line. You should also note that
+many @command{awk} implementations are more particular about where you
+may use backslash continuation. For example, they may not allow you to
+split a string constant using backslash continuation. Thus, for maximum
+portability of your @command{awk} programs, it is best not to split your
+lines in the middle of a regular expression or a string.
+@c 10/2000: gawk, mawk, and current bell labs awk allow it,
+@c solaris 2.7 nawk does not. Solaris /usr/xpg4/bin/awk does though! sigh.
+
+@cindex @command{csh} utility
+@cindex backslash (@code{\}), continuing lines and, in @command{csh}
+@cindex @code{\} (backslash), continuing lines and, in @command{csh}
+@quotation CAUTION
+@emph{Backslash continuation does not work as described
+with the C shell.} It works for @command{awk} programs in files and
+for one-shot programs, @emph{provided} you are using a POSIX-compliant
+shell, such as the Unix Bourne shell or Bash. But the C shell behaves
+differently! There you must use two backslashes in a row, followed by
+a newline. Note also that when using the C shell, @emph{every} newline
+in your @command{awk} program must be escaped with a backslash. To illustrate:
+
+@example
+% @kbd{awk 'BEGIN @{ \}
+? @kbd{ print \\}
+? @kbd{ "hello, world" \}
+? @kbd{@}'}
+@print{} hello, world
+@end example
+
+@noindent
+Here, the @samp{%} and @samp{?} are the C shell's primary and secondary
+prompts, analogous to the standard shell's @samp{$} and @samp{>}.
+
+Compare the previous example to how it is done with a POSIX-compliant shell:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{print \}
+> @kbd{"hello, world"}
+> @kbd{@}'}
+@print{} hello, world
+@end example
+@end quotation
+
+@command{awk} is a line-oriented language. Each rule's action has to
+begin on the same line as the pattern. To have the pattern and action
+on separate lines, you @emph{must} use backslash continuation; there
+is no other option.
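+
+For instance, the first rule of the earlier two-rule program could be
+written with its action on a separate line, like so:
+
+@example
+/12/ \
+    @{ print $0 @}
+@end example
+
+@noindent
+The layout is purely a matter of taste; the rule behaves identically.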
+
+@cindex backslash (@code{\}), continuing lines and, comments and
+@cindex @code{\} (backslash), continuing lines and, comments and
+@cindex commenting, backslash continuation and
+Another thing to keep in mind is that backslash continuation and
+comments do not mix. As soon as @command{awk} sees the @samp{#} that
+starts a comment, it ignores @emph{everything} on the rest of the
+line. For example:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "dont panic" # a friendly \}
+> @kbd{ BEGIN rule}
+> @kbd{@}'}
+@error{} gawk: cmd. line:2: BEGIN rule
+@error{} gawk: cmd. line:2: ^ syntax error
+@end example
+
+@noindent
+In this case, it looks like the backslash would continue the comment onto the
+next line. However, the backslash-newline combination is never even
+noticed because it is ``hidden'' inside the comment. Thus, the
+@code{BEGIN} is noted as a syntax error.
+
+@cindex statements, multiple
+@cindex @code{;} (semicolon), separating statements in actions
+@cindex semicolon (@code{;}), separating statements in actions
+When @command{awk} statements within one rule are short, you might want to put
+more than one of them on a line. This is accomplished by separating the statements
+with a semicolon (@samp{;}).
+This also applies to the rules themselves.
+Thus, the program shown at the start of this @value{SECTION}
+could also be written this way:
+
+@example
+/12/ @{ print $0 @} ; /21/ @{ print $0 @}
+@end example
+
+@quotation NOTE
+The requirement that rules on the same line must be
+separated with a semicolon was not in the original @command{awk}
+language; it was added for consistency with the treatment of statements
+within an action.
+@end quotation
+
+@node Other Features
+@section Other Features of @command{awk}
+
+@cindex variables
+The @command{awk} language provides a number of predefined, or
+@dfn{built-in}, variables that your programs can use to get information
+from @command{awk}. There are other variables your program can set
+as well to control how @command{awk} processes your data.
+
+In addition, @command{awk} provides a number of built-in functions for doing
+common computational and string-related operations.
+@command{gawk} provides built-in functions for working with timestamps,
+performing bit manipulation, doing runtime string translation
+(internationalization), determining the type of a variable,
+and sorting arrays.
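+
+As a small taste, two of the @command{gawk}-specific functions just
+mentioned can be combined to print the current date and time (both
+functions are covered later in this @value{DOCUMENT}):
+
+@example
+gawk 'BEGIN @{ print strftime("%Y-%m-%d %H:%M:%S", systime()) @}'
+@end example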
+
+As we develop our presentation of the @command{awk} language, we will introduce
+most of the variables and many of the functions. They are described
+systematically in @DBREF{Built-in Variables} and in
+@ref{Built-in}.
+
+@node When
+@section When to Use @command{awk}
+
+@cindex @command{awk}, uses for
+Now that you've seen some of what @command{awk} can do,
+you might wonder how @command{awk} could be useful for you. By using
+utility programs, advanced patterns, field separators, arithmetic
+statements, and other selection criteria, you can produce much more
+complex output. The @command{awk} language is very useful for producing
+reports from large amounts of raw data, such as summarizing information
+from the output of other utility programs like @command{ls}.
+(@xref{More Complex}.)
+
+Programs written with @command{awk} are usually much smaller than they would
+be in other languages. This makes @command{awk} programs easy to compose and
+use. Often, @command{awk} programs can be quickly composed at your keyboard,
+used once, and thrown away. Because @command{awk} programs are interpreted, you
+can avoid the (usually lengthy) compilation part of the typical
+edit-compile-test-debug cycle of software development.
+
+@cindex Brian Kernighan's @command{awk}
+Complex programs have been written in @command{awk}, including a complete
+retargetable assembler for
+@ifclear FOR_PRINT
+eight-bit microprocessors (@pxref{Glossary}, for more information),
+@end ifclear
+@ifset FOR_PRINT
+eight-bit microprocessors,
+@end ifset
+and a microcode assembler for a special-purpose Prolog
+computer.
+The original @command{awk}'s capabilities were strained by tasks
+of such complexity, but modern versions are more capable.
+
+@cindex @command{awk} programs, complex
+If you find yourself writing @command{awk} scripts of more than, say,
+a few hundred lines, you might consider using a different programming
+language. The shell is good at string and pattern matching; in addition,
+it allows powerful use of the system utilities. Python offers a nice
+balance between high-level ease of programming and access to system
+facilities.@footnote{Other popular scripting languages include Ruby
+and Perl.}
+
+@node Intro Summary
+@section Summary
+
+@c FIXME: Review this chapter for summary of builtin functions called.
+@itemize @value{BULLET}
+@item
+Programs in @command{awk} consist of @var{pattern}--@var{action} pairs.
+
+@item
+An @var{action} without a @var{pattern} always runs. The default
+@var{action} for a pattern without one is @samp{@{ print $0 @}}.
+
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+You may use the special @samp{#!} header line to create @command{awk}
+programs that are directly executable.
+
+@item
+Comments in @command{awk} programs start with @samp{#} and continue to
+the end of the same line.
+
+@item
+Be aware of quoting issues when writing @command{awk} programs as
+part of a larger shell script (or MS-Windows batch file).
+
+@item
+You may use backslash continuation to continue a source line.
+Lines are automatically continued after
+a comma, open brace, question mark, colon,
+@samp{||}, @samp{&&}, @code{do}, and @code{else}.
+@end itemize
+
+@node Invoking Gawk
+@chapter Running @command{awk} and @command{gawk}
+
+This @value{CHAPTER} covers how to run @command{awk}, both POSIX-standard
+and @command{gawk}-specific command-line options, and what
+@command{awk} and
+@command{gawk} do with nonoption arguments.
+It then proceeds to cover how @command{gawk} searches for source files,
+reading standard input along with other files, @command{gawk}'s
+environment variables, @command{gawk}'s exit status, using include files,
+and obsolete and undocumented options and/or features.
+
+Many of the options and features described here are discussed in
+more detail later in the @value{DOCUMENT}; feel free to skip over
+things in this @value{CHAPTER} that don't interest you right now.
+
+@menu
+* Command Line:: How to run @command{awk}.
+* Options:: Command-line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* Naming Standard Input:: How to specify standard input with other
+ files.
+* Environment Variables:: The environment variables @command{gawk} uses.
+* Exit Status:: @command{gawk}'s exit status.
+* Include Files:: Including other files into your program.
+* Loading Shared Libraries:: Loading shared libraries into your program.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
+@end menu
+
+@node Command Line
+@section Invoking @command{awk}
+@cindex command line, invoking @command{awk} from
+@cindex @command{awk}, invoking
+@cindex arguments, command-line, invoking @command{awk}
+@cindex options, command-line, invoking @command{awk}
+
+There are two ways to run @command{awk}---with an explicit program or with
+one or more program files. Here are templates for both of them; items
+enclosed in [@dots{}] in these templates are optional:
+
+@display
+@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{}
+@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{}
+@end display
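+
+Concretely, the two templates might be filled in like this
+(@file{myprog.awk} is a hypothetical program file):
+
+@example
+awk -f myprog.awk mail-list
+awk 'length($0) > 80' data
+@end example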
+
+@cindex GNU long options
+@cindex long options
+@cindex options, long
+In addition to traditional one-letter POSIX-style options, @command{gawk} also
+supports GNU long options.
+
+@cindex dark corner, invoking @command{awk}
+@cindex lint checking, empty programs
+It is possible to invoke @command{awk} with an empty program:
+
+@example
+awk '' datafile1 datafile2
+@end example
+
+@cindex @option{--lint} option
+@noindent
+Doing so makes little sense, though; @command{awk} exits
+silently when given an empty program.
+@value{DARKCORNER}
+If @option{--lint} has
+been specified on the command line, @command{gawk} issues a
+warning that the program is empty.
+
+@node Options
+@section Command-Line Options
+@cindex options, command-line
+@cindex command line, options
+@cindex GNU long options
+@cindex options, long
+
+Options begin with a dash and consist of a single character.
+GNU-style long options consist of two dashes and a keyword.
+The keyword can be abbreviated, as long as the abbreviation allows the option
+to be uniquely identified. If the option takes an argument, either the
+keyword is immediately followed by an equals sign (@samp{=}) and the
+argument's value, or the keyword and the argument's value are separated
+by whitespace.
+If a particular option with a value is given more than once, it is the
+last value that counts.
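+
+For example, the following two commands are equivalent ways of passing
+an argument to the @option{--field-separator} option described below:
+
+@example
+gawk --field-separator=: '@{ print $1 @}' /etc/passwd
+gawk --field-separator : '@{ print $1 @}' /etc/passwd
+@end example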
+
+@cindex POSIX @command{awk}, GNU long options and
+Each long option for @command{gawk} has a corresponding
+POSIX-style short option.
+The long and short options are
+interchangeable in all contexts.
+The following list describes options mandated by the POSIX standard:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+@cindex @option{-F} option
+@cindex @option{--field-separator} option
+@cindex @code{FS} variable, @code{--field-separator} option and
+Set the @code{FS} variable to @var{fs}
+(@pxref{Field Separators}).
+
+@item -f @var{source-file}
+@itemx --file @var{source-file}
+@cindex @option{-f} option
+@cindex @option{--file} option
+@cindex @command{awk} programs, location of
+Read the @command{awk} program source from @var{source-file}
+instead of from the first nonoption argument.
+This option may be given multiple times; the @command{awk}
+program consists of the concatenation of the contents of
+each specified @var{source-file}.
+
+@item -v @var{var}=@var{val}
+@itemx --assign @var{var}=@var{val}
+@cindex @option{-v} option
+@cindex @option{--assign} option
+@cindex variables, setting
+Set the variable @var{var} to the value @var{val} @emph{before}
+execution of the program begins. Such variable values are available
+inside the @code{BEGIN} rule
+(@pxref{Other Arguments}).
+
+The @option{-v} option can only set one variable, but it can be used
+more than once, setting another variable each time, like this:
+@samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}.
+
+@cindex predefined variables, @code{-v} option@comma{} setting with
+@cindex variables, predefined @code{-v} option@comma{} setting with
+@quotation CAUTION
+Using @option{-v} to set the values of the built-in
+variables may lead to surprising results. @command{awk} will reset the
+values of those variables as it needs to, possibly ignoring any
+initial value you may have given.
+@end quotation
+
+@item -W @var{gawk-opt}
+@cindex @option{-W} option
+Provide an implementation-specific option.
+This is the POSIX convention for providing implementation-specific options.
+These options
+also have corresponding GNU-style long options.
+Note that the long options may be abbreviated, as long as
+the abbreviations remain unique.
+The full list of @command{gawk}-specific options is provided next.
+
+@item --
+@cindex command line, options, end of
+@cindex options, command-line, end of
+Signal the end of the command-line options. The following arguments
+are not treated as options even if they begin with @samp{-}. This
+interpretation of @option{--} follows the POSIX argument parsing
+conventions.
+
+@cindex @code{-} (hyphen), filenames beginning with
+@cindex hyphen (@code{-}), filenames beginning with
+This is useful if you have @value{FN}s that start with @samp{-},
+or, in shell scripts, if the user may supply @value{FN}s that
+start with @samp{-}.
+It is also useful for passing options on to the @command{awk}
+program; see @ref{Getopt Function}.
+@end table
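+
+As a brief sketch combining several of the preceding options (@code{min}
+here is just an ordinary program variable chosen for the example):
+
+@example
+gawk -F: -v min=100 '$3 >= min @{ print $1 @}' /etc/passwd
+@end example
+
+@noindent
+This sets the field separator to a colon, assigns @code{min} before the
+program runs, and prints the login names of accounts whose numeric
+user ID (the third field of @file{/etc/passwd}) is at least 100.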
+
+The following list describes @command{gawk}-specific options:
+
+@c Have to use @asis here to get docbook to come out right.
+@table @asis
+@item @option{-b}
+@itemx @option{--characters-as-bytes}
+@cindex @option{-b} option
+@cindex @option{--characters-as-bytes} option
+Cause @command{gawk} to treat all input data as single-byte characters.
+In addition, all output written with @code{print} or @code{printf}
+is treated as single-byte characters.
+
+Normally, @command{gawk} follows the POSIX standard and attempts to process
+its input data according to the current locale (@pxref{Locales}). This can often involve
+converting multibyte characters into wide characters (internally), and
+can lead to problems or confusion if the input data does not contain valid
+multibyte characters. This option is an easy way to tell @command{gawk},
+``Hands off my data!''
+
+@item @option{-c}
+@itemx @option{--traditional}
+@cindex @option{-c} option
+@cindex @option{--traditional} option
+@cindex compatibility mode (@command{gawk}), specifying
+Specify @dfn{compatibility mode}, in which the GNU extensions to
+the @command{awk} language are disabled, so that @command{gawk} behaves just
+like BWK @command{awk}.
+@xref{POSIX/GNU},
+which summarizes the extensions.
+@ifclear FOR_PRINT
+Also see
+@ref{Compatibility Mode}.
+@end ifclear
+
+@item @option{-C}
+@itemx @option{--copyright}
+@cindex @option{-C} option
+@cindex @option{--copyright} option
+@cindex GPL (General Public License), printing
+Print the short version of the General Public License and then exit.
+
+@item @option{-d}[@var{file}]
+@itemx @option{--dump-variables}[@code{=}@var{file}]
+@cindex @option{-d} option
+@cindex @option{--dump-variables} option
+@cindex dump all variables of a program
+@cindex @file{awkvars.out} file
+@cindex files, @file{awkvars.out}
+@cindex variables, global, printing list of
+Print a sorted list of global variables, their types, and final values
+to @var{file}. If no @var{file} is provided, print this
+list to a file named @file{awkvars.out} in the current directory.
+No space is allowed between the @option{-d} and @var{file}, if
+@var{file} is supplied.
+
+@cindex troubleshooting, typographical errors@comma{} global variables
+Having a list of all global variables is a good way to look for
+typographical errors in your programs.
+You would also use this option if you have a large program with a lot of
+functions, and you want to be sure that your functions don't
+inadvertently use global variables that you meant to be local.
+(This is a particularly easy mistake to make with simple variable
+names like @code{i}, @code{j}, etc.)
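+
+For example, the following sketch dumps the variables of a trivial
+program; the resulting @file{awkvars.out} then lists @code{total},
+along with all of the predefined variables and their final values:
+
+@example
+gawk --dump-variables 'BEGIN @{ total = 17 + 25 @}'
+@end example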
+
+@item @option{-D}[@var{file}]
+@itemx @option{--debug}[@code{=}@var{file}]
+@cindex @option{-D} option
+@cindex @option{--debug} option
+@cindex @command{awk} debugging, enabling
+Enable debugging of @command{awk} programs
+(@pxref{Debugging}).
+By default, the debugger reads commands interactively from the keyboard
+(standard input).
+The optional @var{file} argument allows you to specify a file with a list
+of commands for the debugger to execute noninteractively.
+No space is allowed between the @option{-D} and @var{file}, if
+@var{file} is supplied.
+
+@item @option{-e} @var{program-text}
+@itemx @option{--source} @var{program-text}
+@cindex @option{-e} option
+@cindex @option{--source} option
+@cindex source code, mixing
+Provide program source code in the @var{program-text}.
+This option allows you to mix source code in files with source
+code that you enter on the command line.
+This is particularly useful
+when you have library functions that you want to use from your command-line
+programs (@pxref{AWKPATH Variable}).
+
+@item @option{-E} @var{file}
+@itemx @option{--exec} @var{file}
+@cindex @option{-E} option
+@cindex @option{--exec} option
+@cindex @command{awk} programs, location of
+@cindex CGI, @command{awk} scripts for
+Similar to @option{-f}, read @command{awk} program text from @var{file}.
+There are two differences from @option{-f}:
+
+@itemize @value{BULLET}
+@item
+This option terminates option processing; anything
+else on the command line is passed on directly to the @command{awk} program.
+
+@item
+Command-line variable assignments of the form
+@samp{@var{var}=@var{value}} are disallowed.
+@end itemize
+
+This option is particularly necessary for World Wide Web CGI applications
+that pass arguments through the URL; using this option prevents a malicious
+(or other) user from passing in options, assignments, or @command{awk} source
+code (via @option{-e}) to the CGI application.@footnote{For more detail,
+please see Section 4.4 of @uref{http://www.ietf.org/rfc/rfc3875,
+RFC 3875}. Also see the
+@uref{http://lists.gnu.org/archive/html/bug-gawk/2014-11/msg00022.html,
+explanatory note sent to the @command{gawk} bug
+mailing list}.}
+This option should be used
+with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
+
+@example
+#! /usr/local/bin/gawk -E
+
+@var{awk program here @dots{}}
+@end example
+
+@item @option{-g}
+@itemx @option{--gen-pot}
+@cindex @option{-g} option
+@cindex @option{--gen-pot} option
+@cindex portable object files, generating
+@cindex files, portable object, generating
+Analyze the source program and
+generate a GNU @command{gettext} portable object template file on standard
+output for all string constants that have been marked for translation.
+@xref{Internationalization},
+for information about this option.
+
+@item @option{-h}
+@itemx @option{--help}
+@cindex @option{-h} option
+@cindex @option{--help} option
+@cindex GNU long options, printing list of
+@cindex options, printing list of
+@cindex printing, list of options
+Print a ``usage'' message summarizing the short- and long-style options
+that @command{gawk} accepts and then exit.
+
+@item @option{-i} @var{source-file}
+@itemx @option{--include} @var{source-file}
+@cindex @option{-i} option
+@cindex @option{--include} option
+@cindex @command{awk} programs, location of
+Read an @command{awk} source library from @var{source-file}. This option
+is completely equivalent to using the @code{@@include} directive inside
+your program. It is very similar to the @option{-f} option,
+but there are two important differences. First, when @option{-i} is
+used, the program source is not loaded if it has been previously
+loaded, whereas with @option{-f}, @command{gawk} always loads the file.
+Second, because this option is intended to be used with code libraries,
+@command{gawk} does not recognize such files as constituting main program
+input. Thus, after processing an @option{-i} argument, @command{gawk}
+still expects to find the main source code via the @option{-f} option
+or on the command line.
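+
+A sketch of typical usage (here @file{myfuncs} and @code{report()} are
+hypothetical names for a library file and a function it defines):
+
+@example
+gawk -i myfuncs -e 'BEGIN @{ report() @}'
+@end example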
+
+@item @option{-l} @var{ext}
+@itemx @option{--load} @var{ext}
+@cindex @option{-l} option
+@cindex @option{--load} option
+@cindex loading, extensions
+Load a dynamic extension named @var{ext}. Extensions
+are stored as system shared libraries.
+This option searches for the library using the @env{AWKLIBPATH}
+environment variable. The correct library suffix for your platform will be
+supplied by default, so it need not be specified in the extension name.
+The extension initialization routine should be named @code{dl_load()}.
+An alternative is to use the @code{@@load} keyword inside the program to load
+a shared library. This advanced feature is described in detail in @ref{Dynamic Extensions}.
+
+@item @option{-L}[@var{value}]
+@itemx @option{--lint}[@code{=}@var{value}]
+@cindex @option{-L} option
+@cindex @option{--lint} option
+@cindex lint checking, issuing warnings
+@cindex warnings, issuing
+Warn about constructs that are dubious or nonportable to
+other @command{awk} implementations.
+No space is allowed between the @option{-L} and @var{value}, if
+@var{value} is supplied.
+Some warnings are issued when @command{gawk} first reads your program. Others
+are issued at runtime, as your program executes.
+With an optional argument of @samp{fatal},
+lint warnings become fatal errors.
+This may be drastic, but its use will certainly encourage the
+development of cleaner @command{awk} programs.
+With an optional argument of @samp{invalid}, only warnings about things
+that are actually invalid are issued. (This is not fully implemented yet.)
+
+Some warnings are only printed once, even if the dubious constructs they
+warn about occur multiple times in your @command{awk} program. Thus,
+when eliminating problems pointed out by @option{--lint}, you should take
+care to search for all occurrences of each inappropriate construct. As
+@command{awk} programs are usually short, doing so is not burdensome.
+
+@item @option{-M}
+@itemx @option{--bignum}
+@cindex @option{-M} option
+@cindex @option{--bignum} option
+Force arbitrary-precision arithmetic on numbers. This option has no effect
+if @command{gawk} is not compiled to use the GNU MPFR and MP libraries
+(@pxref{Arbitrary Precision Arithmetic}).
+
+@item @option{-n}
+@itemx @option{--non-decimal-data}
+@cindex @option{-n} option
+@cindex @option{--non-decimal-data} option
+@cindex hexadecimal values@comma{} enabling interpretation of
+@cindex octal values@comma{} enabling interpretation of
+@cindex troubleshooting, @code{--non-decimal-data} option
+Enable automatic interpretation of octal and hexadecimal
+values in input data
+(@pxref{Nondecimal Data}).
+
+@quotation CAUTION
+This option can severely break old programs. Use with care. Also note
+that this option may disappear in a future version of @command{gawk}.
+@end quotation
+
+@item @option{-N}
+@itemx @option{--use-lc-numeric}
+@cindex @option{-N} option
+@cindex @option{--use-lc-numeric} option
+Force the use of the locale's decimal point character
+when parsing numeric input data (@pxref{Locales}).
+
+@item @option{-o}[@var{file}]
+@itemx @option{--pretty-print}[@code{=}@var{file}]
+@cindex @option{-o} option
+@cindex @option{--pretty-print} option
+Enable pretty-printing of @command{awk} programs.
+By default, the output program is created in a file named @file{awkprof.out}
+(@pxref{Profiling}).
+The optional @var{file} argument allows you to specify a different
+@value{FN} for the output.
+No space is allowed between the @option{-o} and @var{file}, if
+@var{file} is supplied.
+
+@quotation NOTE
+In the past, this option would also execute your program.
+This is no longer the case.
+@end quotation
+
+@item @option{-O}
+@itemx @option{--optimize}
+@cindex @option{--optimize} option
+@cindex @option{-O} option
+Enable some optimizations on the internal representation of the program.
+At the moment, this includes just simple constant folding.
+
+@item @option{-p}[@var{file}]
+@itemx @option{--profile}[@code{=}@var{file}]
+@cindex @option{-p} option
+@cindex @option{--profile} option
+@cindex @command{awk} profiling, enabling
+Enable profiling of @command{awk} programs
+(@pxref{Profiling}).
+By default, profiles are created in a file named @file{awkprof.out}.
+The optional @var{file} argument allows you to specify a different
+@value{FN} for the profile file.
+No space is allowed between the @option{-p} and @var{file}, if
+@var{file} is supplied.
+
+The profile contains execution counts for each statement in the program
+in the left margin, and function call counts for each function.
+
+@item @option{-P}
+@itemx @option{--posix}
+@cindex @option{-P} option
+@cindex @option{--posix} option
+@cindex POSIX mode
+@cindex @command{gawk}, extensions@comma{} disabling
+Operate in strict POSIX mode. This disables all @command{gawk}
+extensions (just like @option{--traditional}) and
+disables all extensions not allowed by POSIX.
+@DBXREF{Common Extensions} for a summary of the extensions
+in @command{gawk} that are disabled by this option.
+Also,
+the following additional
+restrictions apply:
+
+@itemize @value{BULLET}
+
+@cindex newlines
+@cindex whitespace, newlines as
+@item
+Newlines do not act as whitespace to separate fields when @code{FS} is
+equal to a single space
+(@pxref{Fields}).
+
+@item
+Newlines are not allowed after @samp{?} or @samp{:}
+(@pxref{Conditional Exp}).
+
+@cindex @code{FS} variable, as TAB character
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single TAB character
+(@pxref{Field Separators}).
+
+@cindex locale decimal point character
+@cindex decimal point character, locale specific
+@item
+The locale's decimal point character is used for parsing input
+data (@pxref{Locales}).
+@end itemize
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+@cindex @option{--traditional} option, @code{--posix} option and
+@cindex @option{--posix} option, @code{--traditional} option and
+If you supply both @option{--traditional} and @option{--posix} on the
+command line, @option{--posix} takes precedence. @command{gawk}
+issues a warning if both options are supplied.
+
+@item @option{-r}
+@itemx @option{--re-interval}
+@cindex @option{-r} option
+@cindex @option{--re-interval} option
+@cindex regular expressions, interval expressions and
+Allow interval expressions
+(@pxref{Regexp Operators})
+in regexps.
+This is now @command{gawk}'s default behavior.
+Nevertheless, this option remains (both for backward compatibility
+and for use in combination with @option{--traditional}).
+
+@item @option{-S}
+@itemx @option{--sandbox}
+@cindex @option{-S} option
+@cindex @option{--sandbox} option
+@cindex sandbox mode
+Disable the @code{system()} function,
+input redirections with @code{getline},
+output redirections with @code{print} and @code{printf},
+and dynamic extensions.
+This is particularly useful when you want to run @command{awk} scripts
+from questionable sources and need to make sure the scripts
+can't access your system (other than the specified input @value{DF}).
+
+@item @option{-t}
+@itemx @option{--lint-old}
+@cindex @option{-t} option
+@cindex @option{--lint-old} option
+Warn about constructs that are not available in the original version of
+@command{awk} from Version 7 Unix
+(@pxref{V7/SVR3.1}).
+
+@item @option{-V}
+@itemx @option{--version}
+@cindex @option{-V} option
+@cindex @option{--version} option
+@cindex @command{gawk}, versions of, information about@comma{} printing
+Print version information for this particular copy of @command{gawk}.
+This allows you to determine if your copy of @command{gawk} is up to date
+with respect to whatever the Free Software Foundation is currently
+distributing.
+It is also useful for bug reports
+(@pxref{Bugs}).
+@end table
+
+As long as program text has been supplied,
+any other options are flagged as invalid with a warning message but
+are otherwise ignored.
+
+@cindex @option{-F} option, @option{-Ft} sets @code{FS} to TAB
+In compatibility mode, as a special case, if the value of @var{fs} supplied
+to the @option{-F} option is @samp{t}, then @code{FS} is set to the TAB
+character (@code{"\t"}). This is true only for @option{--traditional} and not
+for @option{--posix}
+(@pxref{Field Separators}).
+
+@cindex @option{-f} option, multiple uses
+The @option{-f} option may be used more than once on the command line.
+If it is, @command{awk} reads its program source from all of the named files, as
+if they had been concatenated together into one big file. This is
+useful for creating libraries of @command{awk} functions. These functions
+can be written once and then retrieved from a standard place, instead
+of having to be included in each individual program.
+The @option{-i} option is similar in this regard.
+(As mentioned in
+@ref{Definition Syntax},
+function names must be unique.)
+
+With standard @command{awk}, library functions can still be used, even
+if the program is entered at the keyboard,
+by specifying @samp{-f /dev/tty}. After typing your program,
+type @kbd{Ctrl-d} (the end-of-file character) to terminate it.
+(You may also use @samp{-f -} to read program source from the standard
+input, but then you will not be able to also use the standard input as a
+source of data.)
+
+Because it is clumsy using the standard @command{awk} mechanisms to mix
+source file and command-line @command{awk} programs, @command{gawk}
+provides the @option{-e} option. This does not require you to
+preempt the standard input for your source code; it allows you to easily
+mix command-line and library source code (@pxref{AWKPATH Variable}).
+As with @option{-f}, the @option{-e} and @option{-i}
+options may also be used multiple times on the command line.
+
+@cindex @option{-e} option
+If no @option{-f} or @option{-e} option is specified, then @command{gawk}
+uses the first nonoption command-line argument as the text of the
+program source code.
+
+@cindex @env{POSIXLY_CORRECT} environment variable
+@cindex lint checking, @env{POSIXLY_CORRECT} environment variable
+@cindex POSIX mode
+If the environment variable @env{POSIXLY_CORRECT} exists,
+then @command{gawk} behaves in strict POSIX mode, exactly as if
+you had supplied @option{--posix}.
+Many GNU programs look for this environment variable to suppress
+extensions that conflict with POSIX, but @command{gawk} behaves
+differently: it suppresses all extensions, even those that do not
+conflict with POSIX, and behaves in
+strict POSIX mode. If @option{--lint} is supplied on the command line
+and @command{gawk} turns on POSIX mode because of @env{POSIXLY_CORRECT},
+then it issues a warning message indicating that POSIX
+mode is in effect.
+You would typically set this variable in your shell's startup file.
+For a Bourne-compatible shell (such as Bash), you would add these
+lines to the @file{.profile} file in your home directory:
+
+@example
+POSIXLY_CORRECT=true
+export POSIXLY_CORRECT
+@end example
+
+@cindex @command{csh} utility, @env{POSIXLY_CORRECT} environment variable
+For a C shell-compatible
+shell,@footnote{Not recommended.}
+you would add this line to the @file{.login} file in your home directory:
+
+@example
+setenv POSIXLY_CORRECT true
+@end example
+
+@cindex portability, @env{POSIXLY_CORRECT} environment variable
+Having @env{POSIXLY_CORRECT} set is not recommended for daily use,
+but it is good for testing the portability of your programs to other
+environments.
+
+@node Other Arguments
+@section Other Command-Line Arguments
+@cindex command line, arguments
+@cindex arguments, command-line
+
+Any additional arguments on the command line are normally treated as
+input files to be processed in the order specified. However, an
+argument that has the form @code{@var{var}=@var{value}} assigns
+the value @var{value} to the variable @var{var}---it does not specify a
+file at all. (See @ref{Assignment Options}.) In the following example,
+@var{count=1} is a variable assignment, not a @value{FN}:
+
+@example
+awk -f program.awk file1 count=1 file2
+@end example
+
+@cindex @command{gawk}, @code{ARGIND} variable in
+@cindex @code{ARGIND} variable, command-line arguments
+@cindex @code{ARGV} array, indexing into
+@cindex @code{ARGC}/@code{ARGV} variables, command-line arguments
+All the command-line arguments are made available to your @command{awk} program in the
+@code{ARGV} array (@pxref{Built-in Variables}). Command-line options
+and the program text (if present) are omitted from @code{ARGV}.
+All other arguments, including variable assignments, are
+included. As each element of @code{ARGV} is processed, @command{gawk}
+sets @code{ARGIND} to the index in @code{ARGV} of the
+current element.
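+
+For example, the following sketch simply displays @code{ARGV}.
+Because the program consists only of a @code{BEGIN} rule, the named
+files are never read, so they need not even exist:
+
+@example
+gawk 'BEGIN @{
+    for (i = 0; i < ARGC; i++) print i, ARGV[i]
+@}' file1 n=3 file2
+@end example
+
+@noindent
+The output is @samp{file1}, @samp{n=3}, and @samp{file2}, each preceded
+by its index; element zero holds the name by which @command{gawk} itself
+was invoked.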
+
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
+@cindex input files, variable assignments and
+@cindex variable assignments and input files
+The distinction between @value{FN} arguments and variable-assignment
+arguments is made when @command{awk} is about to open the next input file.
+At that point in execution, it checks the @value{FN} to see whether
+it is really a variable assignment; if so, @command{awk} sets the variable
+instead of reading a file.
+
+Therefore, the variables actually receive the given values after all
+previously specified files have been read. In particular, the values of
+variables assigned in this fashion are @emph{not} available inside a
+@code{BEGIN} rule
+(@pxref{BEGIN/END}),
+because such rules are run before @command{awk} begins scanning the argument list.
+
+@cindex dark corner, escape sequences
+The variable values given on the command line are processed for escape
+sequences (@pxref{Escape Sequences}).
+@value{DARKCORNER}
+
+In some very early implementations of @command{awk}, when a variable assignment
+occurred before any @value{FN}s, the assignment would happen @emph{before}
+the @code{BEGIN} rule was executed. @command{awk}'s behavior was thus
+inconsistent; some command-line assignments were available inside the
+@code{BEGIN} rule, while others were not. Unfortunately,
+some applications came to depend
+upon this ``feature.'' When @command{awk} was changed to be more consistent,
+the @option{-v} option was added to accommodate applications that depended
+upon the old behavior.
+
+The variable assignment feature is most useful for assigning to variables
+such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
+output formats, before scanning the @value{DF}s. It is also useful for
+controlling state if multiple passes are needed over a @value{DF}. For
+example:
+
+@cindex files, multiple passes over
+@example
+awk 'pass == 1 @{ @var{pass 1 stuff} @}
+ pass == 2 @{ @var{pass 2 stuff} @}' pass=1 mydata pass=2 mydata
+@end example
+
+Given the variable assignment feature, the @option{-F} option for setting
+the value of @code{FS} is not
+strictly necessary. It remains for historical compatibility.
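+
+For example, the following two commands behave the same way; the second
+one assigns @code{FS} on the command line just before @file{/etc/passwd}
+is read:
+
+@example
+awk -F: '@{ print $1 @}' /etc/passwd
+awk '@{ print $1 @}' FS=: /etc/passwd
+@end example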
+
+@node Naming Standard Input
+@section Naming Standard Input
+
+Often, you may wish to read standard input together with other files.
+For example, you may wish to read one file, read standard input coming
+from a pipe, and then read another file.
+
+The way to name the standard input, with all versions of @command{awk},
+is to use a single, standalone minus sign or dash, @samp{-}. For example:
+
+@example
+@var{some_command} | awk -f myprog.awk file1 - file2
+@end example
+
+@noindent
+Here, @command{awk} first reads @file{file1}, then it reads
+the output of @var{some_command}, and finally it reads
+@file{file2}.
+
+You may also use @code{"-"} to name standard input when reading
+files with @code{getline} (@pxref{Getline/File}).
+
+In addition, @command{gawk} allows you to specify the special
+@value{FN} @file{/dev/stdin}, both on the command line and
+with @code{getline}.
+Some other versions of @command{awk} also support this, but it
+is not standard.
+(Some operating systems provide a @file{/dev/stdin} file
+in the filesystem; however, @command{gawk} always processes
+this @value{FN} itself.)
+
+@node Environment Variables
+@section The Environment Variables @command{gawk} Uses
+@cindex environment variables used by @command{gawk}
+
+A number of environment variables influence how @command{gawk}
+behaves.
+
+@menu
+* AWKPATH Variable:: Searching directories for @command{awk}
+ programs.
+* AWKLIBPATH Variable:: Searching directories for @command{awk} shared
+ libraries.
+* Other Environment Variables:: The environment variables.
+@end menu
+
+@node AWKPATH Variable
+@subsection The @env{AWKPATH} Environment Variable
+@cindex @env{AWKPATH} environment variable
+@cindex directories, searching for source files
+@cindex search paths, for source files
+@cindex differences in @command{awk} and @command{gawk}, @env{AWKPATH} environment variable
+@ifinfo
+The previous @value{SECTION} described how @command{awk} program files can be named
+on the command line with the @option{-f} option.
+@end ifinfo
+In most @command{awk}
+implementations, you must supply a precise pathname for each program
+file, unless the file is in the current directory.
+But with @command{gawk}, if the @value{FN} supplied to the @option{-f}
+or @option{-i} options
+does not contain a directory separator @samp{/}, then @command{gawk} searches a list of
+directories (called the @dfn{search path}) one by one, looking for a
+file with the specified name.
+
+The search path is a string consisting of directory names
+separated by colons.@footnote{Semicolons on MS-Windows and MS-DOS.}
+@command{gawk} gets its search path from the
+@env{AWKPATH} environment variable. If that variable does not exist,
+or if it has an empty value,
+@command{gawk} uses a default path (described shortly).
+
+The search path feature is particularly helpful for building libraries
+of useful @command{awk} functions. The library files can be placed in a
+standard directory in the default path and then specified on
+the command line with a short @value{FN}. Otherwise, you would have to
+type the full @value{FN} for each file.
+
+By using the @option{-i} or @option{-f} options, your command-line
+@command{awk} programs can use facilities in @command{awk} library files
+(@pxref{Library Functions}).
+Path searching is not done if @command{gawk} is in compatibility mode.
+This is true for both @option{--traditional} and @option{--posix}.
+@xref{Options}.
+
+If the source code file is not found after the initial search, the path is searched
+again after adding the suffix @samp{.awk} to the @value{FN}.
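+
+For example (a sketch; @file{/home/arnold/awklib} and @file{myprog} are
+hypothetical names), a Bourne-style shell user might set the path and
+then run a program whose source lives in the library directory as
+@file{myprog.awk}:
+
+@example
+AWKPATH=".:/home/arnold/awklib:/usr/local/share/awk"
+export AWKPATH
+gawk -f myprog data
+@end example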
+
+@command{gawk}'s path search mechanism is similar
+to the shell's.
+(See @uref{http://www.gnu.org/software/bash/manual/,
+@cite{The Bourne-Again SHell manual}}.)
+It treats a null entry in the path as indicating the current
+directory.
+(A null entry is indicated by starting or ending the path with a
+colon or by placing two colons next to each other [@samp{::}].)
+
+@quotation NOTE
+To include the current directory in the path, either place @file{.}
+as an entry in the path or write a null entry in the path.
+
+Different past versions of @command{gawk} would also look explicitly in
+the current directory, either before or after the path search. As of
+@value{PVERSION} 4.1.2, this no longer happens; if you wish to look
+in the current directory, you must include @file{.} either as a separate
+entry or as a null entry in the search path.
+@end quotation
+
+The default value for @env{AWKPATH} is
+@samp{.:/usr/local/share/awk}.@footnote{Your version of @command{gawk}
+may use a different directory; it
+will depend upon how @command{gawk} was built and installed. The actual
+directory is the value of @code{$(datadir)} generated when
+@command{gawk} was configured. You probably don't need to worry about this,
+though.} Since @file{.} is included at the beginning, @command{gawk}
+searches first in the current directory and then in @file{/usr/local/share/awk}.
+In practice, this means that you will rarely need to change the
+value of @env{AWKPATH}.
+
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKPATH} variable.
+
+@command{gawk} places the value of the search path that it used into
+@code{ENVIRON["AWKPATH"]}. This provides access to the actual search
+path value from within an @command{awk} program.
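+
+For example, on a system using the default path, the following command
+prints something like this:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print ENVIRON["AWKPATH"] @}'}
+@print{} .:/usr/local/share/awk
+@end example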
+
+Although you can change @code{ENVIRON["AWKPATH"]} within your @command{awk}
+program, this has no effect on the running program's behavior. This makes
+sense: the @env{AWKPATH} environment variable is used to find the program
+source files. Once your program is running, all the files have been
+found, and @command{gawk} no longer needs to use @env{AWKPATH}.
+
+@node AWKLIBPATH Variable
+@subsection The @env{AWKLIBPATH} Environment Variable
+@cindex @env{AWKLIBPATH} environment variable
+@cindex directories, searching for loadable extensions
+@cindex search paths, for loadable extensions
+@cindex differences in @command{awk} and @command{gawk}, @code{AWKLIBPATH} environment variable
+
+The @env{AWKLIBPATH} environment variable is similar to the @env{AWKPATH}
+variable, but it is used to search for loadable extensions (stored as
+system shared libraries) specified with the @option{-l} option rather
+than for source files. If the extension is not found, the path is
+searched again after adding the appropriate shared library suffix for
+the platform. For example, on GNU/Linux systems, the suffix @samp{.so}
+is used. The search path specified is also used for extensions loaded
+via the @code{@@load} keyword (@pxref{Loading Shared Libraries}).
+
+If @env{AWKLIBPATH} does not exist in the environment, or if it has
+an empty value, @command{gawk} uses a default path; this
+is typically @samp{/usr/local/lib/gawk}, although it can vary depending
+upon how @command{gawk} was built.
+
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKLIBPATH} variable.
+
+@command{gawk} places the value of the search path that it used into
+@code{ENVIRON["AWKLIBPATH"]}. This provides access to the actual search
+path value from within an @command{awk} program.
+
+@node Other Environment Variables
+@subsection Other Environment Variables
+
+A number of other environment variables affect @command{gawk}'s
+behavior, but they are more specialized. Those in the following
+list are meant to be used by regular users:
+
+@table @env
+@item GAWK_MSEC_SLEEP
+Specifies the interval between connection retries,
+in milliseconds. On systems that do not support
+the @code{usleep()} system call,
+the value is rounded up to an integral number of seconds.
+
+@item GAWK_READ_TIMEOUT
+Specifies the time, in milliseconds, for @command{gawk} to
+wait for input before returning with an error.
+@xref{Read Timeout}.
+
+@item GAWK_SOCK_RETRIES
+Controls the number of times @command{gawk} attempts to
+retry a two-way TCP/IP (socket) connection before giving up.
+@xref{TCP/IP Networking}.
+
+@item POSIXLY_CORRECT
+Causes @command{gawk} to switch to POSIX-compatibility
+mode, disabling all traditional and GNU extensions.
+@xref{Options}.
+@end table
+
+The environment variables in the following list are meant
+for use by the @command{gawk} developers for testing and tuning.
+They are subject to change. The variables are:
+
+@table @env
+@item AWKBUFSIZE
+This variable only affects @command{gawk} on POSIX-compliant systems.
+With a value of @samp{exact}, @command{gawk} uses the size of each input
+file as the size of the memory buffer to allocate for I/O. Otherwise,
+the value should be a number, and @command{gawk} uses that number as
+the size of the buffer to allocate. (When this variable is not set,
+@command{gawk} uses the smaller of the file's size and the ``default''
+blocksize, which is usually the filesystem's I/O blocksize.)
+
+@item AWK_HASH
+If this variable exists with a value of @samp{gst}, @command{gawk}
+switches to using the hash function from GNU Smalltalk for
+managing arrays.
+This function may be marginally faster than the standard function.
+
+@item AWKREADFUNC
+If this variable exists, @command{gawk} switches to reading source
+files one line at a time, instead of reading in blocks. This exists
+for debugging problems on filesystems on non-POSIX operating systems
+where I/O is performed in records, not in blocks.
+
+@item GAWK_MSG_SRC
+If this variable exists, @command{gawk} includes the @value{FN}
+and line number within the @command{gawk} source code
+from which warning and/or fatal messages
+are generated. Its purpose is to help isolate the source of a
+message, as there are multiple places that produce the
+same warning or error message.
+
+@item GAWK_NO_DFA
+If this variable exists, @command{gawk} does not use the DFA regexp matcher
+for ``does it match'' kinds of tests. This can cause @command{gawk}
+to be slower. Its purpose is to help isolate differences between the
+two regexp matchers that @command{gawk} uses internally. (There aren't
+supposed to be differences, but occasionally theory and practice don't
+coordinate with each other.)
+
+@item GAWK_STACKSIZE
+This specifies the amount by which @command{gawk} should grow its
+internal evaluation stack, when needed.
+
+@item INT_CHAIN_MAX
+This specifies the intended maximum number of items @command{gawk} will maintain on a
+hash chain for managing arrays indexed by integers.
+
+@item STR_CHAIN_MAX
+This specifies the intended maximum number of items @command{gawk} will maintain on a
+hash chain for managing arrays indexed by strings.
+
+@item TIDYMEM
+If this variable exists, @command{gawk} uses the @code{mtrace()} library
+calls from the GNU C library to help track down possible memory leaks.
+@end table
+
+@node Exit Status
+@section @command{gawk}'s Exit Status
+
+@cindex exit status, of @command{gawk}
+If the @code{exit} statement is used with a value
+(@pxref{Exit Statement}), then @command{gawk} exits with
+the numeric value given to it.
+
+Otherwise, if there were no problems during execution,
+@command{gawk} exits with the value of the C constant
+@code{EXIT_SUCCESS}. This is usually zero.
+
+If an error occurs, @command{gawk} exits with the value of
+the C constant @code{EXIT_FAILURE}. This is usually one.
+
+If @command{gawk} exits because of a fatal error, the exit
+status is two. On non-POSIX systems, this value may be mapped
+to @code{EXIT_FAILURE}.
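+
+For example, you can check the status from a POSIX-compliant shell:
+
+@example
+$ @kbd{gawk 'BEGIN @{ exit 3 @}'}
+$ @kbd{echo $?}
+@print{} 3
+@end example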
+
+@node Include Files
+@section Including Other Files into Your Program
+
+@c Panos Papadopoulos <panos1962@gmail.com> contributed the original
+@c text for this section.
+
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+@cindex @code{@@include} directive
+@cindex file inclusion, @code{@@include} directive
+@cindex including files, @code{@@include} directive
+The @code{@@include} keyword can be used to read external @command{awk} source
+files. This gives you the ability to split large @command{awk} source files
+into smaller, more manageable pieces, and also lets you reuse common @command{awk}
+code from various @command{awk} scripts. In other words, you can group
+together @command{awk} functions used to carry out specific tasks
+into external files. These files can be used just like function libraries,
+using the @code{@@include} keyword in conjunction with the @env{AWKPATH}
+environment variable. Note that source files may also be included
+using the @option{-i} option.
+
+Let's see an example.
+We'll start with two (trivial) @command{awk} scripts, namely
+@file{test1} and @file{test2}. Here is the @file{test1} script:
+
+@example
+BEGIN @{
+ print "This is script test1."
+@}
+@end example
+
+@noindent
+and here is @file{test2}:
+
+@example
+@@include "test1"
+BEGIN @{
+ print "This is script test2."
+@}
+@end example
+
+Running @command{gawk} with @file{test2}
+produces the following result:
+
+@example
+$ @kbd{gawk -f test2}
+@print{} This is script test1.
+@print{} This is script test2.
+@end example
+
+@command{gawk} runs the @file{test2} script, which includes @file{test1}
+using the @code{@@include}
+keyword. So, to include external @command{awk} source files, you just
+use @code{@@include} followed by the name of the file to be included,
+enclosed in double quotes.
+
+@quotation NOTE
+Keep in mind that this is a language construct and the @value{FN} cannot
+be a string variable, but rather just a literal string constant in double quotes.
+@end quotation
+
+The files to be included may be nested; e.g., given a third
+script, namely @file{test3}:
+
+@example
+@@include "test2"
+BEGIN @{
+ print "This is script test3."
+@}
+@end example
+
+@noindent
+Running @command{gawk} with the @file{test3} script produces the
+following results:
+
+@example
+$ @kbd{gawk -f test3}
+@print{} This is script test1.
+@print{} This is script test2.
+@print{} This is script test3.
+@end example
+
+The @value{FN} can, of course, be a pathname. For example:
+
+@example
+@@include "../io_funcs"
+@end example
+
+@noindent
+and:
+
+@example
+@@include "/usr/awklib/network"
+@end example
+
+@noindent
+are both valid. The @env{AWKPATH} environment variable can be of great
+value when using @code{@@include}. The same rules for the use
+of the @env{AWKPATH} variable in command-line file searches
+(@pxref{AWKPATH Variable}) apply to
+@code{@@include} also.
+
+This is very helpful in constructing @command{gawk} function libraries.
+If you have a large script with useful, general-purpose @command{awk}
+functions, you can break it down into library files and put those files
+in a special directory. You can then include those ``libraries,''
+either by using the full pathnames of the files, or by setting the @env{AWKPATH}
+environment variable accordingly and then using @code{@@include} with
+just the file part of the full pathname. Of course,
+you can keep library files in more than one directory;
+the more complex the working
+environment is, the more directories you may need to organize the files
+to be included.
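+
+For example, suppose you keep a (purely hypothetical) library file
+named @file{myfuncs} in the directory @file{$HOME/awklib}, and that
+it defines a function named @code{hello()}. A main program,
+@file{myprog}, can then include the library by its @value{FN} alone:
+
+@example
+@@include "myfuncs"
+BEGIN @{ hello() @}
+@end example
+
+@noindent
+Setting @env{AWKPATH} lets @command{gawk} find @file{myfuncs} without
+a directory component:
+
+@example
+$ @kbd{AWKPATH=$HOME/awklib gawk -f myprog}
+@end example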
+
+Given the ability to specify multiple @option{-f} options, the
+@code{@@include} mechanism is not strictly necessary.
+However, the @code{@@include} keyword
+can help you in constructing self-contained @command{gawk} programs,
+thus reducing the need for writing complex and tedious command lines.
+In particular, @code{@@include} is very useful for writing CGI scripts
+to be run from web pages.
+
+As mentioned in @ref{AWKPATH Variable}, the current directory is always
+searched first for source files, before searching in @env{AWKPATH};
+this also applies to files named with @code{@@include}.
+
+@node Loading Shared Libraries
+@section Loading Dynamic Extensions into Your Program
+
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+@cindex @code{@@load} directive
+@cindex loading extensions, @code{@@load} directive
+@cindex extensions, loading, @code{@@load} directive
+The @code{@@load} keyword can be used to read external @command{awk} extensions
+(stored as system shared libraries).
+This allows you to link in compiled code that may offer superior
+performance and/or give you access to extended capabilities not supported
+by the @command{awk} language. The @env{AWKLIBPATH} variable is used to
+search for the extension. Using @code{@@load} is completely equivalent
+to using the @option{-l} command-line option.
+
+If the extension is not initially found in @env{AWKLIBPATH}, another
+search is conducted after appending the platform's default shared library
+suffix to the @value{FN}. For example, on GNU/Linux systems, the suffix
+@samp{.so} is used:
+
+@example
+$ @kbd{gawk '@@load "ordchr"; BEGIN @{print chr(65)@}'}
+@print{} A
+@end example
+
+@noindent
+This is equivalent to the following example:
+
+@example
+$ @kbd{gawk -lordchr 'BEGIN @{print chr(65)@}'}
+@print{} A
+@end example
+
+@noindent
+For command-line usage, the @option{-l} option is more convenient,
+but @code{@@load} is useful for embedding inside an @command{awk} source file
+that requires access to an extension.
+
+@ref{Dynamic Extensions}, describes how to write extensions (in C or C++)
+that can be loaded with either @code{@@load} or the @option{-l} option.
+It also describes the @code{ordchr} extension.
+
+@node Obsolete
+@section Obsolete Options and/or Features
+
+@c update this section for each release!
+
+@cindex options, deprecated
+@cindex features, deprecated
+@cindex obsolete features
+This @value{SECTION} describes features and/or command-line options from
+previous releases of @command{gawk} that either are not available in the
+current version or are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+The process-related special files @file{/dev/pid}, @file{/dev/ppid},
+@file{/dev/pgrpid}, and @file{/dev/user} were deprecated in @command{gawk}
+3.1, but still worked. As of @value{PVERSION} 4.0, they are no longer
+interpreted specially by @command{gawk}. (Use @code{PROCINFO} instead;
+see @ref{Auto-set}.)
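+
+For example, the process ID that the old @file{/dev/pid} special file
+used to supply is now available as @code{PROCINFO["pid"]} (the number
+printed will, of course, differ on your system):
+
+@example
+$ @kbd{gawk 'BEGIN @{ print PROCINFO["pid"] @}'}
+@print{} 4316
+@end example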
+
+@ignore
+This @value{SECTION}
+is thus essentially a place holder,
+in case some option becomes obsolete in a future version of @command{gawk}.
+@end ignore
+
+@node Undocumented
+@section Undocumented Options and Features
+@cindex undocumented features
+@cindex features, undocumented
+@cindex Skywalker, Luke
+@cindex Kenobi, Obi-Wan
+@cindex Jedi knights
+@cindex Knights, jedi
+@quotation
+@i{Use the Source, Luke!}
+@author Obi-Wan
+@end quotation
+
+@cindex shells, sea
+This @value{SECTION} intentionally left
+blank.
+
+@ignore
+@c If these came out in the Info file or TeX document, then they wouldn't
+@c be undocumented, would they?
+
+@command{gawk} has one undocumented option:
+
+@table @code
+@item -W nostalgia
+@itemx --nostalgia
+Print the message @samp{awk: bailing out near line 1} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @command{awk} and by a t-shirt.
+The message is @emph{not} subject to translation in non-English locales.
+@c so there! nyah, nyah.
+@end table
+
+Early versions of @command{awk} used to not require any separator (either
+a newline or @samp{;}) between the rules in @command{awk} programs. Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@command{gawk} actually supports this but it is purposely undocumented
+because it is bad style. The correct way to write such a program
+is either:
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or:
+
+@example
+awk '@{ sum += $1 @}
+ END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines}, for a fuller explanation.
+
+You can insert newlines after the @samp{;} in @code{for} loops.
+This seems to have been a long-undocumented feature in Unix @command{awk}.
+
+Similarly, you may use @code{print} or @code{printf} statements in the
+@var{init} and @var{increment} parts of a @code{for} loop. This is another
+long-undocumented ``feature'' of Unix @code{awk}.
+
+@end ignore
+
+@node Invoking Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+The three standard options for all versions of @command{awk} are
+@option{-f}, @option{-F}, and @option{-v}. @command{gawk} supplies these
+and many others, as well as corresponding GNU-style long options.
+
+@item
+Nonoption command-line arguments are usually treated as @value{FN}s,
+unless they have the form @samp{@var{var}=@var{value}}, in which case
+they are taken as variable assignments to be performed at that point
+in processing the input.
+
+@item
+All nonoption command-line arguments, excluding the program text,
+are placed in the @code{ARGV} array. Adjusting @code{ARGC} and @code{ARGV}
+affects how @command{awk} processes input.
+
+@item
+You can use a single minus sign (@samp{-}) to refer to standard input
+on the command line. @command{gawk} also lets you use the special
+@value{FN} @file{/dev/stdin}.
+
+@item
+@command{gawk} pays attention to a number of environment variables.
+@env{AWKPATH}, @env{AWKLIBPATH}, and @env{POSIXLY_CORRECT} are the
+most important ones.
+
+@item
+@command{gawk}'s exit status conveys information to the program
+that invoked it. Use the @code{exit} statement from within
+an @command{awk} program to set the exit status.
+
+@item
+@command{gawk} allows you to include other @command{awk} source files into
+your program using the @code{@@include} statement and/or the @option{-i}
+and @option{-f} command-line options.
+
+@item
+@command{gawk} allows you to load additional functions written in C
+or C++ using the @code{@@load} statement and/or the @option{-l} option.
+(This advanced feature is described later, in @ref{Dynamic Extensions}.)
+@end itemize
+
+@node Regexp
+@chapter Regular Expressions
+@cindex regexp
+@cindex regular expressions
+
+A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
+set of strings.
+Because regular expressions are such a fundamental part of @command{awk}
+programming, their format and use deserve a separate @value{CHAPTER}.
+
+@cindex forward slash (@code{/}) to enclose regular expressions
+@cindex @code{/} (forward slash) to enclose regular expressions
+A regular expression enclosed in slashes (@samp{/})
+is an @command{awk} pattern that matches every input record whose text
+belongs to that set.
+The simplest regular expression is a sequence of letters, numbers, or
+both. Such a regexp matches any string that contains that sequence.
+Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
+Therefore, the pattern @code{/foo/} matches any input record containing
+the three adjacent characters @samp{foo} @emph{anywhere} in the record. Other
+kinds of regexps let you specify more complicated classes of strings.
+
+@ifnotinfo
+Initially, the examples in this @value{CHAPTER} are simple.
+As we explain more about how
+regular expressions work, we present more complicated instances.
+@end ifnotinfo
+
+@menu
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write nonprinting characters.
+* Regexp Operators:: Regular Expression Operators.
+* Bracket Expressions:: What can go between @samp{[...]}.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
+@end menu
+
+@node Regexp Usage
+@section How to Use Regular Expressions
+
+@cindex regular expressions, as patterns
+A regular expression can be used as a pattern by enclosing it in
+slashes. Then the regular expression is tested against the
+entire text of each record. (Normally, it only needs
+to match some part of the text in order to succeed.) For example, the
+following prints the second field of each record where the string
+@samp{li} appears anywhere in the record:
+
+@example
+$ @kbd{awk '/li/ @{ print $2 @}' mail-list}
+@print{} 555-5553
+@print{} 555-0542
+@print{} 555-6699
+@print{} 555-3430
+@end example
+
+@cindex regular expressions, operators
+@cindex operators, string-matching
+@c @cindex operators, @code{~}
+@cindex string-matching operators
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{!~}
+@cindex @code{if} statement, use of regexps in
+@cindex @code{while} statement, use of regexps in
+@cindex @code{do}-@code{while} statement, use of regexps in
+@c @cindex statements, @code{if}
+@c @cindex statements, @code{while}
+@c @cindex statements, @code{do}
+Regular expressions can also be used in matching expressions. These
+expressions allow you to specify the string to match against; it need
+not be the entire current input record. The two operators @samp{~}
+and @samp{!~} perform regular expression comparisons. Expressions
+using these operators can be used as patterns, or in @code{if},
+@code{while}, @code{for}, and @code{do} statements.
+(@xref{Statements}.)
+For example, the following is true if the expression @var{exp} (taken
+as a string) matches @var{regexp}:
+
+@example
+@var{exp} ~ /@var{regexp}/
+@end example
+
+@noindent
+This example matches, or selects, all input records with the uppercase
+letter @samp{J} somewhere in the first field:
+
+@example
+$ @kbd{awk '$1 ~ /J/' inventory-shipped}
+@print{} Jan 13 25 15 115
+@print{} Jun 31 42 75 492
+@print{} Jul 24 34 67 436
+@print{} Jan 21 36 64 620
+@end example
+
+So does this:
+
+@example
+awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
+@end example
+
+This next example is true if the expression @var{exp}
+(taken as a character string)
+does @emph{not} match @var{regexp}:
+
+@example
+@var{exp} !~ /@var{regexp}/
+@end example
+
+The following example matches,
+or selects, all input records whose first field @emph{does not} contain
+the uppercase letter @samp{J}:
+
+@example
+$ @kbd{awk '$1 !~ /J/' inventory-shipped}
+@print{} Feb 15 32 24 226
+@print{} Mar 15 24 34 228
+@print{} Apr 31 52 63 420
+@print{} May 16 34 29 208
+@dots{}
+@end example
+
+@cindex regexp constants
+@cindex constant regexps
+@cindex regular expressions, constants, See regexp constants
+When a regexp is enclosed in slashes, such as @code{/foo/}, we call it
+a @dfn{regexp constant}, much like @code{5.27} is a numeric constant and
+@code{"foo"} is a string constant.
+
+@node Escape Sequences
+@section Escape Sequences
+
+@cindex escape sequences, in strings
+@cindex backslash (@code{\}), in escape sequences
+@cindex @code{\} (backslash), in escape sequences
+Some characters cannot be included literally in string constants
+(@code{"foo"}) or regexp constants (@code{/foo/}).
+Instead, they should be represented with @dfn{escape sequences},
+which are character sequences beginning with a backslash (@samp{\}).
+One use of an escape sequence is to include a double-quote character in
+a string constant. Because a plain double quote ends the string, you
+must use @samp{\"} to represent an actual double-quote character as a
+part of the string. For example:
+
+@example
+$ @kbd{awk 'BEGIN @{ print "He said \"hi!\" to her." @}'}
+@print{} He said "hi!" to her.
+@end example
+
+The backslash character itself is another character that cannot be
+included normally; you must write @samp{\\} to put one backslash in the
+string or regexp. Thus, the string whose contents are the two characters
+@samp{"} and @samp{\} must be written @code{"\"\\"}.
+
+Other escape sequences represent unprintable characters
+such as TAB or newline. There is nothing to stop you from entering most
+unprintable characters directly in a string constant or regexp constant,
+but they may look ugly.
+
+The following list presents
+all the escape sequences used in @command{awk} and
+what they represent. Unless noted otherwise, all these escape
+sequences apply to both string constants and regexp constants:
+
+@table @code
+@item \\
+A literal backslash, @samp{\}.
+
+@c @cindex @command{awk} language, V.4 version
+@cindex @code{\} (backslash), @code{\a} escape sequence
+@cindex backslash (@code{\}), @code{\a} escape sequence
+@item \a
+The ``alert'' character, @kbd{Ctrl-g}, ASCII code 7 (BEL).
+(This often makes some sort of audible noise.)
+
+@cindex @code{\} (backslash), @code{\b} escape sequence
+@cindex backslash (@code{\}), @code{\b} escape sequence
+@item \b
+Backspace, @kbd{Ctrl-h}, ASCII code 8 (BS).
+
+@cindex @code{\} (backslash), @code{\f} escape sequence
+@cindex backslash (@code{\}), @code{\f} escape sequence
+@item \f
+Formfeed, @kbd{Ctrl-l}, ASCII code 12 (FF).
+
+@cindex @code{\} (backslash), @code{\n} escape sequence
+@cindex backslash (@code{\}), @code{\n} escape sequence
+@item \n
+Newline, @kbd{Ctrl-j}, ASCII code 10 (LF).
+
+@cindex @code{\} (backslash), @code{\r} escape sequence
+@cindex backslash (@code{\}), @code{\r} escape sequence
+@item \r
+Carriage return, @kbd{Ctrl-m}, ASCII code 13 (CR).
+
+@cindex @code{\} (backslash), @code{\t} escape sequence
+@cindex backslash (@code{\}), @code{\t} escape sequence
+@item \t
+Horizontal TAB, @kbd{Ctrl-i}, ASCII code 9 (HT).
+
+@c @cindex @command{awk} language, V.4 version
+@cindex @code{\} (backslash), @code{\v} escape sequence
+@cindex backslash (@code{\}), @code{\v} escape sequence
+@item \v
+Vertical TAB, @kbd{Ctrl-k}, ASCII code 11 (VT).
+
+@cindex @code{\} (backslash), @code{\}@var{nnn} escape sequence
+@cindex backslash (@code{\}), @code{\}@var{nnn} escape sequence
+@item \@var{nnn}
+The octal value @var{nnn}, where @var{nnn} stands for 1 to 3 digits
+between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
+(escape) character is @samp{\033}.
+
+@c @cindex @command{awk} language, V.4 version
+@c @cindex @command{awk} language, POSIX version
+@cindex @code{\} (backslash), @code{\x} escape sequence
+@cindex backslash (@code{\}), @code{\x} escape sequence
+@cindex common extensions, @code{\x} escape sequence
+@cindex extensions, common@comma{} @code{\x} escape sequence
+@item \x@var{hh}@dots{}
+The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
+of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+(The @samp{\x} escape sequence is not allowed in POSIX awk.)
+
+@quotation CAUTION
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+@c FIXME: Add exact version here.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a non-hexadecimal digit
+or the end of the string was encountered.
+However, using more than two hexadecimal digits produced
+undefined results.
+As of @value{PVERSION} @strong{FIXME:} 4.3.0, only two digits
+are processed.
+@end quotation
+
+@cindex @code{\} (backslash), @code{\/} escape sequence
+@cindex backslash (@code{\}), @code{\/} escape sequence
+@item \/
+A literal slash (necessary for regexp constants only).
+This sequence is used when you want to write a regexp
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
+in order to tell @command{awk} to keep processing the rest of the regexp.
+
+@cindex @code{\} (backslash), @code{\"} escape sequence
+@cindex backslash (@code{\}), @code{\"} escape sequence
+@item \"
+A literal double quote (necessary for string constants only).
+This sequence is used when you want to write a string
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
+in order to tell @command{awk} to keep processing the rest of the string.
+@end table
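+
+As a brief illustration (assuming an ASCII-based character set),
+the octal and hexadecimal escapes can spell out ordinary characters:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "\101\102\103", "\x41\x42\x43" @}'}
+@print{} ABC ABC
+@end example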
+
+In @command{gawk}, a number of additional two-character sequences that begin
+with a backslash have special meaning in regexps.
+@xref{GNU Regexp Operators}.
+
+In a regexp, a backslash before any character that is not in the previous list
+and not listed in
+@DBREF{GNU Regexp Operators}
+means that the next character should be taken literally, even if it would
+normally be a regexp operator. For example, @code{/a\+b/} matches the three
+characters @samp{a+b}.
+
+@cindex backslash (@code{\}), in escape sequences
+@cindex @code{\} (backslash), in escape sequences
+@cindex portability
+For complete portability, do not use a backslash before any character not
+shown in the previous list or that is not an operator.
+
+@c 11/2014: Moved so as to not stack sidebars
+@sidebar Backslash Before Regular Characters
+@cindex portability, backslash in escape sequences
+@cindex POSIX @command{awk}, backslashes in string constants
+@cindex backslash (@code{\}), in escape sequences, POSIX and
+@cindex @code{\} (backslash), in escape sequences, POSIX and
+
+@cindex troubleshooting, backslash before nonspecial character
+If you place a backslash in a string constant before something that is
+not one of the characters previously listed, POSIX @command{awk} purposely
+leaves what happens as undefined. There are two choices:
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+@cindex Brian Kernighan's @command{awk}
+@table @asis
+@item Strip the backslash out
+This is what BWK @command{awk} and @command{gawk} both do.
+For example, @code{"a\qc"} is the same as @code{"aqc"}.
+(Because this is such an easy bug both to introduce and to miss,
+@command{gawk} warns you about it.)
+Consider @samp{FS = @w{"[ \t]+\|[ \t]+"}} to use vertical bars
+surrounded by whitespace as the field separator. There should be
+two backslashes in the string: @samp{FS = @w{"[ \t]+\\|[ \t]+"}}.
+@c I did this! This is why I added the warning.
+
+@cindex @command{gawk}, escape sequences
+@cindex Unix @command{awk}, backslashes in escape sequences
+@cindex @command{mawk} utility
+@item Leave the backslash alone
+Some other @command{awk} implementations do this.
+In such implementations, typing @code{"a\qc"} is the same as typing
+@code{"a\\qc"}.
+@end table
+@end sidebar
+
+To summarize:
+
+@itemize @value{BULLET}
+@item
+The escape sequences in the preceding list are always processed first,
+for both string constants and regexp constants. This happens very early,
+as soon as @command{awk} reads your program.
+
+@item
+@command{gawk} processes both regexp constants and dynamic regexps
+(@pxref{Computed Regexps}),
+for the special operators listed in
+@ref{GNU Regexp Operators}.
+
+@item
+A backslash before any other character means to treat that character
+literally.
+@end itemize
+
+@sidebar Escape Sequences for Metacharacters
+@cindex metacharacters, escape sequences for
+
+Suppose you use an octal or hexadecimal
+escape to represent a regexp metacharacter.
+(See @ref{Regexp Operators}.)
+Does @command{awk} treat the character as a literal character or as a regexp
+operator?
+
+@cindex dark corner, escape sequences, for metacharacters
+Historically, such characters were taken literally.
+@value{DARKCORNER}
+However, the POSIX standard indicates that they should be treated
+as real metacharacters, which is what @command{gawk} does.
+In compatibility mode (@pxref{Options}),
+@command{gawk} treats the characters represented by octal and hexadecimal
+escape sequences literally when used in regexp constants. Thus,
+@code{/a\52b/} is equivalent to @code{/a\*b/}.
+@end sidebar
+
+@node Regexp Operators
+@section Regular Expression Operators
+@cindex regular expressions, operators
+@cindex metacharacters in regular expressions
+
+You can combine regular expressions with special characters,
+called @dfn{regular expression operators} or @dfn{metacharacters}, to
+increase the power and versatility of regular expressions.
+
+The escape sequences described
+@ifnotinfo
+earlier
+@end ifnotinfo
+in @DBREF{Escape Sequences}
+are valid inside a regexp. They are introduced by a @samp{\} and
+are recognized and converted into corresponding real characters as
+the very first step in processing regexps.
+
+Here is a list of metacharacters. All characters that are not escape
+sequences and that are not listed here stand for themselves:
+
+@c Use @asis so the docbook comes out ok. Sigh.
+@table @asis
+@cindex backslash (@code{\}), regexp operator
+@cindex @code{\} (backslash), regexp operator
+@item @code{\}
+This suppresses the special meaning of a character when
+matching. For example, @samp{\$}
+matches the character @samp{$}.
+
+@cindex regular expressions, anchors in
+@cindex Texinfo, chapter beginnings in files
+@cindex @code{^} (caret), regexp operator
+@cindex caret (@code{^}), regexp operator
+@item @code{^}
+This matches the beginning of a string. @samp{^@@chapter}
+matches @samp{@@chapter} at the beginning of a string,
+for example, and can be used
+to identify chapter beginnings in Texinfo source files.
+The @samp{^} is known as an @dfn{anchor}, because it anchors the pattern to
+match only at the beginning of the string.
+
+It is important to realize that @samp{^} does not match the beginning of
+a line (the point right after a @samp{\n} newline character) embedded in a string.
+The condition is not true in the following example:
+
+@example
+if ("line1\nLINE 2" ~ /^L/) @dots{}
+@end example
+
+@cindex @code{$} (dollar sign), regexp operator
+@cindex dollar sign (@code{$}), regexp operator
+@item @code{$}
+This is similar to @samp{^}, but it matches only at the end of a string.
+For example, @samp{p$}
+matches a record that ends with a @samp{p}. The @samp{$} is an anchor
+and does not match the end of a line
+(the point right before a @samp{\n} newline character)
+embedded in a string.
+The condition in the following example is not true:
+
+@example
+if ("line1\nLINE 2" ~ /1$/) @dots{}
+@end example
+
+@cindex @code{.} (period), regexp operator
+@cindex period (@code{.}), regexp operator
+@item @code{.} (period)
+This matches any single character,
+@emph{including} the newline character. For example, @samp{.P}
+matches any single character followed by a @samp{P} in a string. Using
+concatenation, we can make a regular expression such as @samp{U.A}, which
+matches any three-character sequence that begins with @samp{U} and ends
+with @samp{A}.
+
+@cindex POSIX @command{awk}, period (@code{.})@comma{} using
+In strict POSIX mode (@pxref{Options}),
+@samp{.} does not match the @sc{nul}
+character, which is a character with all bits equal to zero.
+Otherwise, @sc{nul} is just another character. Other versions of @command{awk}
+may not be able to match the @sc{nul} character.
+
+@cindex @code{[]} (square brackets), regexp operator
+@cindex square brackets (@code{[]}), regexp operator
+@cindex bracket expressions
+@cindex character sets, See Also bracket expressions
+@cindex character lists, See bracket expressions
+@cindex character classes, See bracket expressions
+@item @code{[}@dots{}@code{]}
+This is called a @dfn{bracket expression}.@footnote{In other literature,
+you may see a bracket expression referred to as either a
+@dfn{character set}, a @dfn{character class}, or a @dfn{character list}.}
+It matches any @emph{one} of the characters that are enclosed in
+the square brackets. For example, @samp{[MVX]} matches any one of
+the characters @samp{M}, @samp{V}, or @samp{X} in a string. A full
+discussion of what can be inside the square brackets of a bracket expression
+is given in
+@ref{Bracket Expressions}.
+
+@cindex bracket expressions, complemented
+@item @code{[^}@dots{}@code{]}
+This is a @dfn{complemented bracket expression}. The first character after
+the @samp{[} @emph{must} be a @samp{^}. It matches any characters
+@emph{except} those in the square brackets. For example, @samp{[^awk]}
+matches any character that is not an @samp{a}, @samp{w},
+or @samp{k}.
+
+@cindex @code{|} (vertical bar)
+@cindex vertical bar (@code{|})
+@item @code{|}
+This is the @dfn{alternation operator} and it is used to specify
+alternatives. The @samp{|} has the lowest precedence of all the regular
+expression operators. For example, @samp{^P|[aeiouy]} matches any string
+that matches either @samp{^P} or @samp{[aeiouy]}. This means it matches
+any string that starts with @samp{P} or contains (anywhere within it)
+a lowercase English vowel.
+
+The alternation applies to the largest possible regexps on either side.
+
+@cindex @code{()} (parentheses), regexp operator
+@cindex parentheses @code{()}, regexp operator
+@item @code{(}@dots{}@code{)}
+Parentheses are used for grouping in regular expressions, as in
+arithmetic. They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}. For example,
+@samp{@@(samp|code)\@{[^@}]+\@}} matches both @samp{@@code@{foo@}} and
+@samp{@@samp@{bar@}}.
+(These are Texinfo formatting control sequences. The @samp{+} is
+explained further on in this list.)
+
+@cindex @code{*} (asterisk), @code{*} operator, as regexp operator
+@cindex asterisk (@code{*}), @code{*} operator, as regexp operator
+@item @code{*}
+This symbol means that the preceding regular expression should be
+repeated as many times as necessary to find a match. For example, @samp{ph*}
+applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
+of one @samp{p} followed by any number of @samp{h}s. This also matches
+just @samp{p} if no @samp{h}s are present.
+
+There are two subtle points to understand about how @samp{*} works.
+First, the @samp{*} applies only to the single preceding regular expression
+component (e.g., in @samp{ph*}, it applies just to the @samp{h}).
+To cause @samp{*} to apply to a larger subexpression, use parentheses:
+@samp{(ph)*} matches @samp{ph}, @samp{phph}, @samp{phphph}, and so on.
+
+Second, @samp{*} finds as many repetitions as possible. If the text
+to be matched is @samp{phhhhhhhhhhhhhhooey}, @samp{ph*} matches all of
+the @samp{h}s.
+
+@cindex @code{+} (plus sign), regexp operator
+@cindex plus sign (@code{+}), regexp operator
+@item @code{+}
+This symbol is similar to @samp{*}, except that the preceding expression must be
+matched at least once. This means that @samp{wh+y}
+would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas
+@samp{wh*y} would match all three.
+
+@cindex @code{?} (question mark), regexp operator
+@cindex question mark (@code{?}), regexp operator
+@item @code{?}
+This symbol is similar to @samp{*}, except that the preceding expression can be
+matched either once or not at all. For example, @samp{fe?d}
+matches @samp{fed} and @samp{fd}, but nothing else.
+
+@cindex interval expressions, regexp operator
+@item @code{@{}@var{n}@code{@}}
+@itemx @code{@{}@var{n}@code{,@}}
+@itemx @code{@{}@var{n}@code{,}@var{m}@code{@}}
+One or two numbers inside braces denote an @dfn{interval expression}.
+If there is one number in the braces, the preceding regexp is repeated
+@var{n} times.
+If there are two numbers separated by a comma, the preceding regexp is
+repeated @var{n} to @var{m} times.
+If there is one number followed by a comma, then the preceding regexp
+is repeated at least @var{n} times:
+
+@table @code
+@item wh@{3@}y
+Matches @samp{whhhy}, but not @samp{why} or @samp{whhhhy}.
+
+@item wh@{3,5@}y
+Matches @samp{whhhy}, @samp{whhhhy}, or @samp{whhhhhy} only.
+
+@item wh@{2,@}y
+Matches @samp{whhy}, @samp{whhhy}, and so on.
+@end table
+
+@cindex POSIX @command{awk}, interval expressions in
+Interval expressions were not traditionally available in @command{awk}.
+They were added as part of the POSIX standard to make @command{awk}
+and @command{egrep} consistent with each other.
+
+@cindex @command{gawk}, interval expressions and
+Initially, because old programs may use @samp{@{} and @samp{@}} in regexp
+constants,
+@command{gawk} did @emph{not} match interval expressions
+in regexps.
+
+However, beginning with @value{PVERSION} 4.0,
+@command{gawk} does match interval expressions by default.
+This is because compatibility with POSIX has become more
+important to most @command{gawk} users than compatibility with
+old programs.
+
+For programs that use @samp{@{} and @samp{@}} in regexp constants,
+it is good practice to always escape them with a backslash. Then the
+regexp constants are valid and work the way you want them to, using
+any version of @command{awk}.@footnote{Use two backslashes if you're
+using a string constant with a regexp operator or function.}
+
+Finally, when @samp{@{} and @samp{@}} appear in regexp constants
+in a way that cannot be interpreted as an interval expression
+(such as @code{/q@{a@}/}), then they stand for themselves.
+@end table
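+
+Several of these operators are often combined. As a simple
+illustration, the following one-liner uses grouping, @samp{+}, and
+both anchors to check that a record consists entirely of repetitions
+of @samp{ph}:
+
+@example
+$ @kbd{echo phphph | awk '/^(ph)+$/ @{ print "all ph" @}'}
+@print{} all ph
+@end example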
+
+@cindex precedence, regexp operators
+@cindex regular expressions, operators, precedence of
+In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators,
+as well as the braces @samp{@{} and @samp{@}},
+have
+the highest precedence, followed by concatenation, and finally by @samp{|}.
+As in arithmetic, parentheses can change how operators are grouped.
+
+@cindex POSIX @command{awk}, regular expressions and
+@cindex @command{gawk}, regular expressions, precedence
+In POSIX @command{awk} and @command{gawk}, the @samp{*}, @samp{+}, and
+@samp{?} operators stand for themselves when there is nothing in the
+regexp that precedes them. For example, @code{/+/} matches a literal
+plus sign. However, many other versions of @command{awk} treat such a
+usage as a syntax error.
+
+If @command{gawk} is in compatibility mode (@pxref{Options}), interval
+expressions are not available in regular expressions.
+
+@node Bracket Expressions
+@section Using Bracket Expressions
+@cindex bracket expressions
+@cindex bracket expressions, range expressions
+@cindex range expressions (regexps)
+@cindex character lists in regular expression
+
+As mentioned earlier, a bracket expression matches any character among
+those listed between the opening and closing square brackets.
+
+Within a bracket expression, a @dfn{range expression} consists of two
+characters separated by a hyphen. It matches any single character that
+sorts between the two characters, based upon the system's native character
+set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}.
+(See @DBREF{Ranges and Locales} for an explanation of how the POSIX
+standard and @command{gawk} have changed over time. This is mainly
+of historical interest.)
+
+@cindex @code{\} (backslash), in bracket expressions
+@cindex backslash (@code{\}), in bracket expressions
+@cindex @code{^} (caret), in bracket expressions
+@cindex caret (@code{^}), in bracket expressions
+@cindex @code{-} (hyphen), in bracket expressions
+@cindex hyphen (@code{-}), in bracket expressions
+To include one of the characters @samp{\}, @samp{]}, @samp{-}, or @samp{^} in a
+bracket expression, put a @samp{\} in front of it. For example:
+
+@example
+[d\]]
+@end example
+
+@noindent
+matches either @samp{d} or @samp{]}.
+Additionally, if you place @samp{]} right after the opening
+@samp{[}, the closing bracket is treated as one of the
+characters to be matched.
+
+@cindex POSIX @command{awk}, bracket expressions and
+@cindex Extended Regular Expressions (EREs)
+@cindex EREs (Extended Regular Expressions)
+@cindex @command{egrep} utility
+The treatment of @samp{\} in bracket expressions
+is compatible with other @command{awk}
+implementations and is also mandated by POSIX.
+The regular expressions in @command{awk} are a superset
+of the POSIX specification for Extended Regular Expressions (EREs).
+POSIX EREs are based on the regular expressions accepted by the
+traditional @command{egrep} utility.
+
+@cindex bracket expressions, character classes
+@cindex POSIX @command{awk}, bracket expressions and, character classes
+@dfn{Character classes} are a feature introduced in the POSIX standard.
+A character class is a special notation for describing
+lists of characters that have a specific attribute, but the
+actual characters can vary from country to country and/or
+from character set to character set. For example, the notion of what
+is an alphabetic character differs between the United States and France.
+
+A character class is only valid in a regexp @emph{inside} the
+brackets of a bracket expression. Character classes consist of @samp{[:},
+a keyword denoting the class, and @samp{:]}.
+@ref{table-char-classes} lists the character classes defined by the
+POSIX standard.
+
+@float Table,table-char-classes
+@caption{POSIX character classes}
+@multitable @columnfractions .15 .85
+@headitem Class @tab Meaning
+@item @code{[:alnum:]} @tab Alphanumeric characters
+@item @code{[:alpha:]} @tab Alphabetic characters
+@item @code{[:blank:]} @tab Space and TAB characters
+@item @code{[:cntrl:]} @tab Control characters
+@item @code{[:digit:]} @tab Numeric characters
+@item @code{[:graph:]} @tab Characters that are both printable and visible
+(a space is printable but not visible, whereas an @samp{a} is both)
+@item @code{[:lower:]} @tab Lowercase alphabetic characters
+@item @code{[:print:]} @tab Printable characters (characters that are not control characters)
+@item @code{[:punct:]} @tab Punctuation characters (characters that are not letters, digits,
+control characters, or space characters)
+@item @code{[:space:]} @tab Space characters (such as space, TAB, and formfeed, to name a few)
+@item @code{[:upper:]} @tab Uppercase alphabetic characters
+@item @code{[:xdigit:]} @tab Characters that are hexadecimal digits
+@end multitable
+@end float
+
+For example, before the POSIX standard, you had to write @code{/[A-Za-z0-9]/}
+to match alphanumeric characters. If your
+character set had other alphabetic characters in it, this would not
+match them.
+With the POSIX character classes, you can write
+@code{/[[:alnum:]]/} to match the alphabetic
+and numeric characters in your character set.
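+
+As a simple illustration, the following one-liner uses
+@samp{[[:digit:]]} to select lines that contain at least one digit,
+no matter which character set is in use:
+
+@example
+$ @kbd{echo "abc 123 xyz" | awk '/[[:digit:]]/ @{ print "has a digit" @}'}
+@print{} has a digit
+@end example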
+
+@c Thanks to
+@c Date: Tue, 01 Jul 2014 07:39:51 +0200
+@c From: Hermann Peifer <peifer@gmx.eu>
+Some utilities that match regular expressions provide a nonstandard
+@code{[:ascii:]} character class; @command{awk} does not. However, you
+can simulate such a construct using @code{[\x00-\x7F]}. This matches
+all values numerically between zero and 127, which is the defined
+range of the ASCII character set. Use a complemented character list
+(@code{[^\x00-\x7F]}) to match any single-byte characters that are not
+in the ASCII range.
+
+@cindex bracket expressions, collating elements
+@cindex bracket expressions, non-ASCII
+@cindex collating elements
+Two additional special sequences can appear in bracket expressions.
+These apply to non-ASCII character sets, which can have single symbols
+(called @dfn{collating elements}) that are represented with more than one
+character. They can also have several characters that are equivalent for
+@dfn{collating}, or sorting, purposes. (For example, in French, a plain ``e''
+and a grave-accented ``@`e'' are equivalent.)
+These sequences are:
+
+@table @asis
+@cindex bracket expressions, collating symbols
+@cindex collating symbols
+@item Collating symbols
+Multicharacter collating elements enclosed between
+@samp{[.} and @samp{.]}. For example, if @samp{ch} is a collating element,
+then @samp{[[.ch.]]} is a regexp that matches this collating element, whereas
+@samp{[ch]} is a regexp that matches either @samp{c} or @samp{h}.
+
+@cindex bracket expressions, equivalence classes
+@item Equivalence classes
+Locale-specific names for a list of
+characters that are equal. The name is enclosed between
+@samp{[=} and @samp{=]}.
+For example, the name @samp{e} might be used to represent all of
+``e,'' ``@`e,'' and ``@'e.'' In this case, @samp{[[=e=]]} is a regexp
+that matches any of @samp{e}, @samp{@'e}, or @samp{@`e}.
+@end table
+
+These features are very valuable in non-English-speaking locales.
+
+@cindex internationalization, localization, character classes
+@cindex @command{gawk}, character classes and
+@cindex POSIX @command{awk}, bracket expressions and, character classes
+@quotation CAUTION
+The library functions that @command{gawk} uses for regular
+expression matching currently recognize only POSIX character classes;
+they do not recognize collating symbols or equivalence classes.
+@end quotation
+@c maybe one day ...
+
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function to make a change to the input
+record. (@code{sub()} replaces the first instance of any text matched
+by the first argument with the string provided as the second argument;
+@pxref{String Functions}). Here, the regexp @code{/a+/} indicates ``one
+or more @samp{a} characters,'' and the replacement text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@cindex regular expressions, computed
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
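+
+Both forms can be tried side by side; in this small illustration,
+each test matches the literal @samp{*} in the input:
+
+@example
+$ @kbd{echo 'a*b' | awk '$0 ~ /\*/ @{ print "regexp constant matched" @}'}
+@print{} regexp constant matched
+$ @kbd{echo 'a*b' | awk '$0 ~ "\\*" @{ print "string constant matched" @}'}
+@print{} string constant matched
+@end example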
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some older versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} $0 ~ "[ >>> \t\n]" <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end sidebar
+
+@node GNU Regexp Operators
+@section @command{gawk}-Specific Regexp Operators
+
+@c This section adapted (long ago) from the regex-0.12 manual
+
+@cindex regular expressions, operators, @command{gawk}
+@cindex @command{gawk}, regular expressions, operators
+@cindex operators, GNU-specific
+@cindex regular expressions, operators, for words
+@cindex word, regexp definition of
+GNU software that deals with regular expressions provides a number of
+additional regexp operators. These operators are described in this
+@value{SECTION} and are specific to @command{gawk};
+they are not available in other @command{awk} implementations.
+Most of the additional operators deal with word matching.
+For our purposes, a @dfn{word} is a sequence of one or more letters, digits,
+or underscores (@samp{_}):
+
+@table @code
+@c @cindex operators, @code{\s} (@command{gawk})
+@cindex backslash (@code{\}), @code{\s} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\s} operator (@command{gawk})
+@item \s
+Matches any whitespace character.
+Think of it as shorthand for
+@w{@samp{[[:space:]]}}.
+
+@c @cindex operators, @code{\S} (@command{gawk})
+@cindex backslash (@code{\}), @code{\S} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\S} operator (@command{gawk})
+@item \S
+Matches any character that is not whitespace.
+Think of it as shorthand for
+@w{@samp{[^[:space:]]}}.
+
+@c @cindex operators, @code{\w} (@command{gawk})
+@cindex backslash (@code{\}), @code{\w} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\w} operator (@command{gawk})
+@item \w
+Matches any word-constituent character---that is, it matches any
+letter, digit, or underscore. Think of it as shorthand for
+@w{@samp{[[:alnum:]_]}}.
+
+@c @cindex operators, @code{\W} (@command{gawk})
+@cindex backslash (@code{\}), @code{\W} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\W} operator (@command{gawk})
+@item \W
+Matches any character that is not word-constituent.
+Think of it as shorthand for
+@w{@samp{[^[:alnum:]_]}}.
+
+@c @cindex operators, @code{\<} (@command{gawk})
+@cindex backslash (@code{\}), @code{\<} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\<} operator (@command{gawk})
+@item \<
+Matches the empty string at the beginning of a word.
+For example, @code{/\<away/} matches @samp{away} but not
+@samp{stowaway}.
+
+@c @cindex operators, @code{\>} (@command{gawk})
+@cindex backslash (@code{\}), @code{\>} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\>} operator (@command{gawk})
+@item \>
+Matches the empty string at the end of a word.
+For example, @code{/stow\>/} matches @samp{stow} but not @samp{stowaway}.
+
+@c @cindex operators, @code{\y} (@command{gawk})
+@cindex backslash (@code{\}), @code{\y} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\y} operator (@command{gawk})
+@cindex word boundaries@comma{} matching
+@item \y
+Matches the empty string at either the beginning or the
+end of a word (i.e., the word boundar@strong{y}). For example, @samp{\yballs?\y}
+matches either @samp{ball} or @samp{balls}, as a separate word.
+
+@c @cindex operators, @code{\B} (@command{gawk})
+@cindex backslash (@code{\}), @code{\B} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\B} operator (@command{gawk})
+@item \B
+Matches the empty string that occurs between two
+word-constituent characters. For example,
+@code{/\Brat\B/} matches @samp{crate} but it does not match @samp{dirty rat}.
+@samp{\B} is essentially the opposite of @samp{\y}.
+@end table
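+
+For example, the word-boundary operators make it easy to replace a
+word only where it stands on its own (a small illustration):
+
+@example
+$ @kbd{echo "the cat scattered" | gawk '@{ gsub(/\<cat\>/, "dog"); print @}'}
+@print{} the dog scattered
+@end example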
+
+@cindex buffers, operators for
+@cindex regular expressions, operators, for buffers
+@cindex operators, string-matching, for buffers
+There are two other operators that work on buffers. In Emacs, a
+@dfn{buffer} is, naturally, an Emacs buffer.
+Other GNU programs, including @command{gawk},
+consider the entire string to match as the buffer.
+The operators are:
+
+@table @code
+@item \`
+@c @cindex operators, @code{\`} (@command{gawk})
+@cindex backslash (@code{\}), @code{\`} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\`} operator (@command{gawk})
+Matches the empty string at the
+beginning of a buffer (string).
+
+@c @cindex operators, @code{\'} (@command{gawk})
+@cindex backslash (@code{\}), @code{\'} operator (@command{gawk})
+@cindex @code{\} (backslash), @code{\'} operator (@command{gawk})
+@item \'
+Matches the empty string at the
+end of a buffer (string).
+@end table
+
+@cindex @code{^} (caret), regexp operator
+@cindex caret (@code{^}), regexp operator
+@cindex @code{?} (question mark), regexp operator
+@cindex question mark (@code{?}), regexp operator
+Because @samp{^} and @samp{$} always work in terms of the beginning
+and end of strings, these operators don't add any new capabilities
+for @command{awk}. They are provided for compatibility with other
+GNU software.
+
+@cindex @command{gawk}, word-boundary operator
+@cindex word-boundary operator (@command{gawk})
+@cindex operators, word-boundary (@command{gawk})
+In other GNU software, the word-boundary operator is @samp{\b}. However,
+that conflicts with the @command{awk} language's definition of @samp{\b}
+as backspace, so @command{gawk} uses a different letter.
+An alternative method would have been to require two backslashes in the
+GNU operators, but this was deemed too confusing. The current
+method of using @samp{\y} for the GNU @samp{\b} appears to be the
+lesser of two evils.
+
+@cindex regular expressions, @command{gawk}, command-line options
+@cindex @command{gawk}, command-line options, and regular expressions
+The various command-line options
+(@pxref{Options})
+control how @command{gawk} interprets characters in regexps:
+
+@table @asis
+@item No options
+In the default case, @command{gawk} provides all the facilities of
+POSIX regexps and the
+@ifnotinfo
+previously described
+GNU regexp operators.
+@end ifnotinfo
+@ifnottex
+@ifnotdocbook
+GNU regexp operators described
+in @ref{Regexp Operators}.
+@end ifnotdocbook
+@end ifnottex
+
+@item @code{--posix}
+Match only POSIX regexps; the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@cindex Brian Kernighan's @command{awk}
+@item @code{--traditional}
+Match traditional Unix @command{awk} regexps. The GNU operators
+are not special, and interval expressions are not available.
+Because BWK @command{awk} supports them,
+the POSIX character classes (@samp{[[:alnum:]]}, etc.) are available.
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, if @option{--traditional}
+has been provided.
+Otherwise, interval expressions are available by default.
+@end table
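+
+For example, with @option{--posix} the GNU operators just described
+lose their special meaning, so @samp{\w} matches only a literal
+@samp{w}. The first of the following commands prints its message;
+the second should print nothing, because @samp{hi} contains no
+literal @samp{w}:
+
+@example
+$ @kbd{echo hi | gawk '/\w/ @{ print "found a word character" @}'}
+@print{} found a word character
+$ @kbd{echo hi | gawk --posix '/\w/ @{ print "found a word character" @}'}
+@end example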
+
+@node Case-sensitivity
+@section Case Sensitivity in Matching
+
+@cindex regular expressions, case sensitivity
+@cindex case sensitivity, regexps and
+Case is normally significant in regular expressions, both when matching
+ordinary characters (i.e., not metacharacters) and inside bracket
+expressions. Thus, a @samp{w} in a regular expression matches only a lowercase
+@samp{w} and not an uppercase @samp{W}.
+
+The simplest way to do a case-independent match is to use a bracket
+expression---for example, @samp{[Ww]}. However, this can be cumbersome if
+you need to use it often, and it can make the regular expressions harder
+to read. There are two alternatives that you might prefer.
+
+One way to perform a case-insensitive match at a particular point in the
+program is to convert the data to a single case, using the
+@code{tolower()} or @code{toupper()} built-in string functions (which we
+haven't discussed yet;
+@pxref{String Functions}).
+For example:
+
+@example
+tolower($1) ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+converts the first field to lowercase before matching against it.
+This works in any POSIX-compliant @command{awk}.
+
+@cindex @command{gawk}, regular expressions, case sensitivity
+@cindex case sensitivity, @command{gawk}
+@cindex differences in @command{awk} and @command{gawk}, regular expressions
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@cindex @code{IGNORECASE} variable, with @code{~} and @code{!~} operators
+@cindex @command{gawk}, @code{IGNORECASE} variable in
+@c @cindex variables, @code{IGNORECASE}
+Another method, specific to @command{gawk}, is to set the variable
+@code{IGNORECASE} to a nonzero value (@pxref{Built-in Variables}).
+When @code{IGNORECASE} is not zero, @emph{all} regexp and string
+operations ignore case.
+
+Changing the value of @code{IGNORECASE} dynamically controls the
+case sensitivity of the program as it runs. Case is significant by
+default because @code{IGNORECASE} (like most variables) is initialized
+to zero:
+
+@example
+x = "aB"
+if (x ~ /ab/) @dots{} # this test will fail
+
+IGNORECASE = 1
+if (x ~ /ab/) @dots{} # now it will succeed
+@end example
+
+In general, you cannot use @code{IGNORECASE} to make certain rules
+case insensitive and other rules case sensitive, as there is no
+straightforward way
+to set @code{IGNORECASE} just for the pattern of
+a particular rule.@footnote{Experienced C and C++ programmers will note
+that it is possible, using something like
+@samp{IGNORECASE = 1 && /foObAr/ @{ @dots{} @}}
+and
+@samp{IGNORECASE = 0 || /foobar/ @{ @dots{} @}}.
+However, this is somewhat obscure and we don't recommend it.}
+To do this, use either bracket expressions or @code{tolower()}. However, one
+thing you can do with @code{IGNORECASE} only is dynamically turn
+case sensitivity on or off for all the rules at once.
+
+@code{IGNORECASE} can be set on the command line or in a @code{BEGIN} rule
+(@pxref{Other Arguments}; also
+@pxref{Using BEGIN/END}).
+Setting @code{IGNORECASE} from the command line is a way to make
+a program case insensitive without having to edit it.
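+
+For example, you can use @option{-v} to assign @code{IGNORECASE}
+before the program runs:
+
+@example
+$ @kbd{echo "Hello, World" | gawk -v IGNORECASE=1 '/hello/'}
+@print{} Hello, World
+@end example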
+
+@c @cindex ISO 8859-1
+@c @cindex ISO Latin-1
+In multibyte locales,
+the equivalences between upper-
+and lowercase characters are tested based on the wide-character values of
+the locale's character set.
+Otherwise, the characters are tested based
+on the ISO-8859-1 (ISO Latin-1)
+character set. This character set is a superset of the traditional 128
+ASCII characters, which also provides a number of characters suitable
+for use with European languages.@footnote{If you don't understand this,
+don't worry about it; it just means that @command{gawk} does
+the right thing.}
+
+The value of @code{IGNORECASE} has no effect if @command{gawk} is in
+compatibility mode (@pxref{Options}).
+Case is always significant in compatibility mode.
+
+@node Regexp Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Regular expressions describe sets of strings to be matched.
+In @command{awk}, regular expression constants are written enclosed
+between slashes: @code{/}@dots{}@code{/}.
+
+@item
+Regexp constants may be used standalone in patterns and
+in conditional expressions, or as part of matching expressions
+using the @samp{~} and @samp{!~} operators.
+
+@item
+Escape sequences let you represent nonprintable characters and
+also let you represent regexp metacharacters as literal characters
+to be matched.
+
+@item
+Regexp operators provide grouping, alternation, and repetition.
+
+@item
+Bracket expressions give you a shorthand for specifying sets
+of characters that can match at a particular point in a regexp.
+Within bracket expressions, POSIX character classes let you specify
+certain groups of characters in a locale-independent fashion.
+
+@item
+Regular expressions match the leftmost longest text in the string being
+matched. This matters for cases where you need to know the extent of
+the match, such as for text substitution and when the record separator
+is a regexp.
+
+@item
+Matching expressions may use dynamic regexps (i.e., string values
+treated as regular expressions).
+
+@item
+@command{gawk}'s @code{IGNORECASE} variable lets you control the
+case sensitivity of regexp matching. In other @command{awk}
+versions, use @code{tolower()} or @code{toupper()}.
+
+@end itemize
+
+
+@node Reading Files
+@chapter Reading Input Files
+
+@cindex reading input files
+@cindex input files, reading
+@cindex input files
+@cindex @code{FILENAME} variable
+In the typical @command{awk} program,
+@command{awk} reads all input either from the
+standard input (by default, this is the keyboard, but often it is a pipe from another
+command) or from files whose names you specify on the @command{awk}
+command line. If you specify input files, @command{awk} reads them
+in order, processing all the data from one before going on to the next.
+The name of the current input file can be found in the predefined variable
+@code{FILENAME}
+(@pxref{Built-in Variables}).
+
+@cindex records
+@cindex fields
+The input is read in units called @dfn{records}, and is processed by the
+rules of your program one record at a time.
+By default, each record is one line. Each
+record is automatically split into chunks called @dfn{fields}.
+This makes it more convenient for programs to work on the parts of a record.
+
+@cindex @code{getline} command
+On rare occasions, you may need to use the @code{getline} command.
+The @code{getline} command is valuable, both because it
+can do explicit input from any number of files, and because the files
+used with it do not have to be named on the @command{awk} command line
+(@pxref{Getline}).
+
+@menu
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Nonconstant Fields:: Nonconstant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Constant Size:: Reading constant width data.
+* Splitting By Content::      Defining Fields By Content.
+* Multiple Line:: Reading multiline records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+* Read Timeout:: Reading input with a timeout.
+* Command-line directories:: What happens if you put a directory on the
+ command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
+@end menu
+
+@node Records
+@section How Input Is Split into Records
+
+@cindex input, splitting into records
+@cindex records, splitting input into
+@cindex @code{NR} variable
+@cindex @code{FNR} variable
+@command{awk} divides the input for your program into records and fields.
+It keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a predefined variable
+called @code{FNR}, which is reset to zero every time a new file is started.
+Another predefined variable, @code{NR}, records the total number of input
+records read so far from all @value{DF}s. It starts at zero, but is
+never automatically reset to zero.
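+
+For example, the following one-line program, run on two @value{DF}s,
+shows the difference: for the second file, @code{FNR} starts over,
+while @code{NR} continues counting from where the first file left off:
+
+@example
+awk '@{ print FILENAME, FNR, NR @}' mail-list inventory-shipped
+@end example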
+
+@menu
+* awk split records:: How standard @command{awk} splits records.
+* gawk split records:: How @command{gawk} splits records.
+@end menu
+
+@node awk split records
+@subsection Record Splitting with Standard @command{awk}
+
+@cindex separators, for records
+@cindex record separators
+Records are separated by a character called the @dfn{record separator}.
+By default, the record separator is the newline character.
+This is why records are, by default, single lines.
+A different character can be used for the record separator by
+assigning the character to the predefined variable @code{RS}.
+
+@cindex newlines, as record separators
+@cindex @code{RS} variable
+Like any other variable,
+the value of @code{RS} can be changed in the @command{awk} program
+with the assignment operator, @samp{=}
+(@pxref{Assignment Ops}).
+The new record-separator character should be enclosed in quotation marks,
+which indicate a string constant. Often, the right time to do this is
+at the beginning of execution, before any input is processed,
+so that the very first record is read with the proper separator.
+To do this, use the special @code{BEGIN} pattern
+(@pxref{BEGIN/END}).
+For example:
+
+@example
+awk 'BEGIN @{ RS = "u" @}
+ @{ print $0 @}' mail-list
+@end example
+
+@noindent
+changes the value of @code{RS} to @samp{u}, before reading any input.
+This is a string whose first character is the letter ``u''; as a result, records
+are separated by the letter ``u.'' Then the input file is read, and the second
+rule in the @command{awk} program (the action with no pattern) prints each
+record. Because each @code{print} statement adds a newline at the end of
+its output, this @command{awk} program copies the input
+with each @samp{u} changed to a newline. Here are the results of running
+the program on @file{mail-list}:
+
+@example
+$ @kbd{awk 'BEGIN @{ RS = "u" @}}
+> @kbd{@{ print $0 @}' mail-list}
+@print{} Amelia 555-5553 amelia.zodiac
+@print{} sq
+@print{} e@@gmail.com F
+@print{} Anthony 555-3412 anthony.assert
+@print{} ro@@hotmail.com A
+@print{} Becky 555-7685 becky.algebrar
+@print{} m@@gmail.com A
+@print{} Bill 555-1675 bill.drowning@@hotmail.com A
+@print{} Broderick 555-0542 broderick.aliq
+@print{} otiens@@yahoo.com R
+@print{} Camilla 555-2912 camilla.inf
+@print{} sar
+@print{} m@@skynet.be R
+@print{} Fabi
+@print{} s 555-1234 fabi
+@print{} s.
+@print{} ndevicesim
+@print{} s@@
+@print{} cb.ed
+@print{} F
+@print{} J
+@print{} lie 555-6699 j
+@print{} lie.perscr
+@print{} tabor@@skeeve.com F
+@print{} Martin 555-6480 martin.codicib
+@print{} s@@hotmail.com A
+@print{} Sam
+@print{} el 555-3430 sam
+@print{} el.lanceolis@@sh
+@print{} .ed
+@print{} A
+@print{} Jean-Pa
+@print{} l 555-2127 jeanpa
+@print{} l.campanor
+@print{} m@@ny
+@print{} .ed
+@print{} R
+@print{}
+@end example
+
+@noindent
+Note that the entry for the name @samp{Bill} is not split.
+In the original @value{DF}
+(@pxref{Sample Data Files}),
+the line looks like this:
+
+@example
+Bill 555-1675 bill.drowning@@hotmail.com A
+@end example
+
+@noindent
+It contains no @samp{u}, so there is no reason to split the record,
+unlike the others, which have one or more occurrences of the @samp{u}.
+In fact, this record is treated as part of the previous record;
+the newline separating them in the output
+is the original newline in the @value{DF}, not the one added by
+@command{awk} when it printed the record!
+
+@cindex record separators, changing
+@cindex separators, for records
+Another way to change the record separator is on the command line,
+using the variable-assignment feature
+(@pxref{Other Arguments}):
+
+@example
+awk '@{ print $0 @}' RS="u" mail-list
+@end example
+
+@noindent
+This sets @code{RS} to @samp{u} before processing @file{mail-list}.
+
+Using an alphabetic character such as @samp{u} for the record separator
+is highly likely to produce strange results.
+Using an unusual character such as @samp{/} is more likely to
+produce correct behavior in the majority of cases, but there
+are no guarantees. The moral is: Know Your Data.
+
+When using regular characters as the record separator,
+there is one unusual case that occurs when @command{gawk} is
+being fully POSIX-compliant (@pxref{Options}).
+Then, the following (extreme) pipeline prints a surprising @samp{1}:
+
+@example
+$ @kbd{echo | gawk --posix 'BEGIN @{ RS = "a" @} ; @{ print NF @}'}
+@print{} 1
+@end example
+
+There is one field, consisting of a newline. The value of the built-in
+variable @code{NF} is the number of fields in the current record.
+(In the normal case, @command{gawk} treats the newline as whitespace,
+printing @samp{0} as the result. Most other versions of @command{awk}
+also act this way.)
+
+@cindex dark corner, input files
+Reaching the end of an input file terminates the current input record,
+even if the last character in the file is not the character in @code{RS}.
+@value{DARKCORNER}
+
+@cindex empty strings
+@cindex null strings
+@cindex strings, empty, See null strings
+The empty string @code{""} (a string without any characters)
+has a special meaning
+as the value of @code{RS}. It means that records are separated
+by one or more blank lines and nothing else.
+@DBXREF{Multiple Line} for more details.
+
+If you change the value of @code{RS} in the middle of an @command{awk} run,
+the new value is used to delimit subsequent records, but neither the
+record currently being processed nor any records already processed
+are affected.
+
+@cindex @command{gawk}, @code{RT} variable in
+@cindex @code{RT} variable
+@cindex records, terminating
+@cindex terminating records
+@cindex differences in @command{awk} and @command{gawk}, record separators
+@cindex regular expressions, as record separators
+@cindex record separators, regular expressions as
+@cindex separators, for records, regular expressions as
+After the end of the record has been determined, @command{gawk}
+sets the variable @code{RT} to the text in the input that matched
+@code{RS}.
+
+@node gawk split records
+@subsection Record Splitting with @command{gawk}
+
+@cindex common extensions, @code{RS} as a regexp
+@cindex extensions, common@comma{} @code{RS} as a regexp
+When using @command{gawk},
+the value of @code{RS} is not limited to a one-character
+string. It can be any regular expression
+(@pxref{Regexp}). @value{COMMONEXT}
+In general, each record
+ends at the next string that matches the regular expression; the next
+record starts at the end of the matching string. This general rule is
+actually at work in the usual case, where @code{RS} contains just a
+newline: a record ends at the beginning of the next matching string (the
+next newline in the input), and the following record starts just after
+the end of this string (at the first character of the following line).
+The newline, because it matches @code{RS}, is not part of either record.
+
+When @code{RS} is a single character, @code{RT}
+contains the same single character. However, when @code{RS} is a
+regular expression, @code{RT} contains
+the actual input text that matched the regular expression.
+
+If the input file ended without any text that matches @code{RS},
+@command{gawk} sets @code{RT} to the null string.
+
+The following example illustrates both of these features.
+It sets @code{RS} equal to a regular expression that
+matches either a newline or a series of one or more uppercase letters
+with optional leading and/or trailing whitespace:
+
+@example
+$ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
+> @kbd{gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}}
+> @kbd{@{ print "Record =", $0,"and RT = [" RT "]" @}'}
+@print{} Record = record 1 and RT = [ AAAA ]
+@print{} Record = record 2 and RT = [ BBBB ]
+@print{} Record = record 3 and RT = [
+@print{} ]
+@end example
+
+@noindent
+The square brackets delineate the contents of @code{RT}, letting you
+see the leading and trailing whitespace. The final value of
+@code{RT} is a newline.
+@DBXREF{Simple Sed} for a more useful example
+of @code{RS} as a regexp and @code{RT}.
+
+If you set @code{RS} to a regular expression that allows optional
+trailing text, such as @samp{RS = "abc(XYZ)?"}, it is possible, due
+to implementation constraints, that @command{gawk} may match the leading
+part of the regular expression, but not the trailing part, particularly
+if the input text that could match the trailing part is fairly long.
+@command{gawk} attempts to avoid this problem, but currently, there's
+no guarantee that this will never happen.
+
+@quotation NOTE
+Remember that in @command{awk}, the @samp{^} and @samp{$} anchor
+metacharacters match the beginning and end of a @emph{string}, and not
+the beginning and end of a @emph{line}. As a result, something like
+@samp{RS = "^[[:upper:]]"} can only match at the beginning of a file.
+This is because @command{gawk} views the input file as one long string
+that happens to contain newline characters.
+It is thus best to avoid anchor metacharacters in the value of @code{RS}.
+@end quotation
+
+@cindex differences in @command{awk} and @command{gawk}, @code{RS}/@code{RT} variables
+The use of @code{RS} as a regular expression and the @code{RT}
+variable are @command{gawk} extensions; they are not available in
+compatibility mode
+(@pxref{Options}).
+In compatibility mode, only the first character of the value of
+@code{RS} determines the end of the record.
+
+@sidebar @code{RS = "\0"} Is Not Portable
+@cindex portability, data files as single record
+There are times when you might want to treat an entire @value{DF} as a
+single record. The only way to make this happen is to give @code{RS}
+a value that you know doesn't occur in the input file. This is hard
+to do in a general way, such that a program always works for arbitrary
+input files.
+
+You might think that for text files, the @sc{nul} character, which
+consists of a character with all bits equal to zero, is a good
+value to use for @code{RS} in this case:
+
+@example
+BEGIN @{ RS = "\0" @} # whole file becomes one record?
+@end example
+
+@cindex differences in @command{awk} and @command{gawk}, strings, storing
+@command{gawk} in fact accepts this, and uses the @sc{nul}
+character for the record separator.
+This works for certain special files, such as @file{/proc/environ} on
+GNU/Linux systems, where the @sc{nul} character is in fact the record separator.
+However, this usage is @emph{not} portable
+to most other @command{awk} implementations.
+
+@cindex dark corner, strings, storing
+Almost all other @command{awk} implementations@footnote{At least that we know
+about.} store strings internally as C-style strings. C strings use the
+@sc{nul} character as the string terminator. In effect, this means that
+@samp{RS = "\0"} is the same as @samp{RS = ""}.
+@value{DARKCORNER}
+
+It happens that recent versions of @command{mawk} can use the @sc{nul}
+character as a record separator. However, this is a special case:
+@command{mawk} does not allow embedded @sc{nul} characters in strings.
+(This may change in a future version of @command{mawk}.)
+
+@cindex records, treating files as
+@cindex treating files, as single records
+@DBXREF{Readfile Function} for an interesting way to read
+whole files. If you are using @command{gawk}, see @DBREF{Extension Sample
+Readfile} for another option.
+@end sidebar
+
+@node Fields
+@section Examining Fields
+
+@cindex examining fields
+@cindex fields
+@cindex accessing fields
+@cindex fields, examining
+@cindex POSIX @command{awk}, field separators and
+@cindex field separators, POSIX and
+@cindex separators, field, POSIX and
+When @command{awk} reads an input record, the record is
+automatically @dfn{parsed} or separated by the @command{awk} utility into chunks
+called @dfn{fields}. By default, fields are separated by @dfn{whitespace},
+like words in a line.
+Whitespace in @command{awk} means any string of one or more spaces,
+TABs, or newlines;@footnote{In POSIX @command{awk}, newlines are not
+considered whitespace for separating fields.} other characters
+that are considered whitespace by other languages
+(such as formfeed, vertical tab, etc.) are @emph{not} considered
+whitespace by @command{awk}.
+
+The purpose of fields is to make it more convenient for you to refer to
+these pieces of the record. You don't have to use them---you can
+operate on the whole record if you want---but fields are what make
+simple @command{awk} programs so powerful.
+
+@cindex field operator @code{$}
+@cindex @code{$} (dollar sign), @code{$} field operator
+@cindex dollar sign (@code{$}), @code{$} field operator
+@cindex field operators@comma{} dollar sign as
+You use a dollar-sign (@samp{$})
+to refer to a field in an @command{awk} program,
+followed by the number of the field you want. Thus, @code{$1}
+refers to the first field, @code{$2} to the second, and so on.
+(Unlike in the Unix shells, the field numbers are not limited to single digits.
+@code{$127} is the 127th field in the record.)
+For example, suppose the following is a line of input:
+
+@example
+This seems like a pretty nice example.
+@end example
+
+@noindent
+Here the first field, or @code{$1}, is @samp{This}, the second field, or
+@code{$2}, is @samp{seems}, and so on. Note that the last field,
+@code{$7}, is @samp{example.}. Because there is no space between the
+@samp{e} and the @samp{.}, the period is considered part of the seventh
+field.
+
+@cindex @code{NF} variable
+@cindex fields, number of
+@code{NF} is a predefined variable whose value is the number of fields
+in the current record. @command{awk} automatically updates the value
+of @code{NF} each time it reads a record. No matter how many fields
+there are, the last field in a record can be represented by @code{$NF}.
+So, @code{$NF} is the same as @code{$7}, which is @samp{example.}.
+If you try to reference a field beyond the last
+one (such as @code{$8} when the record has only seven fields), you get
+the empty string. (If used in a numeric operation, you get zero.)
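+
+For example, using the sample line above:
+
+@example
+$ @kbd{echo 'This seems like a pretty nice example.' |}
+> @kbd{awk '@{ print NF, $NF @}'}
+@print{} 7 example.
+@end example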
+
+The use of @code{$0}, which looks like a reference to the ``zero-th'' field, is
+a special case: it represents the whole input record. Use it
+when you are not interested in specific fields.
+Here are some more examples:
+
+@example
+$ @kbd{awk '$1 ~ /li/ @{ print $0 @}' mail-list}
+@print{} Amelia 555-5553 amelia.zodiacusque@@gmail.com F
+@print{} Julie 555-6699 julie.perscrutabor@@skeeve.com F
+@end example
+
+@noindent
+This example prints each record in the file @file{mail-list} whose first
+field contains the string @samp{li}.
+
+By contrast, the following example looks for @samp{li} in @emph{the
+entire record} and prints the first and last fields for each matching
+input record:
+
+@example
+$ @kbd{awk '/li/ @{ print $1, $NF @}' mail-list}
+@print{} Amelia F
+@print{} Broderick R
+@print{} Julie F
+@print{} Samuel A
+@end example
+
+@node Nonconstant Fields
+@section Nonconstant Field Numbers
+@cindex fields, numbers
+@cindex field numbers
+
+A field number need not be a constant. Any expression in
+the @command{awk} language can be used after a @samp{$} to refer to a
+field. The value of the expression specifies the field number. If the
+value is a string, rather than a number, it is converted to a number.
+Consider this example:
+
+@example
+awk '@{ print $NR @}'
+@end example
+
+@noindent
+Recall that @code{NR} is the number of records read so far: one in the
+first record, two in the second, and so on. So this example prints the first
+field of the first record, the second field of the second record, and so
+on. For the twentieth record, field number 20 is printed; most likely,
+the record has fewer than 20 fields, so this prints a blank line.
+Here is another example of using expressions as field numbers:
+
+@example
+awk '@{ print $(2*2) @}' mail-list
+@end example
+
+@command{awk} evaluates the expression @samp{(2*2)} and uses
+its value as the number of the field to print. The @samp{*} sign
+represents multiplication, so the expression @samp{2*2} evaluates to four.
+The parentheses are used so that the multiplication is done before the
+@samp{$} operation; they are necessary whenever there is a binary
+operator@footnote{A @dfn{binary operator}, such as @samp{*} for
+multiplication, is one that takes two operands. The distinction
+is required, because @command{awk} also has unary (one-operand)
+and ternary (three-operand) operators.}
+in the field-number expression. This example, then, prints the
+type of relationship (the fourth field) for every line of the file
+@file{mail-list}. (All of the @command{awk} operators are listed, in
+order of decreasing precedence, in
+@ref{Precedence}.)
+
+If the field number you compute is zero, you get the entire record.
+Thus, @samp{$(2-2)} has the same value as @code{$0}. Negative field
+numbers are not allowed; trying to reference one usually terminates
+the program. (The POSIX standard does not define
+what happens when you reference a negative field number. @command{gawk}
+notices this and terminates your program. Other @command{awk}
+implementations may behave differently.)
+
+As mentioned in @ref{Fields},
+@command{awk} stores the current record's number of fields in the built-in
+variable @code{NF} (also @pxref{Built-in Variables}). Thus, the expression
+@code{$NF} is not a special feature---it is the direct consequence of
+evaluating @code{NF} and using its value as a field number.
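+
+For instance, the following one-liner uses an expression involving
+@code{NF} to print the next-to-last field of each record (for
+@file{mail-list}, that is the email address):
+
+@example
+awk '@{ print $(NF-1) @}' mail-list
+@end example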
+
+@node Changing Fields
+@section Changing the Contents of a Field
+
+@cindex fields, changing contents of
+The contents of a field, as seen by @command{awk}, can be changed within an
+@command{awk} program; this changes what @command{awk} perceives as the
+current input record. (The actual input is untouched; @command{awk} @emph{never}
+modifies the input file.)
+Consider the following example and its output:
+
+@example
+$ @kbd{awk '@{ nboxes = $3 ; $3 = $3 - 10}
+> @kbd{print nboxes, $3 @}' inventory-shipped}
+@print{} 25 15
+@print{} 32 22
+@print{} 24 14
+@dots{}
+@end example
+
+@noindent
+The program first saves the original value of field three in the variable
+@code{nboxes}.
+The @samp{-} sign represents subtraction, so this program reassigns
+field three, @code{$3}, as the original value of field three minus ten:
+@samp{$3 - 10}. (@xref{Arithmetic Ops}.)
+Then it prints the original and new values for field three.
+(Someone in the warehouse made a consistent mistake while inventorying
+the red boxes.)
+
+For this to work, the text in @code{$3} must make sense
+as a number; the string of characters must be converted to a number
+for the computer to do arithmetic on it. The number resulting
+from the subtraction is converted back to a string of characters that
+then becomes field three.
+@xref{Conversion}.
+
+When the value of a field is changed (as perceived by @command{awk}), the
+text of the input record is recalculated to contain the new field where
+the old one was. In other words, @code{$0} changes to reflect the altered
+field. Thus, this program
+prints a copy of the input file, with 10 subtracted from the second
+field of each line:
+
+@example
+$ @kbd{awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped}
+@print{} Jan 3 25 15 115
+@print{} Feb 5 32 24 226
+@print{} Mar 5 24 34 228
+@dots{}
+@end example
+
+It is also possible to assign contents to fields that are out
+of range. For example:
+
+@example
+$ @kbd{awk '@{ $6 = ($5 + $4 + $3 + $2)}
+> @kbd{ print $6 @}' inventory-shipped}
+@print{} 168
+@print{} 297
+@print{} 301
+@dots{}
+@end example
+
+@cindex adding, fields
+@cindex fields, adding
+@noindent
+We've just created @code{$6}, whose value is the sum of fields
+@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
+represents addition. For the file @file{inventory-shipped}, @code{$6}
+represents the total number of parcels shipped for a particular month.
+
+Creating a new field changes @command{awk}'s internal copy of the current
+input record, which is the value of @code{$0}. Thus, if you do @samp{print $0}
+after adding a field, the record printed includes the new field, with
+the appropriate number of field separators between it and the previously
+existing fields.
+
+@cindex @code{OFS} variable
+@cindex output field separator, See @code{OFS} variable
+@cindex field separators, See Also @code{OFS}
+This recomputation affects and is affected by
+@code{NF} (the number of fields; @pxref{Fields}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.
+The exact format of @code{$0} is also affected by a feature that has not been discussed yet:
+the @dfn{output field separator}, @code{OFS},
+used to separate the fields (@pxref{Output Separators}).
+
+Note, however, that merely @emph{referencing} an out-of-range field
+does @emph{not} change the value of either @code{$0} or @code{NF}.
+Referencing an out-of-range field only produces an empty string. For
+example:
+
+@example
+if ($(NF+1) != "")
+ print "can't happen"
+else
+ print "everything is normal"
+@end example
+
+@noindent
+should print @samp{everything is normal}, because @code{NF+1} is certain
+to be out of range. (@DBXREF{If Statement}
+for more information about @command{awk}'s @code{if-else} statements.
+@DBXREF{Typing and Comparison}
+for more information about the @samp{!=} operator.)
+
+It is important to note that making an assignment to an existing field
+changes the
+value of @code{$0} but does not change the value of @code{NF},
+even when you assign the empty string to a field. For example:
+
+@example
+$ @kbd{echo a b c d | awk '@{ OFS = ":"; $2 = ""}
+> @kbd{print $0; print NF @}'}
+@print{} a::c:d
+@print{} 4
+@end example
+
+@noindent
+The field is still there; it just has an empty value, delimited by
+the two colons between @samp{a} and @samp{c}.
+This example shows what happens if you create a new field:
+
+@example
+$ @kbd{echo a b c d | awk '@{ OFS = ":"; $2 = ""; $6 = "new"}
+> @kbd{print $0; print NF @}'}
+@print{} a::c:d::new
+@print{} 6
+@end example
+
+@noindent
+The intervening field, @code{$5}, is created with an empty value
+(indicated by the second pair of adjacent colons),
+and @code{NF} is updated with the value six.
+
+@cindex dark corner, @code{NF} variable, decrementing
+@cindex @code{NF} variable, decrementing
+Decrementing @code{NF} throws away the values of the fields
+after the new value of @code{NF} and recomputes @code{$0}.
+@value{DARKCORNER}
+Here is an example:
+
+@example
+$ @kbd{echo a b c d e f | awk '@{ print "NF =", NF;}
+> @kbd{ NF = 3; print $0 @}'}
+@print{} NF = 6
+@print{} a b c
+@end example
+
+@cindex portability, @code{NF} variable@comma{} decrementing
+@quotation CAUTION
+Some versions of @command{awk} don't
+rebuild @code{$0} when @code{NF} is decremented.
+@end quotation
+
+Finally, there are times when it is convenient to force
+@command{awk} to rebuild the entire record, using the current
+value of the fields and @code{OFS}. To do this, use the
+seemingly innocuous assignment:
+
+@example
+$1 = $1 # force record to be reconstituted
+print $0 # or whatever else with $0
+@end example
+
+@noindent
+This forces @command{awk} to rebuild the record. It does help
+to add a comment, as we've shown here.
+
+There is a flip side to the relationship between @code{$0} and
+the fields. Any assignment to @code{$0} causes the record to be
+reparsed into fields using the @emph{current} value of @code{FS}.
+This also applies to any built-in function that updates @code{$0},
+such as @code{sub()} and @code{gsub()}
+(@pxref{String Functions}).
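+
+For example, in the following pipeline, the assignment @samp{$0 = $0}
+forces the record to be split again using the new value of @code{FS},
+so that @code{$2} becomes @samp{b}:
+
+@example
+$ @kbd{echo 'a:b:c' | awk '@{ FS = ":"; $0 = $0; print $2 @}'}
+@print{} b
+@end example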
+
+@sidebar Understanding @code{$0}
+
+It is important to remember that @code{$0} is the @emph{full}
+record, exactly as it was read from the input. This includes
+any leading or trailing whitespace, and the exact whitespace (or other
+characters) that separate the fields.
+
+It is a common error to try to change the field separators
+in a record simply by setting @code{FS} and @code{OFS}, and then
+expecting a plain @samp{print} or @samp{print $0} to print the
+modified record.
+
+But this does not work, because nothing was done to change the record
+itself. Instead, you must force the record to be rebuilt, typically
+with a statement such as @samp{$1 = $1}, as described earlier.
+@end sidebar
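+
+For example, the following pipeline demonstrates the point made in the
+sidebar: the first @code{print} shows the record exactly as it was
+read, and the second shows it after @samp{$1 = $1} has rebuilt it
+using @code{OFS}:
+
+@example
+$ @kbd{echo a b c | awk 'BEGIN @{ OFS = ":" @}}
+> @kbd{@{ print $0; $1 = $1; print $0 @}'}
+@print{} a b c
+@print{} a:b:c
+@end example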
+
+
+@node Field Separators
+@section Specifying How Fields Are Separated
+
+@menu
+* Default Field Splitting:: How fields are normally separated.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Full Line Fields:: Making the full line be a single field.
+* Field Splitting Summary:: Some final points and a summary table.
+@end menu
+
+@cindex @code{FS} variable
+@cindex fields, separating
+@cindex field separators
+@cindex fields, separating
+The @dfn{field separator}, which is either a single character or a regular
+expression, controls the way @command{awk} splits an input record into fields.
+@command{awk} scans the input record for character sequences that
+match the separator; the fields themselves are the text between the matches.
+
+In the examples that follow, we use the bullet symbol (@bullet{}) to
+represent spaces in the output.
+If the field separator is @samp{oo}, then the following line:
+
+@example
+moo goo gai pan
+@end example
+
+@noindent
+is split into three fields: @samp{m}, @samp{@bullet{}g}, and
+@samp{@bullet{}gai@bullet{}pan}.
+Note the leading spaces in the values of the second and third fields.
+
+@cindex troubleshooting, @command{awk} uses @code{FS} not @code{IFS}
+The field separator is represented by the predefined variable @code{FS}.
+Shell programmers take note: @command{awk} does @emph{not} use the
+name @code{IFS} that is used by the POSIX-compliant shells (such as
+the Unix Bourne shell, @command{sh}, or Bash).
+
+@cindex @code{FS} variable, changing value of
+The value of @code{FS} can be changed in the @command{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops}).
+Often, the right time to do this is at the beginning of execution
+before any input has been processed, so that the very first record
+is read with the proper separator. To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:
+
+@example
+awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
+@end example
+
+@cindex @code{BEGIN} pattern
+@noindent
+Given the input line:
+
+@example
+John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+this @command{awk} program extracts and prints the string
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+
+@cindex field separators, choice of
+@cindex regular expressions as field separators
+@cindex field separators, regular expressions as
+Sometimes the input data contains separator characters that don't
+separate fields the way you thought they would. For instance, the
+person's name in the example we just used might have a title or
+suffix attached, such as:
+
+@example
+John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+The same program would extract @samp{@bullet{}LXIX}, instead of
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+If you were expecting the program to print the
+address, you would be surprised. The moral is to choose your data layout and
+separator characters carefully to prevent such problems.
+(If the data is not in a form that is easy to process, perhaps you
+can massage it first with a separate @command{awk} program.)
+
+
+@node Default Field Splitting
+@subsection Whitespace Normally Separates Fields
+
+@cindex newlines, as field separators
+@cindex whitespace, as field separators
+Fields are normally separated by whitespace sequences
+(spaces, TABs, and newlines), not by single spaces. Two spaces in a row do not
+delimit an empty field. The default value of the field separator @code{FS}
+is a string containing a single space, @w{@code{" "}}. If @command{awk}
+interpreted this value in the usual way, each space character would separate
+fields, so two spaces in a row would make an empty field between them.
+The reason this does not happen is that a single space as the value of
+@code{FS} is a special case---it is taken to specify the default manner
+of delimiting fields.
+
+If @code{FS} is any other single character, such as @code{","}, then
+each occurrence of that character separates two fields. Two consecutive
+occurrences delimit an empty field. If the character occurs at the
+beginning or the end of the line, that too delimits an empty field. The
+space character is the only single character that does not follow these
+rules.
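+
+For example, with a comma as the field separator, two commas in a row
+delimit an empty second field, which still counts toward @code{NF}:
+
+@example
+$ @kbd{echo 'a,,b' | awk 'BEGIN @{ FS = "," @} @{ print NF @}'}
+@print{} 3
+@end example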
+
+@node Regexp Field Splitting
+@subsection Using Regular Expressions to Separate Fields
+
+@cindex regular expressions, as field separators
+@cindex field separators, regular expressions as
+The previous @value{SUBSECTION}
+discussed the use of single characters or simple strings as the
+value of @code{FS}.
+More generally, the value of @code{FS} may be a string containing any
+regular expression. In this case, each match in the record for the regular
+expression separates fields. For example, the assignment:
+
+@example
+FS = ", \t"
+@end example
+
+@noindent
+makes every area of an input line that consists of a comma followed by a
+space and a TAB into a field separator.
+@ifinfo
+(@samp{\t}
+is an @dfn{escape sequence} that stands for a TAB;
+@pxref{Escape Sequences},
+for the complete list of similar escape sequences.)
+@end ifinfo
+
+For a less trivial example of a regular expression, try using
+single spaces to separate fields the way single commas are used.
+@code{FS} can be set to @w{@code{"[@ ]"}} (left bracket, space, right
+bracket). This regular expression matches a single space and nothing else
+(@pxref{Regexp}).
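+
+With this value of @code{FS}, two spaces in a row delimit an empty
+field, unlike the default behavior. For example (note the two spaces
+between @samp{a} and @samp{b}):
+
+@example
+$ @kbd{echo 'a  b' | awk 'BEGIN @{ FS = "[ ]" @} @{ print NF @}'}
+@print{} 3
+@end example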
+
+There is an important difference between the two cases of @samp{FS = @w{" "}}
+(a single space) and @samp{FS = @w{"[ \t\n]+"}}
+(a regular expression matching one or more spaces, TABs, or newlines).
+For both values of @code{FS}, fields are separated by @dfn{runs}
+(multiple adjacent occurrences) of spaces, TABs,
+and/or newlines. However, when the value of @code{FS} is @w{@code{" "}},
+@command{awk} first strips leading and trailing whitespace from
+the record and then decides where the fields are.
+For example, the following pipeline prints @samp{b}:
+
+@example
+$ @kbd{echo ' a b c d ' | awk '@{ print $2 @}'}
+@print{} b
+@end example
+
+@noindent
+However, this pipeline prints @samp{a} (note the extra spaces around
+each letter):
+
+@example
+$ @kbd{echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t\n]+" @}}
+> @kbd{@{ print $2 @}'}
+@print{} a
+@end example
+
+@noindent
+@cindex null strings
+@cindex strings, null
+@cindex empty strings, See null strings
+In this case, the first field is null, or empty.
+
+The stripping of leading and trailing whitespace also comes into
+play whenever @code{$0} is recomputed. For instance, study this pipeline:
+
+@example
+$ @kbd{echo ' a b c d' | awk '@{ print; $2 = $2; print @}'}
+@print{} a b c d
+@print{} a b c d
+@end example
+
+@noindent
+The first @code{print} statement prints the record as it was read,
+with leading whitespace intact. The assignment to @code{$2} rebuilds
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
+separated by the value of @code{OFS} (which is a space by default).
+Because the leading whitespace was ignored when finding @code{$1},
+it is not part of the new @code{$0}. Finally, the last @code{print}
+statement prints the new @code{$0}.
+
+@cindex @code{FS}, containing @code{^}
+@cindex @code{^} (caret), in @code{FS}
+@cindex dark corner, @code{^}, in @code{FS}
+There is an additional subtlety to be aware of when using regular expressions
+for field splitting.
+It is not well specified in the POSIX standard, or anywhere else, what @samp{^}
+means when splitting fields. Does the @samp{^} match only at the beginning of
+the entire record? Or is each field separator a new string? It turns out that
+different @command{awk} versions answer this question differently, and you
+should not rely on any specific behavior in your programs.
+@value{DARKCORNER}
+
+@cindex Brian Kernighan's @command{awk}
+As a point of information, BWK @command{awk} allows @samp{^}
+to match only at the beginning of the record. @command{gawk}
+also works this way. For example:
+
+@example
+$ @kbd{echo 'xxAA xxBxx C' |}
+> @kbd{gawk -F '(^x+)|( +)' '@{ for (i = 1; i <= NF; i++)}
+> @kbd{ printf "-->%s<--\n", $i @}'}
+@print{} --><--
+@print{} -->AA<--
+@print{} -->xxBxx<--
+@print{} -->C<--
+@end example
+
+@node Single Character Fields
+@subsection Making Each Character a Separate Field
+
+@cindex common extensions, single character fields
+@cindex extensions, common@comma{} single character fields
+@cindex differences in @command{awk} and @command{gawk}, single-character fields
+@cindex single-character fields
+@cindex fields, single-character
+There are times when you may want to examine each character
+of a record separately. This can be done in @command{gawk} by
+simply assigning the null string (@code{""}) to @code{FS}. @value{COMMONEXT}
+In this case,
+each individual character in the record becomes a separate field.
+For example:
+
+@example
+$ @kbd{echo a b | gawk 'BEGIN @{ FS = "" @}}
+> @kbd{@{}
+> @kbd{for (i = 1; i <= NF; i = i + 1)}
+> @kbd{print "Field", i, "is", $i}
+> @kbd{@}'}
+@print{} Field 1 is a
+@print{} Field 2 is
+@print{} Field 3 is b
+@end example
+
+@cindex dark corner, @code{FS} as null string
+@cindex @code{FS} variable, as null string
+Traditionally, the behavior of @code{FS} equal to @code{""} was not defined.
+In this case, most versions of Unix @command{awk} simply treat the entire record
+as only having one field.
+@value{DARKCORNER}
+In compatibility mode
+(@pxref{Options}),
+if @code{FS} is the null string, then @command{gawk} also
+behaves this way.
+
+@node Command Line Field Separator
+@subsection Setting @code{FS} from the Command Line
+@cindex @option{-F} option, command-line
+@cindex field separator, on command line
+@cindex command line, @code{FS} on@comma{} setting
+@cindex @code{FS} variable, setting from command line
+
+@code{FS} can be set on the command line. Use the @option{-F} option to
+do so. For example:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+@noindent
+sets @code{FS} to the @samp{,} character. Notice that the option uses
+an uppercase @samp{F} instead of a lowercase @samp{f}. The latter
+option (@option{-f}) specifies a file containing an @command{awk} program.
+
+The value used for the argument to @option{-F} is processed in exactly the
+same way as assignments to the predefined variable @code{FS}.
+Any special characters in the field separator must be escaped
+appropriately. For example, to use a @samp{\} as the field separator
+on the command line, you would have to type:
+
+@example
+# same as FS = "\\"
+awk -F\\\\ '@dots{}' files @dots{}
+@end example
+
+@noindent
+@cindex @code{\} (backslash), as field separator
+@cindex backslash (@code{\}), as field separator
+Because @samp{\} is used for quoting in the shell, @command{awk} sees
+@samp{-F\\}. Then @command{awk} processes the @samp{\\} for escape
+characters (@pxref{Escape Sequences}), finally yielding
+a single @samp{\} to use for the field separator.
+
+@c @cindex historical features
+As a special case, in compatibility mode
+(@pxref{Options}),
+if the argument to @option{-F} is @samp{t}, then @code{FS} is set to
+the TAB character. If you type @samp{-F\t} at the
+shell, without any quotes, the @samp{\} gets deleted, so @command{awk}
+figures that you really want your fields to be separated with TABs and
+not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line
+if you really do want to separate your fields with @samp{t}s.
+Use @samp{-F '\t'} when not in compatibility mode to specify that TABs
+separate fields.
+
+As an example, let's use an @command{awk} program file called @file{edu.awk}
+that contains the pattern @code{/edu/} and the action @samp{print $1}:
+
+@example
+/edu/ @{ print $1 @}
+@end example
+
+Let's also set @code{FS} to be the @samp{-} character and run the
+program on the file @file{mail-list}. The following command prints a
+list of the names of the people that work at or attend a university, and
+the first three digits of their phone numbers:
+
+@example
+$ @kbd{awk -F- -f edu.awk mail-list}
+@print{} Fabius 555
+@print{} Samuel 555
+@print{} Jean
+@end example
+
+@noindent
+Note the third line of output. The third line
+in the original file looked like this:
+
+@example
+Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@end example
+
+The @samp{-} as part of the person's name was used as the field
+separator, instead of the @samp{-} in the phone number that was
+originally intended. This demonstrates why you have to be careful in
+choosing your field and record separators.
+
+@cindex Unix @command{awk}, password files@comma{} field separators and
+Perhaps the most common use of a single character as the field separator
+occurs when processing the Unix system password file. On many Unix
+systems, each user has a separate entry in the system password file, one
+line per user. The information in these lines is separated by colons.
+The first field is the user's login name and the second is the user's
+encrypted or shadow password. (A shadow password is indicated by the
+presence of a single @samp{x} in the second field.) A password file
+entry might look like this:
+
+@cindex Robbins, Arnold
+@example
+arnold:x:2076:10:Arnold Robbins:/home/arnold:/bin/bash
+@end example
+
+The following program searches the system password file and prints
+the entries for users whose full name is not indicated:
+
+@example
+awk -F: '$5 == ""' /etc/passwd
+@end example
+
+@node Full Line Fields
+@subsection Making the Full Line Be a Single Field
+
+Occasionally, it's useful to treat the whole input line as a
+single field. This can be done easily and portably by
+setting @code{FS} to @code{"\n"} (a newline):@footnote{Thanks to
+Andrew Schorr for this tip.}
+
+@example
+awk -F'\n' '@var{program}' @var{files @dots{}}
+@end example
+
+@noindent
+When you do this, @code{$1} is the same as @code{$0}.
+
+@sidebar Changing @code{FS} Does Not Affect the Fields
+
+@cindex POSIX @command{awk}, field separators and
+@cindex field separator, POSIX and
+According to the POSIX standard, @command{awk} is supposed to behave
+as if each record is split into fields at the time it is read.
+In particular, this means that if you change the value of @code{FS}
+after a record is read, the value of the fields (i.e., how they were split)
+should reflect the old value of @code{FS}, not the new one.
+
+@cindex dark corner, field separators
+@cindex @command{sed} utility
+@cindex stream editors
+However, many older implementations of @command{awk} do not work this way. Instead,
+they defer splitting the fields until a field is actually
+referenced. The fields are split
+using the @emph{current} value of @code{FS}!
+@value{DARKCORNER}
+This behavior can be difficult
+to diagnose. The following example illustrates the difference
+between the two methods.
+(The @command{sed}@footnote{The @command{sed} utility is a ``stream editor.''
+Its behavior is also defined by the POSIX standard.}
+command prints just the first line of @file{/etc/passwd}.)
+
+@example
+sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
+@end example
+
+@noindent
+which usually prints:
+
+@example
+root
+@end example
+
+@noindent
+on an incorrect implementation of @command{awk}, while @command{gawk}
+prints the full first line of the file, something like:
+
+@example
+root:x:0:0:Root:/:
+@end example
+@end sidebar
+
+@node Field Splitting Summary
+@subsection Field-Splitting Summary
+
+It is important to remember that when you assign a string constant
+as the value of @code{FS}, it undergoes normal @command{awk} string
+processing. For example, with Unix @command{awk} and @command{gawk},
+the assignment @samp{FS = "\.."} assigns the character string @code{".."}
+to @code{FS} (the backslash is stripped). This creates a regexp meaning
+``fields are separated by occurrences of any two characters.''
+If instead you want fields to be separated by a literal period followed
+by any single character, use @samp{FS = "\\.."}.
+
+The following list summarizes how fields are split, based on the value
+of @code{FS} (@samp{==} means ``is equal to''):
+
+@table @code
+@item FS == " "
+Fields are separated by runs of whitespace. Leading and trailing
+whitespace are ignored. This is the default.
+
+@item FS == @var{any other single character}
+Fields are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty fields, as do leading and
+trailing occurrences.
+The character can even be a regexp metacharacter; it does not need
+to be escaped.
+
+@item FS == @var{regexp}
+Fields are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty fields.
+
+@item FS == ""
+Each individual character in the record becomes a separate field.
+(This is a common extension; it is not specified by the POSIX standard.)
+@end table
+
+@sidebar @code{FS} and @code{IGNORECASE}
+
+The @code{IGNORECASE} variable
+(@pxref{User-modified})
+affects field splitting @emph{only} when the value of @code{FS} is a regexp.
+It has no effect when @code{FS} is a single character, even if
+that character is a letter. Thus, in the following code:
+
+@example
+FS = "c"
+IGNORECASE = 1
+$0 = "aCa"
+print $1
+@end example
+
+@noindent
+The output is @samp{aCa}. If you really want to split fields on an
+alphabetic character while ignoring case, use a regexp that will
+do it for you (e.g., @samp{FS = "[c]"}). In this case, @code{IGNORECASE}
+will take effect.
+@end sidebar
+
+
+@node Constant Size
+@section Reading Fixed-Width Data
+
+@cindex data, fixed-width
+@cindex fixed-width data
+@cindex advanced features, fixed-width data
+
+@c O'Reilly doesn't like it as a note the first thing in the section.
+This @value{SECTION} discusses an advanced
+feature of @command{gawk}. If you are a novice @command{awk} user,
+you might want to skip it on the first reading.
+
+@command{gawk} provides a facility for dealing with fixed-width fields
+with no distinctive field separator. For example, data of this nature
+arises in the input for old Fortran programs where numbers are run
+together, or in the output of programs that did not anticipate the use
+of their output as input for other programs.
+
+An example of the latter is a table where all the columns are lined up by
+the use of a variable number of spaces and @emph{empty fields are just
+spaces}. Clearly, @command{awk}'s normal field splitting based on @code{FS}
+does not work well in this case. Although a portable @command{awk} program
+can use a series of @code{substr()} calls on @code{$0}
+(@pxref{String Functions}),
+this is awkward and inefficient for a large number of fields.
+
+@cindex troubleshooting, fatal errors, field widths@comma{} specifying
+@cindex @command{w} utility
+@cindex @code{FIELDWIDTHS} variable
+@cindex @command{gawk}, @code{FIELDWIDTHS} variable in
+The splitting of an input record into fixed-width fields is specified by
+assigning a string containing space-separated numbers to the built-in
+variable @code{FIELDWIDTHS}. Each number specifies the width of the field,
+@emph{including} columns between fields. If you want to ignore the columns
+between fields, you can specify the width as a separate field that is
+subsequently ignored.
+It is a fatal error to supply a field width that is not a positive number.
+The following data is the output of the Unix @command{w} utility. It is useful
+to illustrate the use of @code{FIELDWIDTHS}:
+
+@example
+@group
+ 10:06pm up 21 days, 14:04, 23 users
+User tty login@ idle JCPU PCPU what
+hzuo ttyV0 8:58pm 9 5 vi p24.tex
+hzang ttyV3 6:37pm 50 -csh
+eklye ttyV5 9:53pm 7 1 em thes.tex
+dportein ttyV6 8:17pm 1:47 -csh
+gierd ttyD3 10:00pm 1 elm
+dave ttyD4 9:47pm 4 4 w
+brent ttyp0 26Jun91 4:46 26:46 4:41 bash
+dave ttyq4 26Jun9115days 46 46 wnewmail
+@end group
+@end example
+
+The following program takes this input, converts the idle time to
+number of seconds, and prints out the first two fields and the calculated
+idle time:
+
+@example
+BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
+NR > 2 @{
+ idle = $4
+ sub(/^ +/, "", idle) # strip leading spaces
+ if (idle == "")
+ idle = 0
+ if (idle ~ /:/) @{
+ split(idle, t, ":")
+ idle = t[1] * 60 + t[2]
+ @}
+ if (idle ~ /days/)
+ idle *= 24 * 60 * 60
+
+ print $1, $2, idle
+@}
+@end example
+
+@quotation NOTE
+The preceding program uses a number of @command{awk} features that
+haven't been introduced yet.
+@end quotation
+
+Running the program on the data produces the following results:
+
+@example
+hzuo ttyV0 0
+hzang ttyV3 50
+eklye ttyV5 0
+dportein ttyV6 107
+gierd ttyD3 1
+dave ttyD4 0
+brent ttyp0 286
+dave ttyq4 1296000
+@end example
+
+Another (possibly more practical) example of fixed-width input data
+is the input from a deck of balloting cards. In some parts of
+the United States, voters mark their choices by punching holes in computer
+cards. These cards are then processed to count the votes for any particular
+candidate or on any particular issue. Because a voter may choose not to
+vote on some issue, any column on the card may be empty. An @command{awk}
+program for processing such data could use the @code{FIELDWIDTHS} feature
+to simplify reading the data. (Of course, getting @command{gawk} to run on
+a system with card readers is another story!)
+
+@cindex @command{gawk}, splitting fields and
+Assigning a value to @code{FS} causes @command{gawk} to use
+@code{FS} for field splitting again. Use @samp{FS = FS} to make this happen,
+without having to know the current value of @code{FS}.
+In order to tell which kind of field splitting is in effect,
+use @code{PROCINFO["FS"]}
+(@pxref{Auto-set}).
+The value is @code{"FS"} if regular field splitting is being used,
+or it is @code{"FIELDWIDTHS"} if fixed-width field splitting is being used:
+
+@example
+if (PROCINFO["FS"] == "FS")
+ @var{regular field splitting} @dots{}
+else if (PROCINFO["FS"] == "FIELDWIDTHS")
+ @var{fixed-width field splitting} @dots{}
+else
+ @var{content-based field splitting} @dots{} @ii{(see next @value{SECTION})}
+@end example
+
+This information is useful when writing a function
+that needs to temporarily change @code{FS} or @code{FIELDWIDTHS},
+read some records, and then restore the original settings
+(@DBPXREF{Passwd Functions}
+for an example of such a function).
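+
+As a rough sketch of that idea (using the @code{getline} mechanism
+described later in this @value{CHAPTER}, a hypothetical @value{DF}
+named @file{fixed.data}, and made-up field widths; the content-based
+mechanism of the next @value{SECTION} is ignored here), such code
+might look like this:
+
+@example
+BEGIN @{
+    save_how = PROCINFO["FS"]        # remember current mechanism
+    save_fs = FS
+    save_fw = FIELDWIDTHS
+
+    FIELDWIDTHS = "9 6 10"           # switch to fixed-width splitting
+    while ((getline < "fixed.data") > 0)
+        print $1, $2                 # fields are now fixed-width
+    close("fixed.data")
+
+    if (save_how == "FIELDWIDTHS")   # restore the previous settings
+        FIELDWIDTHS = save_fw
+    else
+        FS = save_fs
+@}
+@end example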
+
+@node Splitting By Content
+@section Defining Fields by Content
+
+@c O'Reilly doesn't like it as a note the first thing in the section.
+This @value{SECTION} discusses an advanced
+feature of @command{gawk}. If you are a novice @command{awk} user,
+you might want to skip it on the first reading.
+
+@cindex advanced features, specifying field content
+Normally, when using @code{FS}, @command{gawk} defines the fields as the
+parts of the record that occur in between each field separator. In other
+words, @code{FS} defines what a field @emph{is not}, instead of what a field
+@emph{is}.
+However, there are times when you really want to define the fields by
+what they are, and not by what they are not.
+
+The most notorious such case
+is so-called @dfn{comma-separated values} (CSV) data. Many spreadsheet programs,
+for example, can export their data into text files, where each record is
+terminated with a newline, and fields are separated by commas. If only
+commas separated the data, there wouldn't be an issue. The problem comes when
+one of the fields contains an @emph{embedded} comma.
+In such cases, most programs embed the field in double quotes.@footnote{The
+CSV format lacked a formal standard definition for many years.
+@uref{http://www.ietf.org/rfc/rfc4180.txt, RFC 4180}
+standardizes the most common practices.}
+So we might have data like this:
+
+@example
+@c file eg/misc/addresses.csv
+Robbins,Arnold,"1234 A Pretty Street, NE",MyTown,MyState,12345-6789,USA
+@c endfile
+@end example
+
+@cindex @command{gawk}, @code{FPAT} variable in
+@cindex @code{FPAT} variable
+The @code{FPAT} variable offers a solution for cases like this.
+The value of @code{FPAT} should be a string that provides a regular expression.
+This regular expression describes the contents of each field.
+
+In the case of CSV data as presented here, each field is either ``anything that
+is not a comma,'' or ``a double quote, anything that is not a double quote, and a
+closing double quote.'' If written as a regular expression constant
+(@pxref{Regexp}),
+we would have @code{/([^,]+)|("[^"]+")/}.
+Writing this as a string requires us to escape the double quotes, leading to:
+
+@example
+FPAT = "([^,]+)|(\"[^\"]+\")"
+@end example
+
+Putting this to use, here is a simple program to parse the data:
+
+@example
+@c file eg/misc/simple-csv.awk
+BEGIN @{
+ FPAT = "([^,]+)|(\"[^\"]+\")"
+@}
+
+@{
+ print "NF = ", NF
+ for (i = 1; i <= NF; i++) @{
+ printf("$%d = <%s>\n", i, $i)
+ @}
+@}
+@c endfile
+@end example
+
+When run, we get the following:
+
+@example
+$ @kbd{gawk -f simple-csv.awk addresses.csv}
+@print{} NF = 7
+@print{} $1 = <Robbins>
+@print{} $2 = <Arnold>
+@print{} $3 = <"1234 A Pretty Street, NE">
+@print{} $4 = <MyTown>
+@print{} $5 = <MyState>
+@print{} $6 = <12345-6789>
+@print{} $7 = <USA>
+
+Note the embedded comma in the value of @code{$3}.
+
+A straightforward improvement when processing CSV data of this sort
+would be to remove the quotes when they occur, with something like this:
+
+@example
+if (substr($i, 1, 1) == "\"") @{
+ len = length($i)
+ $i = substr($i, 2, len - 2) # Get text within the two quotes
+@}
+@end example
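+
+For example, folding that test into the loop of the earlier
+@file{simple-csv.awk} program (with the same @code{BEGIN} rule setting
+@code{FPAT}) might look like the following sketch; it is not part of
+that program as shown:
+
+@example
+@{
+    for (i = 1; i <= NF; i++) @{
+        if (substr($i, 1, 1) == "\"") @{
+            len = length($i)
+            $i = substr($i, 2, len - 2)   # strip the surrounding quotes
+        @}
+        printf("$%d = <%s>\n", i, $i)
+    @}
+@}
+@end example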
+
+As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified})
+affects field splitting with @code{FPAT}.
+
+Assigning a value to @code{FPAT} overrides field splitting
+with @code{FS} and with @code{FIELDWIDTHS}.
+Similar to @code{FIELDWIDTHS}, the value of @code{PROCINFO["FS"]}
+will be @code{"FPAT"} if content-based field splitting is being used.
+
+@quotation NOTE
+Some programs export CSV data that contains embedded newlines between
+the double quotes. @command{gawk} provides no way to deal with this.
+Even though a formal specification for CSV data exists, there isn't much
+more to be done;
+the @code{FPAT} mechanism provides an elegant solution for the majority
+of cases, and the @command{gawk} developers are satisfied with that.
+@end quotation
+
+As written, the regexp used for @code{FPAT} requires that each field
+have at least one character. A straightforward modification
+(changing the first @samp{+} to @samp{*}) allows fields to be empty:
+
+@example
+FPAT = "([^,]*)|(\"[^\"]+\")"
+@end example
+
+Finally, the @code{patsplit()} function makes the same functionality
+available for splitting regular strings (@pxref{String Functions}).
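+
+For instance, the following sketch applies the same CSV regexp to an
+ordinary string instead of an input record (the string is just the
+first few fields of the sample data shown earlier):
+
+@example
+BEGIN @{
+    FPAT = "([^,]+)|(\"[^\"]+\")"
+    n = patsplit("Robbins,Arnold,\"1234 A Pretty Street, NE\"", part)
+    print n, part[1], part[3]
+@}
+@end example
+
+@noindent
+Here @code{patsplit()} uses the value of @code{FPAT} by default; the
+program prints @samp{3}, @samp{Robbins}, and the quoted street address.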
+
+To recap, @command{gawk} provides three independent methods
+to split input records into fields. The mechanism in effect at any
+given time is determined by which of the three
+variables---@code{FS}, @code{FIELDWIDTHS}, and @code{FPAT}---was
+assigned to most recently.
+
+@node Multiple Line
+@section Multiple-Line Records
+
+@cindex multiple-line records
+@cindex records, multiline
+@cindex input, multiline records
+@cindex files, reading, multiline records
+@cindex input, files, See input files
+In some databases, a single line cannot conveniently hold all the
+information in one entry. In such cases, you can use multiline
+records. The first step in doing this is to choose your data format.
+
+@cindex record separators, with multiline records
+One technique is to use an unusual character or string to separate
+records. For example, you could use the formfeed character (written
+@samp{\f} in @command{awk}, as in C) to separate them, making each record
+a page of the file. To do this, just set the variable @code{RS} to
+@code{"\f"} (a string containing the formfeed character). Any
+other character could equally well be used, as long as it won't be part
+of the data in a record.
+
+@cindex @code{RS} variable, multiline records and
+Another technique is to have blank lines separate records. By a special
+dispensation, an empty string as the value of @code{RS} indicates that
+records are separated by one or more blank lines. When @code{RS} is set
+to the empty string, each record always ends at the first blank line
+encountered. The next record doesn't start until the first nonblank
+line that follows. No matter how many blank lines appear in a row, they
+all act as one record separator.
+(Blank lines must be completely empty; lines that contain only
+whitespace do not count.)
+
+@cindex leftmost longest match
+@cindex matching, leftmost longest
+You can achieve the same effect as @samp{RS = ""} by assigning the
+string @code{"\n\n+"} to @code{RS}. This regexp matches the newline
+at the end of the record and one or more blank lines after the record.
+In addition, a regular expression always matches the longest possible
+sequence when there is a choice
+(@pxref{Leftmost Longest}).
+So the next record doesn't start until
+the first nonblank line that follows---no matter how many blank lines
+appear in a row, they are considered one record separator.
+
+@cindex dark corner, multiline records
+However, there is an important difference between @samp{RS = ""} and
+@samp{RS = "\n\n+"}. In the first case, leading newlines in the input
+@value{DF} are ignored, and if a file ends without extra blank lines
+after the last record, the final newline is removed from the record.
+In the second case, this special processing is not done.
+@value{DARKCORNER}
+
+@cindex field separator, in multiline records
+@cindex @code{FS}, in multiline records
+Now that the input is separated into records, the second step is to
+separate the fields in the record. One way to do this is to divide each
+of the lines into fields in the normal manner. This happens by default
+as the result of a special feature. When @code{RS} is set to the empty
+string, @emph{and} @code{FS} is set to a single character,
+the newline character @emph{always} acts as a field separator.
+This is in addition to whatever field separations result from
+@code{FS}.@footnote{When @code{FS} is the null string (@code{""})
+or a regexp, this special feature of @code{RS} does not apply.
+It does apply to the default field separator of a single space:
+@samp{FS = @w{" "}}.}
+
+The original motivation for this special exception was probably to provide
+useful behavior in the default case (i.e., @code{FS} is equal
+to @w{@code{" "}}). This feature can be a problem if you really don't
+want the newline character to separate fields, because there is no way to
+prevent it. However, you can work around this by using the @code{split()}
+function to break up the record manually
+(@pxref{String Functions}).
+If you have a single character field separator, you can work around
+the special feature in a different way, by making @code{FS} into a
+regexp for that single character. For example, if the field
+separator is a percent character, instead of
+@samp{FS = "%"}, use @samp{FS = "[%]"}.
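+
+For example, here is a sketch of the @code{split()} workaround,
+assuming that fields within each record are separated by @samp{%}
+(the data layout here is hypothetical):
+
+@example
+BEGIN @{ RS = "" @}        # blank lines separate records
+
+@{
+    # Split the whole record on "%" ourselves, so that
+    # newlines remain part of the field data:
+    n = split($0, pieces, "%")
+    for (i = 1; i <= n; i++)
+        print i, pieces[i]
+@}
+@end example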
+
+Another way to separate fields is to
+put each field on a separate line: to do this, just set the
+variable @code{FS} to the string @code{"\n"}. (This single
+character separator matches a single newline.)
+A practical example of a @value{DF} organized this way might be a mailing
+list, where each entry is separated by blank lines. Consider a mailing
+list in a file named @file{addresses}, which looks like this:
+
+@example
+Jane Doe
+123 Main Street
+Anywhere, SE 12345-6789
+
+John Smith
+456 Tree-lined Avenue
+Smallville, MW 98765-4321
+@dots{}
+@end example
+
+@noindent
+A simple program to process this file is as follows:
+
+@example
+# addrs.awk --- simple mailing list program
+
+# Records are separated by blank lines.
+# Each line is one field.
+BEGIN @{ RS = "" ; FS = "\n" @}
+
+@{
+ print "Name is:", $1
+ print "Address is:", $2
+ print "City and State are:", $3
+ print ""
+@}
+@end example
+
+Running the program produces the following output:
+
+@example
+$ @kbd{awk -f addrs.awk addresses}
+@print{} Name is: Jane Doe
+@print{} Address is: 123 Main Street
+@print{} City and State are: Anywhere, SE 12345-6789
+@print{}
+@print{} Name is: John Smith
+@print{} Address is: 456 Tree-lined Avenue
+@print{} City and State are: Smallville, MW 98765-4321
+@print{}
+@dots{}
+@end example
+
+@DBXREF{Labels Program} for a more realistic program dealing with
+address lists. The following list summarizes how records are split,
+based on the value of
+@ifinfo
+@code{RS}.
+(@samp{==} means ``is equal to.'')
+@end ifinfo
+@ifnotinfo
+@code{RS}:
+@end ifnotinfo
+
+@table @code
+@item RS == "\n"
+Records are separated by the newline character (@samp{\n}). In effect,
+every line in the @value{DF} is a separate record, including blank lines.
+This is the default.
+
+@item RS == @var{any single character}
+Records are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty records.
+
+@item RS == ""
+Records are separated by runs of blank lines.
+When @code{FS} is a single character, then
+the newline character
+always serves as a field separator, in addition to whatever value
+@code{FS} may have. Leading and trailing newlines in a file are ignored.
+
+@item RS == @var{regexp}
+Records are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty records.
+(This is a @command{gawk} extension; it is not specified by the
+POSIX standard.)
+@end table
+
+@cindex @command{gawk}, @code{RT} variable in
+@cindex @code{RT} variable
+If not in compatibility mode (@pxref{Options}), @command{gawk} sets
+@code{RT} to the input text that matched the value specified by @code{RS}.
+If the input file ends without any text that matches @code{RS},
+then @command{gawk} sets @code{RT} to the null string.
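+
+For example, the following sketch reports how much separator text
+ended each record when @code{RS} is a regexp; it reads whatever
+@value{DF}s you name on the command line:
+
+@example
+BEGIN @{ RS = "\n\n+" @}   # records end at one or more blank lines
+
+@{
+    printf "record %d ended with %d separator character(s)\n",
+                                              NR, length(RT)
+@}
+@end example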
+
+@node Getline
+@section Explicit Input with @code{getline}
+
+@cindex @code{getline} command, explicit input with
+@cindex input, explicit
+So far we have been getting our input data from @command{awk}'s main
+input stream---either the standard input (usually your keyboard, sometimes
+the output from another program) or from the
+files specified on the command line. The @command{awk} language has a
+special built-in command called @code{getline} that
+can be used to read input under your explicit control.
+
+The @code{getline} command is used in several different ways and should
+@emph{not} be used by beginners.
+The examples that follow the explanation of the @code{getline} command
+include material that has not been covered yet. Therefore, come back
+and study the @code{getline} command @emph{after} you have reviewed the
+rest of
+@ifinfo
+this @value{DOCUMENT}
+@end ifinfo
+@ifhtml
+this @value{DOCUMENT}
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+Parts I and II
+@end ifnothtml
+@end ifnotinfo
+and have a good knowledge of how @command{awk} works.
+
+@cindex @command{gawk}, @code{ERRNO} variable in
+@cindex @code{ERRNO} variable, with @command{getline} command
+@cindex differences in @command{awk} and @command{gawk}, @code{getline} command
+@cindex @code{getline} command, return values
+@cindex @option{--sandbox} option, input redirection with @code{getline}
+
+The @code{getline} command returns 1 if it finds a record and 0 if
+it encounters the end of the file. If there is some error in getting
+a record, such as a file that cannot be opened, then @code{getline}
+returns @minus{}1. In this case, @command{gawk} sets the variable
+@code{ERRNO} to a string describing the error that occurred.
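+
+Because of these return values, loops that use @code{getline} should
+always test for a value greater than zero, and may check the return
+value afterward to distinguish end-of-file from an error. Here is a
+minimal sketch, run under @command{gawk} (the @value{FN}
+@file{data.txt} is illustrative only):
+
+@example
+BEGIN @{
+    file = "data.txt"
+    while ((ret = (getline line < file)) > 0)
+        print line
+    if (ret < 0)    # -1 means an error occurred
+        print "error reading", file ":", ERRNO > "/dev/stderr"
+    close(file)
+@}
+@end example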
+
+In the following examples, @var{command} stands for a string value that
+represents a shell command.
+
+@quotation NOTE
+When @option{--sandbox} is specified (@pxref{Options}),
+reading lines from files, pipes, and coprocesses is disabled.
+@end quotation
+
+@menu
+* Plain Getline:: Using @code{getline} with no arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable from a
+ file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable from a
+ pipe.
+* Getline/Coprocess:: Using @code{getline} from a coprocess.
+* Getline/Variable/Coprocess:: Using @code{getline} into a variable from a
+ coprocess.
+* Getline Notes:: Important things to know about @code{getline}.
+* Getline Summary:: Summary of @code{getline} Variants.
+@end menu
+
+@node Plain Getline
+@subsection Using @code{getline} with No Arguments
+
+The @code{getline} command can be used without arguments to read input
+from the current input file. All it does in this case is read the next
+input record and split it up into fields. This is useful if you've
+finished processing the current record, but want to do some special
+processing on the next record @emph{right now}. For example:
+
+@example
+# Remove text between /* and */, inclusive
+@{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
+ print("unexpected EOF or error:", ERRNO) > "/dev/stderr"
+ exit
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
+@}
+@end example
+
+@c 8/2014: Here is some sample input:
+@ignore
+mon/*comment*/key
+rab/*commen
+t*/bit
+horse /*comment*/more text
+part 1 /*comment*/part 2 /*comment*/part 3
+no comment
+@end ignore
+
+This @command{awk} program deletes C-style comments (@samp{/* @dots{}
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
+statements, you could perform more complicated processing on the
+decommented input, such as searching for matches of a regular
+expression. (This program has a subtle problem---it does not work if one
+comment ends and another begins on the same line.)
+
+This form of the @code{getline} command sets @code{NF},
+@code{NR}, @code{FNR}, @code{RT}, and the value of @code{$0}.
+
+@quotation NOTE
+The new value of @code{$0} is used to test
+the patterns of any subsequent rules. The original value
+of @code{$0} that triggered the rule that executed @code{getline}
+is lost.
+By contrast, the @code{next} statement reads a new record
+but immediately begins processing it normally, starting with the first
+rule in the program. @xref{Next Statement}.
+@end quotation
+
+@node Getline/Variable
+@subsection Using @code{getline} into a Variable
+@cindex @code{getline} into a variable
+@cindex variables, @code{getline} command into@comma{} using
+
+You can use @samp{getline @var{var}} to read the next record from
+@command{awk}'s input into the variable @var{var}. No other processing is
+done.
+For example, suppose the next line is a comment or a special string,
+and you want to read it without triggering
+any rules. This form of @code{getline} allows you to read that line
+and store it in a variable so that the main
+read-a-line-and-check-each-rule loop of @command{awk} never sees it.
+The following example swaps every two lines of input:
+
+@example
+@{
+ if ((getline tmp) > 0) @{
+ print tmp
+ print $0
+ @} else
+ print $0
+@}
+@end example
+
+@noindent
+It takes the following list:
+
+@example
+wan
+tew
+free
+phore
+@end example
+
+@noindent
+and produces these results:
+
+@example
+tew
+wan
+phore
+free
+@end example
+
+The @code{getline} command used in this way sets only the variables
+@code{NR}, @code{FNR}, and @code{RT} (and of course, @var{var}).
+The record is not
+split into fields, so the values of the fields (including @code{$0}) and
+the value of @code{NF} do not change.
+
+@node Getline/File
+@subsection Using @code{getline} from a File
+
+@cindex @code{getline} from a file
+@cindex input redirection
+@cindex redirection of input
+@cindex @code{<} (left angle bracket), @code{<} operator (I/O)
+@cindex left angle bracket (@code{<}), @code{<} operator (I/O)
+@cindex operators, input/output
+Use @samp{getline < @var{file}} to read the next record from @var{file}.
+Here @var{file} is a string-valued expression that
+specifies the @value{FN}. @samp{< @var{file}} is called a @dfn{redirection}
+because it directs input to come from a different place.
+For example, the following
+program reads its input record from the file @file{secondary.input} when it
+encounters a first field with a value equal to 10 in the current input
+file:
+
+@example
+@{
+ if ($1 == 10) @{
+ getline < "secondary.input"
+ print
+ @} else
+ print
+@}
+@end example
+
+Because the main input stream is not used, the values of @code{NR} and
+@code{FNR} are not changed. However, the record it reads is split into fields in
+the normal manner, so the values of @code{$0} and the other fields are
+changed, resulting in a new value of @code{NF}.
+@code{RT} is also set.
+
+@cindex POSIX @command{awk}, @code{<} operator and
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{getline < @var{expression}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous
+because the concatenation operator (not discussed yet; @pxref{Concatenation})
+is not parenthesized. You should write it as @samp{getline < (dir "/" file)} if
+you want your program to be portable to all @command{awk} implementations.
+
+@node Getline/Variable/File
+@subsection Using @code{getline} into a Variable from a File
+@cindex variables, @code{getline} command into@comma{} using
+
+Use @samp{getline @var{var} < @var{file}} to read input
+from the file
+@var{file}, and put it in the variable @var{var}. As earlier, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the predefined variables are
+changed and the record is not split into fields. The only variable
+changed is @var{var}.@footnote{This is not quite true. @code{RT} could
+be changed if @code{RS} is a regular expression.}
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}:
+
+@example
+@{
+ if (NF == 2 && $1 == "@@include") @{
+ while ((getline line < $2) > 0)
+ print line
+ close($2)
+ @} else
+ print
+@}
+@end example
+
+Note here how the name of the extra input file is not built into
+the program; it is taken directly from the data, specifically from the second field on
+the @code{@@include} line.
+
+The @code{close()} function is called to ensure that if two identical
+@code{@@include} lines appear in the input, the entire specified file is
+included twice.
+@xref{Close Files And Pipes}.
+
+One deficiency of this program is that it does not process nested
+@code{@@include} statements
+(i.e., @code{@@include} statements in included files)
+the way a true macro preprocessor would.
+@DBXREF{Igawk Program} for a program
+that does handle nested @code{@@include} statements.
+
+@node Getline/Pipe
+@subsection Using @code{getline} from a Pipe
+
+@c From private email, dated October 2, 1988. Used by permission, March 2013.
+@cindex Kernighan, Brian
+@quotation
+@i{Omniscience has much to recommend it.
+Failing that, attention to details would be useful.}
+@author Brian Kernighan
+@end quotation
+
+@cindex @code{|} (vertical bar), @code{|} operator (I/O)
+@cindex vertical bar (@code{|}), @code{|} operator (I/O)
+@cindex input pipeline
+@cindex pipe, input
+@cindex operators, input/output
+The output of a command can also be piped into @code{getline}, using
+@samp{@var{command} | getline}. In
+this case, the string @var{command} is run as a shell command and its output
+is piped into @command{awk} to be used as input. This form of @code{getline}
+reads one record at a time from the pipe.
+For example, the following program copies its input to its output, except for
+lines that begin with @samp{@@execute}, which are replaced by the output
+produced by running the rest of the line as a shell command:
+
+@example
+@{
+ if ($1 == "@@execute") @{
+ tmp = substr($0, 10) # Remove "@@execute"
+ while ((tmp | getline) > 0)
+ print
+ close(tmp)
+ @} else
+ print
+@}
+@end example
+
+@noindent
+The @code{close()} function is called to ensure that if two identical
+@samp{@@execute} lines appear in the input, the command is run for
+each one.
+@ifnottex
+@ifnotdocbook
+@xref{Close Files And Pipes}.
+@end ifnotdocbook
+@end ifnottex
+@c This example is unrealistic, since you could just use system
+Given the input:
+
+@example
+foo
+bar
+baz
+@@execute who
+bletch
+@end example
+
+@noindent
+the program might produce:
+
+@cindex Robbins, Bill
+@cindex Robbins, Miriam
+@cindex Robbins, Arnold
+@example
+foo
+bar
+baz
+arnold ttyv0 Jul 13 14:22
+miriam ttyp0 Jul 13 14:23 (murphy:0)
+bill ttyp1 Jul 13 14:23 (murphy:0)
+bletch
+@end example
+
+@noindent
+Notice that this program ran the command @command{who} and printed the result.
+(If you try this program yourself, you will of course get different results,
+depending upon who is logged in on your system.)
+
+This variation of @code{getline} splits the record into fields, sets the
+value of @code{NF}, and recomputes the value of @code{$0}. The values of
+@code{NR} and @code{FNR} are not changed.
+@code{RT} is set.
+
+@cindex POSIX @command{awk}, @code{|} I/O operator and
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{@var{expression} | getline} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}---for example, @samp{@w{"echo "} "date" | getline} is ambiguous
+because the concatenation operator is not parenthesized. You should
+write it as @samp{(@w{"echo "} "date") | getline} if you want your program
+to be portable to all @command{awk} implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@cindex @command{mawk} utility
+@quotation NOTE
+Unfortunately, @command{gawk} has not been consistent in its treatment
+of a construct like @samp{@w{"echo "} "date" | getline}.
+Most versions, including the current version, treat it as
+@samp{@w{("echo "} "date") | getline}.
+(This is also how BWK @command{awk} behaves.)
+Some versions changed and treated it as
+@samp{@w{"echo "} ("date" | getline)}.
+(This is how @command{mawk} behaves.)
+In short, @emph{always} use explicit parentheses, and then you won't
+have to worry.
+@end quotation
+
+@node Getline/Variable/Pipe
+@subsection Using @code{getline} into a Variable from a Pipe
+@cindex variables, @code{getline} command into@comma{} using
+
+When you use @samp{@var{command} | getline @var{var}}, the
+output of @var{command} is sent through a pipe to
+@code{getline} and into the variable @var{var}. For example, the
+following program reads the current date and time into the variable
+@code{current_time}, using the @command{date} utility, and then
+prints it:
+
+@example
+BEGIN @{
+ "date" | getline current_time
+ close("date")
+ print "Report printed on " current_time
+@}
+@end example
+
+In this version of @code{getline}, none of the predefined variables are
+changed and the record is not split into fields. However, @code{RT} is set.
+
+@ifinfo
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{@var{expression} | getline @var{var}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{@w{"echo "} "date" | getline @var{var}} is ambiguous
+because the concatenation operator is not parenthesized. You should
+write it as @samp{(@w{"echo "} "date") | getline @var{var}} if you want your
+program to be portable to other @command{awk} implementations.
+@end ifinfo
+
+@node Getline/Coprocess
+@subsection Using @code{getline} from a Coprocess
+@cindex coprocesses, @code{getline} from
+@cindex @code{getline} command, coprocesses@comma{} using from
+@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
+@cindex vertical bar (@code{|}), @code{|&} operator (I/O)
+@cindex operators, input/output
+@cindex differences in @command{awk} and @command{gawk}, input/output operators
+
+Input into @code{getline} from a pipe is a one-way operation.
+The command that is started with @samp{@var{command} | getline} only
+sends data @emph{to} your @command{awk} program.
+
+On occasion, you might want to send data to another program
+for processing and then read the results back.
+@command{gawk} allows you to start a @dfn{coprocess}, with which two-way
+communications are possible. This is done with the @samp{|&}
+operator.
+Typically, you write data to the coprocess first and then
+read results back, as shown in the following:
+
+@example
+print "@var{some query}" |& "db_server"
+"db_server" |& getline
+@end example
+
+@noindent
+which sends a query to @command{db_server} and then reads the results.
+
+The values of @code{NR} and
+@code{FNR} are not changed,
+because the main input stream is not used.
+However, the record is split into fields in
+the normal manner, thus changing the values of @code{$0}, of the other fields,
+and of @code{NF} and @code{RT}.
+
+Coprocesses are an advanced feature. They are discussed here only because
+this is the @value{SECTION} on @code{getline}.
+@xref{Two-way I/O},
+where coprocesses are discussed in more detail.
+
+@node Getline/Variable/Coprocess
+@subsection Using @code{getline} into a Variable from a Coprocess
+@cindex variables, @code{getline} command into@comma{} using
+
+When you use @samp{@var{command} |& getline @var{var}}, the output from
+the coprocess @var{command} is sent through a two-way pipe to @code{getline}
+and into the variable @var{var}.
+
+In this version of @code{getline}, none of the predefined variables are
+changed and the record is not split into fields. The only variable
+changed is @var{var}.
+However, @code{RT} is set.
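+
+Continuing the earlier sketch with the hypothetical
+@command{db_server} coprocess, the reply can be captured in a
+variable instead of in @code{$0}:
+
+@example
+print "@var{some query}" |& "db_server"
+"db_server" |& getline reply
+print "the server replied:", reply
+@end example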
+
+@ifinfo
+Coprocesses are an advanced feature. They are discussed here only because
+this is the @value{SECTION} on @code{getline}.
+@xref{Two-way I/O},
+where coprocesses are discussed in more detail.
+@end ifinfo
+
+@node Getline Notes
+@subsection Points to Remember About @code{getline}
+Here are some miscellaneous points about @code{getline} that
+you should bear in mind:
+
+@itemize @value{BULLET}
+@item
+When @code{getline} changes the value of @code{$0} and @code{NF},
+@command{awk} does @emph{not} automatically jump to the start of the
+program and start testing the new record against every pattern.
+However, the new record is tested against any subsequent rules.
+
+@cindex differences in @command{awk} and @command{gawk}, implementation limitations
+@cindex implementation issues, @command{gawk}, limits
+@cindex @command{awk}, implementations, limits
+@cindex @command{gawk}, implementation issues, limits
+@item
+Some very old @command{awk} implementations limit the number of pipelines that an @command{awk}
+program may have open to just one. In @command{gawk}, there is no such limit.
+You can open as many pipelines (and coprocesses) as the underlying operating
+system permits.
+
+@cindex side effects, @code{FILENAME} variable
+@cindex @code{FILENAME} variable, @code{getline}@comma{} setting with
+@cindex dark corner, @code{FILENAME} variable
+@cindex @code{getline} command, @code{FILENAME} variable and
+@cindex @code{BEGIN} pattern, @code{getline} and
+@item
+An interesting side effect occurs if you use @code{getline} without a
+redirection inside a @code{BEGIN} rule. Because an unredirected @code{getline}
+reads from the command-line @value{DF}s, the first @code{getline} command
+causes @command{awk} to set the value of @code{FILENAME}. Normally,
+@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you
+have not yet started to process the command-line @value{DF}s.
+@value{DARKCORNER}
+(See @ref{BEGIN/END};
+also @pxref{Auto-set}.)
+
+@item
+Using @code{FILENAME} with @code{getline}
+(@samp{getline < FILENAME})
+is likely to be a source for
+confusion. @command{awk} opens a separate input stream from the
+current input file. However, because no variable is used, @code{$0}
+and @code{NF} are still updated. If you're doing this, it's
+probably by accident, and you should reconsider what it is you're
+trying to accomplish.
+
+@item
+@DBREF{Getline Summary} presents a table summarizing the
+@code{getline} variants and which variables they can affect.
+It is worth noting that those variants which do not use redirection
+can cause @code{FILENAME} to be updated if they cause
+@command{awk} to start reading a new input file.
+
+@item
+@cindex Moore, Duncan
+If the variable being assigned is an expression with side effects,
+different versions of @command{awk} behave differently upon encountering
+end-of-file. Some versions don't evaluate the expression; many versions
+(including @command{gawk}) do. Here is an example, due to Duncan Moore:
+
+@ignore
+Date: Sun, 01 Apr 2012 11:49:33 +0100
+From: Duncan Moore <duncan.moore@@gmx.com>
+@end ignore
+
+@example
+BEGIN @{
+ system("echo 1 > f")
+ while ((getline a[++c] < "f") > 0) @{ @}
+ print c
+@}
+@end example
+
+@noindent
+Here, the side effect is the @samp{++c}. Is @code{c} incremented if
+end of file is encountered, before the element in @code{a} is assigned?
+
+@command{gawk} treats @code{getline} like a function call, and evaluates
+the expression @samp{a[++c]} before attempting to read from @file{f}.
+However, some versions of @command{awk} only evaluate the expression once they
+know that there is a string value to be assigned.
+@end itemize
+
+@node Getline Summary
+@subsection Summary of @code{getline} Variants
+@cindex @code{getline} command, variants
+
+@ref{table-getline-variants}
+summarizes the eight variants of @code{getline},
+listing which predefined variables are set by each one,
+and whether the variant is standard or a @command{gawk} extension.
+Note: for each variant, @command{gawk} sets the @code{RT} predefined variable.
+
+@float Table,table-getline-variants
+@caption{@code{getline} variants and what they set}
+@multitable @columnfractions .33 .38 .27
+@headitem Variant @tab Effect @tab @command{awk} / @command{gawk}
+@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR}, @code{NR}, and @code{RT} @tab @command{awk}
+@item @code{getline <} @var{file} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{awk}
+@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{awk}
+@item @var{command} @code{|& getline} @tab Sets @code{$0}, @code{NF}, and @code{RT} @tab @command{gawk}
+@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} and @code{RT} @tab @command{gawk}
+@end multitable
+@end float
+
+@node Read Timeout
+@section Reading Input with a Timeout
+@cindex timeout, reading input
+
+@cindex differences in @command{awk} and @command{gawk}, read timeouts
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+You may specify a timeout in milliseconds for reading input from the keyboard,
+a pipe, or two-way communication, including TCP/IP sockets. This can be done
+on a per-input, per-command, or per-connection basis, by setting a special element
+in the @code{PROCINFO} array (@pxref{Auto-set}):
+
+@example
+PROCINFO["input_name", "READ_TIMEOUT"] = @var{timeout in milliseconds}
+@end example
+
+When set, this causes @command{gawk} to time out and return failure
+if no data is available to read within the specified timeout period.
+For example, a TCP client can decide to give up on receiving
+any response from the server after a certain amount of time:
+
+@example
+Service = "/inet/tcp/0/localhost/daytime"
+PROCINFO[Service, "READ_TIMEOUT"] = 100
+if ((Service |& getline) > 0)
+ print $0
+else if (ERRNO != "")
+ print ERRNO
+@end example
+
+Here is how to read interactively from the user@footnote{This assumes
+that standard input is the keyboard.} without waiting
+for more than five seconds:
+
+@example
+PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 5000
+while ((getline < "/dev/stdin") > 0)
+ print $0
+@end example
+
+If input does not arrive within the timeout period, @command{gawk}
+terminates the read operation, returns failure, and sets @code{ERRNO}
+to an appropriate string value.
+A negative or zero value for the timeout is the same as specifying
+no timeout at all.
+
+A timeout can also be set for reading from the keyboard in the implicit
+loop that reads input records and matches them against patterns,
+like so:
+
+@example
+$ @kbd{gawk 'BEGIN @{ PROCINFO["-", "READ_TIMEOUT"] = 5000 @}}
+> @kbd{@{ print "You entered: " $0 @}'}
+@kbd{gawk}
+@print{} You entered: gawk
+@end example
+
+In this case, failure to respond within five seconds results in the following
+error message:
+
+@example
+@error{} gawk: cmd. line:2: (FILENAME=- FNR=1) fatal: error reading input file `-': Connection timed out
+@end example
+
+The timeout can be set or changed at any time, and will take effect on the
+next attempt to read from the input device. In the following example,
+we start with a timeout value of one second, and progressively
+reduce it by one-tenth of a second until we wait indefinitely
+for the input to arrive:
+
+@example
+PROCINFO[Service, "READ_TIMEOUT"] = 1000
+while ((Service |& getline) > 0) @{
+ print $0
+ PROCINFO[Service, "READ_TIMEOUT"] -= 100
+@}
+@end example
+
+@quotation NOTE
+You should not assume that the read operation will block
+exactly after the tenth record has been printed. It is possible that
+@command{gawk} will read and buffer more than one record's
+worth of data the first time. Because of this, changing the value
+of the timeout as in the preceding example is not very useful.
+@end quotation
+
+If the @code{PROCINFO} element is not present and the
+@env{GAWK_READ_TIMEOUT} environment variable exists,
+@command{gawk} uses its value to initialize the timeout value.
+The exclusive use of the environment variable to specify timeout
+has the disadvantage of not being able to control it
+on a per command or connection basis.
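+
+For example, here is a sketch of setting a three-second timeout from
+the shell, without changing the program itself; if standard input is
+the keyboard and no line is typed in time, this should produce the
+same fatal error shown earlier:
+
+@example
+$ @kbd{GAWK_READ_TIMEOUT=3000 gawk '@{ print "You entered: " $0 @}'}
+@end example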
+
+@command{gawk} considers a timeout event to be an error, even though
+a later attempt to read from the underlying device may succeed.
+This is a limitation, and it also
+means that you cannot use this to multiplex input from
+two or more sources.
+
+Assigning a timeout value prevents read operations from
+blocking indefinitely. But bear in mind that there are other ways
+@command{gawk} can stall waiting for an input device to be ready.
+A network client can sometimes take a long time to establish
+a connection before it can start reading any data,
+or the attempt to open a FIFO special file for reading can block
+indefinitely until some other process opens it for writing.
+
+@node Command-line directories
+@section Directories on the Command Line
+@cindex differences in @command{awk} and @command{gawk}, command-line directories
+@cindex directories, command-line
+@cindex command line, directories on
+
+According to the POSIX standard, files named on the @command{awk}
+command line must be text files; it is a fatal error if they are not.
+Most versions of @command{awk} treat a directory on the command line as
+a fatal error.
+
+By default, @command{gawk} produces a warning for a directory on the
+command line, but otherwise ignores it. This makes it easier to use
+shell wildcards with your @command{awk} program:
+
+@example
+$ @kbd{gawk -f whizprog.awk *} @ii{Directories could kill this program}
+@end example
+
+If either of the @option{--posix}
+or @option{--traditional} options is given, then @command{gawk} reverts
+to treating a directory on the command line as a fatal error.
+
+@DBXREF{Extension Sample Readdir} for a way to treat directories
+as usable data from an @command{awk} program.
+
+@node Input Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Input is split into records based on the value of @code{RS}.
+The possibilities are as follows:
+
+@multitable @columnfractions .25 .35 .40
+@headitem Value of @code{RS} @tab Records are split on @dots{} @tab @command{awk} / @command{gawk}
+@item Any single character @tab That character @tab @command{awk}
+@item The empty string (@code{""}) @tab Runs of two or more newlines @tab @command{awk}
+@item A regexp @tab Text that matches the regexp @tab @command{gawk}
+@end multitable
+
+@item
+@code{FNR} indicates how many records have been read from the current input file;
+@code{NR} indicates how many records have been read in total.
+
+@item
+@command{gawk} sets @code{RT} to the text matched by @code{RS}.
+
+@item
+After splitting the input into records, @command{awk} further splits
+the record into individual fields, named @code{$1}, @code{$2}, and so
+on. @code{$0} is the whole record, and @code{NF} indicates how many
+fields there are. The default way to split fields is between whitespace
+characters.
+
+@item
+Fields may be referenced using a variable, as in @code{$NF}. Fields
+may also be assigned values, which causes the value of @code{$0} to be
+recomputed when it is later referenced. Assigning to a field with a number
+greater than @code{NF} creates the field and rebuilds the record, using
+@code{OFS} to separate the fields. Incrementing @code{NF} does the same
+thing. Decrementing @code{NF} throws away fields and rebuilds the record.
+
+@item
+Field splitting is more complicated than record splitting:
+
+@multitable @columnfractions .40 .45 .15
+@headitem Field separator value @tab Fields are split @dots{} @tab @command{awk} / @command{gawk}
+@item @code{FS == " "} @tab On runs of whitespace @tab @command{awk}
+@item @code{FS == @var{any single character}} @tab On that character @tab @command{awk}
+@item @code{FS == @var{regexp}} @tab On text matching the regexp @tab @command{awk}
+@item @code{FS == ""} @tab Each individual character is a separate field @tab @command{gawk}
+@item @code{FIELDWIDTHS == @var{list of columns}} @tab Based on character position @tab @command{gawk}
+@item @code{FPAT == @var{regexp}} @tab On the text surrounding text matching the regexp @tab @command{gawk}
+@end multitable
+
+@item
+Using @samp{FS = "\n"} causes the entire record to be a single field
+(assuming that newlines separate records).
+
+@item
+@code{FS} may be set from the command line using the @option{-F} option.
+This can also be done using command-line variable assignment.
+
+@item
+Use @code{PROCINFO["FS"]} to see how fields are being split.
+
+@item
+Use @code{getline} in its various forms to read additional records,
+from the default input stream, from a file, or from a pipe or coprocess.
+
+@item
+Use @code{PROCINFO[@var{file}, "READ_TIMEOUT"]} to cause reads to timeout
+for @var{file}.
+
+@item
+Directories on the command line are fatal for standard @command{awk};
+@command{gawk} ignores them if not in POSIX mode.
+
+@end itemize
+
+@c EXCLUDE START
+@node Input Exercises
+@section Exercises
+
+@enumerate
+@item
+Using the @code{FIELDWIDTHS} variable (@pxref{Constant Size}),
+write a program to read election data, where each record represents
+one voter's votes. Come up with a way to define which columns are
+associated with each ballot item, and print the total votes,
+including abstentions, for each item.
+
+@item
+@ref{Plain Getline}, presented a program to remove C-style
+comments (@samp{/* @dots{} */}) from the input. That program
+does not work if one comment ends on one line and another one
+starts later on the same line.
+That can be fixed by making one simple change. What is it?
+
+@end enumerate
+@c EXCLUDE END
+
+@node Printing
+@chapter Printing Output
+
+@cindex printing
+@cindex output, printing, See printing
+One of the most common programming actions is to @dfn{print}, or output,
+some or all of the input. Use the @code{print} statement
+for simple output, and the @code{printf} statement
+for fancier formatting.
+The @code{print} statement is not limited when
+computing @emph{which} values to print. However, with two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(For the exceptions, @DBPXREF{Output Separators} and
+@ref{OFMT}.)
+For printing with specifications, you need the @code{printf} statement
+(@pxref{Printf}).
+
+@cindex @code{print} statement
+@cindex @code{printf} statement
+Besides basic and formatted printing, this @value{CHAPTER}
+also covers I/O redirections to files and pipes, introduces
+the special @value{FN}s that @command{gawk} processes internally,
+and discusses the @code{close()} built-in function.
+
+@menu
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output With @code{print}.
+* Printf:: The @code{printf} statement.
+* Redirection:: How to redirect output to multiple files and
+ pipes.
+* Special FD:: Special files for I/O.
+* Special Files:: File name interpretation in @command{gawk}.
+ @command{gawk} allows access to inherited file
+ descriptors.
+* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
+@end menu
+
+@node Print
+@section The @code{print} Statement
+
+Use the @code{print} statement to produce output with simple, standardized
+formatting. You specify only the strings or numbers to print, in a
+list separated by commas. They are output, separated by single spaces,
+followed by a newline. The statement looks like this:
+
+@example
+print @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of items may be optionally enclosed in parentheses. The
+parentheses are necessary if any of the item expressions uses the @samp{>}
+relational operator; otherwise it could be confused with an output redirection
+(@pxref{Redirection}).
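+
+For example, the following sketch contrasts the two interpretations;
+the first statement redirects output to a file whose name is in
+@code{$2}, while the second prints the result (1 or 0) of comparing
+the two fields:
+
+@example
+@{
+    print $1 > $2      # redirection: send $1 to the file named by $2
+    print ($1 > $2)    # comparison: print 1 or 0
+@}
+@end example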
+
+The items to print can be constant strings or numbers, fields of the
+current record (such as @code{$1}), variables, or any @command{awk}
+expression. Numeric values are converted to strings and then printed.
+
+@cindex records, printing
+@cindex lines, blank, printing
+@cindex text, printing
+The simple statement @samp{print} with no items is equivalent to
+@samp{print $0}: it prints the entire current record. To print a blank
+line, use @samp{print ""}.
+To print a fixed piece of text, use a string constant, such as
+@w{@code{"Don't Panic"}}, as one item. If you forget to use the
+double-quote characters, your text is taken as an @command{awk}
+expression, and you will probably get an error. Keep in mind that a
+space is printed between any two items.
+
+Note that the @code{print} statement is a statement and not an
+expression---you can't use it in the pattern part of a
+@var{pattern}-@var{action} statement, for example.
+
+@node Print Examples
+@section @code{print} Statement Examples
+
+Each @code{print} statement makes at least one line of output. However, it
+isn't limited to only one line. If an item value is a string containing a
+newline, the newline is output along with the rest of the string. A
+single @code{print} statement can make any number of lines this way.
+
+@cindex newlines, printing
+The following is an example of printing a string that contains embedded
+@ifinfo
+newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; @pxref{Escape Sequences}):
+@end ifinfo
+@ifhtml
+newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; @pxref{Escape Sequences}):
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+newlines:
+@end ifnothtml
+@end ifnotinfo
+
+@example
+$ @kbd{awk 'BEGIN @{ print "line one\nline two\nline three" @}'}
+@print{} line one
+@print{} line two
+@print{} line three
+@end example
+
+@cindex fields, printing
+The next example, which is run on the @file{inventory-shipped} file,
+prints the first two fields of each input record, with a space between
+them:
+
+@example
+$ @kbd{awk '@{ print $1, $2 @}' inventory-shipped}
+@print{} Jan 13
+@print{} Feb 15
+@print{} Mar 15
+@dots{}
+@end example
+
+@cindex @code{print} statement, commas, omitting
+@cindex troubleshooting, @code{print} statement@comma{} omitting commas
+A common mistake in using the @code{print} statement is to omit the comma
+between two items. This often has the effect of making the items run
+together in the output, with no space. The reason for this is that
+juxtaposing two string expressions in @command{awk} means to concatenate
+them. Here is the same program, without the comma:
+
+@example
+$ @kbd{awk '@{ print $1 $2 @}' inventory-shipped}
+@print{} Jan13
+@print{} Feb15
+@print{} Mar15
+@dots{}
+@end example
+
+@cindex @code{BEGIN} pattern, headings@comma{} adding
+To someone unfamiliar with the @file{inventory-shipped} file, neither
+example's output makes much sense. A heading line at the beginning
+would make it clearer. Let's add some headings to our table of months
+(@code{$1}) and green crates shipped (@code{$2}). We do this using
+a @code{BEGIN} rule (@pxref{BEGIN/END}) so that the headings are only
+printed once:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+When run, the program prints the following:
+
+@example
+Month Crates
+----- ------
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end example
+
+@noindent
+The only problem, however, is that the headings and the table data
+don't line up! We can fix this by printing some spaces between the
+two fields:
+
+@example
+@group
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end group
+@end example
+
+@cindex @code{printf} statement, columns@comma{} aligning
+@cindex columns, aligning
+Lining up columns this way can get pretty
+complicated when there are many columns to fix. Counting spaces for two
+or three columns is simple, but any more than this can take up
+a lot of time. This is why the @code{printf} statement was
+created (@pxref{Printf});
+one of its specialties is lining up columns of data.
+
+@cindex line continuations, in @code{print} statement
+@cindex @code{print} statement, line continuations and
+@quotation NOTE
+You can continue either a @code{print} or
+@code{printf} statement simply by putting a newline after any comma
+(@pxref{Statements/Lines}).
+@end quotation
+
+@node Output Separators
+@section Output Separators
+
+@cindex @code{OFS} variable
+As mentioned previously, a @code{print} statement contains a list
+of items separated by commas. In the output, the items are normally
+separated by single spaces. However, this doesn't need to be the case;
+a single space is simply the default. Any string of
+characters may be used as the @dfn{output field separator} by setting the
+predefined variable @code{OFS}. The initial value of this variable
+is the string @w{@code{" "}} (i.e., a single space).
+
+The output from an entire @code{print} statement is called an @dfn{output
+record}. Each @code{print} statement outputs one output record, and
+then outputs a string called the @dfn{output record separator} (or
+@code{ORS}). The initial value of @code{ORS} is the string @code{"\n"}
+(i.e., a newline character). Thus, each @code{print} statement normally
+makes a separate line.
+
+@cindex output, records
+@cindex output record separator, See @code{ORS} variable
+@cindex @code{ORS} variable
+@cindex @code{BEGIN} pattern, @code{OFS}/@code{ORS} variables, assigning values to
+In order to change how output fields and records are separated, assign
+new values to the variables @code{OFS} and @code{ORS}. The usual
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END}), so
+that it happens before any input is processed. It can also be done
+with assignments on the command line, before the names of the input
+files, or using the @option{-v} command-line option
+(@pxref{Options}).
+The following example prints the first and second fields of each input
+record, separated by a semicolon, with a blank line added after each
+newline:
+
+
+@example
+$ @kbd{awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}}
+> @kbd{@{ print $1, $2 @}' mail-list}
+@print{} Amelia;555-5553
+@print{}
+@print{} Anthony;555-3412
+@print{}
+@print{} Becky;555-7685
+@print{}
+@print{} Bill;555-1675
+@print{}
+@print{} Broderick;555-0542
+@print{}
+@print{} Camilla;555-2912
+@print{}
+@print{} Fabius;555-1234
+@print{}
+@print{} Julie;555-6699
+@print{}
+@print{} Martin;555-6480
+@print{}
+@print{} Samuel;555-3430
+@print{}
+@print{} Jean-Paul;555-2127
+@print{}
+@end example
+
+If the value of @code{ORS} does not contain a newline, the program's output
+runs together on a single line.
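+
+For example, the following sketch prints the first field of every
+record in @file{mail-list}, using a single space as the output record
+separator; all the names appear on one line:
+
+@example
+awk 'BEGIN @{ ORS = " " @} @{ print $1 @}' mail-list
+@end example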
+
+@node OFMT
+@section Controlling Numeric Output with @code{print}
+@cindex numeric, output format
+@cindex formats@comma{} numeric output
+When printing numeric values with the @code{print} statement,
+@command{awk} internally converts the number to a string of characters
+and prints that string. @command{awk} uses the @code{sprintf()} function
+to do this conversion
+(@pxref{String Functions}).
+For now, it suffices to say that the @code{sprintf()}
+function accepts a @dfn{format specification} that tells it how to format
+numbers (or strings), and that there are a number of different ways in which
+numbers can be formatted. The different format specifications are discussed
+more fully in
+@ref{Control Letters}.
+
+@cindexawkfunc{sprintf}
+@cindex @code{OFMT} variable
+@cindex output, format specifier@comma{} @code{OFMT}
+The predefined variable @code{OFMT} contains the format specification
+that @code{print} uses with @code{sprintf()} when it wants to convert a
+number to a string for printing.
+The default value of @code{OFMT} is @code{"%.6g"}.
+The way @code{print} prints numbers can be changed
+by supplying a different format specification
+for the value of @code{OFMT}, as shown in the following example:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{OFMT = "%.0f" # print numbers as integers (rounds)}
+> @kbd{print 17.23, 17.54 @}'}
+@print{} 17 18
+@end example
+
+@noindent
+@cindex dark corner, @code{OFMT} variable
+@cindex POSIX @command{awk}, @code{OFMT} variable and
+@cindex @code{OFMT} variable, POSIX @command{awk} and
+According to the POSIX standard, @command{awk}'s behavior is undefined
+if @code{OFMT} contains anything but a floating-point conversion specification.
+@value{DARKCORNER}
+
+@node Printf
+@section Using @code{printf} Statements for Fancier Printing
+
+@cindex @code{printf} statement
+@cindex output, formatted
+@cindex formatting output
+For more precise control over the output format than what is
+provided by @code{print}, use @code{printf}.
+With @code{printf} you can
+specify the width to use for each item, as well as various
+formatting choices for numbers (such as what output base to use, whether to
+print an exponent, whether to print a sign, and how many digits to print
+after the decimal point).
+
+@menu
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+@end menu
+
+@node Basic Printf
+@subsection Introduction to the @code{printf} Statement
+
+@cindex @code{printf} statement, syntax of
+A simple @code{printf} statement looks like this:
+
+@example
+printf @var{format}, @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+As for @code{print}, the entire list of arguments may optionally be
+enclosed in parentheses. Here too, the parentheses are necessary if any
+of the item expressions use the @samp{>} relational operator; otherwise,
+it can be confused with an output redirection (@pxref{Redirection}).
+
+@cindex format specifiers
+The difference between @code{printf} and @code{print} is the @var{format}
+argument. This is an expression whose value is taken as a string; it
+specifies how to output each of the other arguments. It is called the
+@dfn{format string}.
+
+The format string is very similar to that in the ISO C library function
+@code{printf()}. Most of @var{format} is text to output verbatim.
+Scattered among this text are @dfn{format specifiers}---one per item.
+Each format specifier says to output the next item in the argument list
+at that place in the format.
+
+The @code{printf} statement does not automatically append a newline
+to its output. It outputs only what the format string specifies.
+So if a newline is needed, you must include one in the format string.
+The output separator variables @code{OFS} and @code{ORS} have no effect
+on @code{printf} statements. For example:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{ORS = "\nOUCH!\n"; OFS = "+"}
+> @kbd{msg = "Don\47t Panic!"}
+> @kbd{printf "%s\n", msg}
+> @kbd{@}'}
+@print{} Don't Panic!
+@end example
+
+@noindent
+Here, neither the @samp{+} nor the @samp{OUCH!} appear in
+the output message.
+
+@node Control Letters
+@subsection Format-Control Letters
+@cindex @code{printf} statement, format-control characters
+@cindex format specifiers, @code{printf} statement
+
+A format specifier starts with the character @samp{%} and ends with
+a @dfn{format-control letter}---it tells the @code{printf} statement
+how to output one item. The format-control letter specifies what @emph{kind}
+of value to print. The rest of the format specifier is made up of
+optional @dfn{modifiers} that control @emph{how} to print the value, such as
+the field width. Here is a list of the format-control letters:
+
+@c @asis for docbook to come out right
+@table @asis
+@item @code{%c}
+Print a number as a character; thus, @samp{printf "%c",
+65} outputs the letter @samp{A}. The output for a string value is
+the first character of the string.
+
+@cindex dark corner, format-control characters
+@cindex @command{gawk}, format-control characters
+@quotation NOTE
+The POSIX standard says the first character of a string is printed.
+In locales with multibyte characters, @command{gawk} attempts to
+convert the leading bytes of the string into a valid wide character
+and then to print the multibyte encoding of that character.
+Similarly, when printing a numeric value, @command{gawk} allows the
+value to be within the numeric range of values that can be held
+in a wide character.
+If the conversion to multibyte encoding fails, @command{gawk}
+uses the low eight bits of the value as the character to print.
+
+Other @command{awk} versions generally restrict themselves to printing
+the first byte of a string or to numeric values within the range of
+a single byte (0--255).
+@end quotation
+
+
+@item @code{%d}, @code{%i}
+Print a decimal integer.
+The two control letters are equivalent.
+(The @samp{%i} specification is for compatibility with ISO C.)
+
+@item @code{%e}, @code{%E}
+Print a number in scientific (exponential) notation;
+for example:
+
+@example
+printf "%4.3e\n", 1950
+@end example
+
+@noindent
+prints @samp{1.950e+03}, with a total of four significant figures, three of
+which follow the decimal point.
+(The @samp{4.3} represents two modifiers,
+discussed in the next @value{SUBSECTION}.)
+@samp{%E} uses @samp{E} instead of @samp{e} in the output.
+
+@item @code{%f}
+Print a number in floating-point notation.
+For example:
+
+@example
+printf "%4.3f", 1950
+@end example
+
+@noindent
+prints @samp{1950.000}, with a total of four significant figures, three of
+which follow the decimal point.
+(The @samp{4.3} represents two modifiers,
+discussed in the next @value{SUBSECTION}.)
+
+On systems supporting IEEE 754 floating-point format, values
+representing negative
+infinity are formatted as
+@samp{-inf} or @samp{-infinity},
+and positive infinity as
+@samp{inf} or @samp{infinity}.
+The special ``not a number'' value formats as @samp{-nan} or @samp{nan}
+(@pxref{Math Definitions}).
+
+@item @code{%F}
+Like @samp{%f} but the infinity and ``not a number'' values are spelled
+using uppercase letters.
+
+The @samp{%F} format is a POSIX extension to ISO C; not all systems
+support it. On those that don't, @command{gawk} uses @samp{%f} instead.
+
+@item @code{%g}, @code{%G}
+Print a number in either scientific notation or in floating-point
+notation, whichever uses fewer characters; if the result is printed in
+scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
+
+@item @code{%o}
+Print an unsigned octal integer
+(@pxref{Nondecimal-numbers}).
+
+@item @code{%s}
+Print a string.
+
+@item @code{%u}
+Print an unsigned decimal integer.
+(This format is of marginal use, because all numbers in @command{awk}
+are floating point; it is provided primarily for compatibility with C.)
+
+@item @code{%x}, @code{%X}
+Print an unsigned hexadecimal integer;
+@samp{%X} uses the letters @samp{A} through @samp{F}
+instead of @samp{a} through @samp{f}
+(@pxref{Nondecimal-numbers}).
+
+@item @code{%%}
+Print a single @samp{%}.
+This does not consume an
+argument and it ignores any modifiers.
+@end table
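+
+Here is a short sketch exercising several of these control letters:
+
+@example
+$ @kbd{awk 'BEGIN @{ printf "%d %o %x %e %s\n", 255, 255, 255, 255, "255" @}'}
+@print{} 255 377 ff 2.550000e+02 255
+@end example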
+
+@cindex dark corner, format-control characters
+@cindex @command{gawk}, format-control characters
+@quotation NOTE
+When using the integer format-control letters for values that are
+outside the range of the widest C integer type, @command{gawk} switches to
+the @samp{%g} format specifier. If @option{--lint} is provided on the
+command line (@pxref{Options}), @command{gawk}
+warns about this. Other versions of @command{awk} may print invalid
+values or do something else entirely.
+@value{DARKCORNER}
+@end quotation
+
+@node Format Modifiers
+@subsection Modifiers for @code{printf} Formats
+
+@cindex @code{printf} statement, modifiers
+@cindex modifiers@comma{} in format specifiers
+A format specification can also include @dfn{modifiers} that can control
+how much of the item's value is printed, as well as how much space it gets.
+The modifiers come between the @samp{%} and the format-control letter.
+We use the bullet symbol ``@bullet{}'' in the following examples to
+represent
+spaces in the output. Here are the possible modifiers, in the order in
+which they may appear:
+
+@table @code
+@cindex differences in @command{awk} and @command{gawk}, @code{print}/@code{printf} statements
+@cindex @code{printf} statement, positional specifiers
+@c the code{} does NOT start a secondary
+@cindex positional specifiers, @code{printf} statement
+@item @var{N}$
+An integer constant followed by a @samp{$} is a @dfn{positional specifier}.
+Normally, format specifications are applied to arguments in the order
+given in the format string. With a positional specifier, the format
+specification is applied to a specific argument, instead of what
+would be the next argument in the list. Positional specifiers begin
+counting with one. Thus:
+
+@example
+printf "%s %s\n", "don't", "panic"
+printf "%2$s %1$s\n", "panic", "don't"
+@end example
+
+@noindent
+prints the famous friendly message twice.
+
+At first glance, this feature doesn't seem to be of much use.
+It is in fact a @command{gawk} extension, intended for use in translating
+messages at runtime.
+@xref{Printf Ordering},
+which describes how and why to use positional specifiers.
+For now, we ignore them.
+
+@item - (Minus)
+The minus sign, used before the width modifier (see later on in
+this list),
+says to left-justify
+the argument within its specified width. Normally, the argument
+is printed right-justified in the specified width. Thus:
+
+@example
+printf "%-4s", "foo"
+@end example
+
+@noindent
+prints @samp{foo@bullet{}}.
+
+@item @var{space}
+For numeric conversions, prefix positive values with a space and
+negative values with a minus sign.
+
+@item +
+The plus sign, used before the width modifier (see later on in
+this list),
+says to always supply a sign for numeric conversions, even if the data
+to format is positive. The @samp{+} overrides the space modifier.
+
+@item #
+Use an ``alternative form'' for certain control letters.
+For @samp{%o}, supply a leading zero.
+For @samp{%x} and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for
+a nonzero result.
+For @samp{%e}, @samp{%E}, @samp{%f}, and @samp{%F}, the result always
+contains a decimal point.
+For @samp{%g} and @samp{%G}, trailing zeros are not removed from the result.
+
+@item 0
+A leading @samp{0} (zero) acts as a flag indicating that output should be
+padded with zeros instead of spaces.
+This applies only to the numeric output formats.
+This flag only has an effect when the field width is wider than the
+value to print.
+
+@item '
+A single quote or apostrophe character is a POSIX extension to ISO C.
+It indicates that the integer part of a floating-point value, or the
+entire value of a decimal integer, should have a thousands-separator
+character in it. This only works in locales that support such characters.
+For example:
+
+@example
+$ @kbd{cat thousands.awk} @ii{Show source program}
+@print{} BEGIN @{ printf "%'d\n", 1234567 @}
+$ @kbd{LC_ALL=C gawk -f thousands.awk}
+@print{} 1234567 @ii{Results in} "C" @ii{locale}
+$ @kbd{LC_ALL=en_US.UTF-8 gawk -f thousands.awk}
+@print{} 1,234,567 @ii{Results in US English UTF locale}
+@end example
+
+@noindent
+For more information about locales and internationalization issues,
+see @ref{Locales}.
+
+@quotation NOTE
+The @samp{'} flag is a nice feature, but its use complicates things: it
+becomes difficult to use it in command-line programs. For information
+on appropriate quoting tricks, see @ref{Quoting}.
+@end quotation
+
+@item @var{width}
+This is a number specifying the desired minimum width of a field. Inserting any
+number between the @samp{%} sign and the format-control character forces the
+field to expand to this width. The default way to do this is to
+pad with spaces on the left. For example:
+
+@example
+printf "%4s", "foo"
+@end example
+
+@noindent
+prints @samp{@bullet{}foo}.
+
+The value of @var{width} is a minimum width, not a maximum. If the item
+value requires more than @var{width} characters, it can be as wide as
+necessary. Thus, the following:
+
+@example
+printf "%4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foobar}.
+
+Preceding the @var{width} with a minus sign causes the output to be
+padded with spaces on the right, instead of on the left.
+
+@item .@var{prec}
+A period followed by an integer constant
+specifies the precision to use when printing.
+The meaning of the precision varies by control letter:
+
+@table @asis
+@item @code{%d}, @code{%i}, @code{%o}, @code{%u}, @code{%x}, @code{%X}
+Minimum number of digits to print.
+
+@item @code{%e}, @code{%E}, @code{%f}, @code{%F}
+Number of digits to the right of the decimal point.
+
+@item @code{%g}, @code{%G}
+Maximum number of significant digits.
+
+@item @code{%s}
+Maximum number of characters from the string that should print.
+@end table
+
+Thus, the following:
+
+@example
+printf "%.4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foob}.
+@end table
+
+The C library @code{printf}'s dynamic @var{width} and @var{prec}
+capability (e.g., @code{"%*.*s"}) is supported. Instead of
+supplying explicit @var{width} and/or @var{prec} values in the format
+string, they are passed in the argument list. For example:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%*.*s\n", w, p, s
+@end example
+
+@noindent
+is exactly equivalent to:
+
+@example
+s = "abcdefg"
+printf "%5.3s\n", s
+@end example
+
+@noindent
+Both programs output @samp{@w{@bullet{}@bullet{}abc}}.
+Earlier versions of @command{awk} did not support this capability.
+If you must use such a version, you may simulate this feature by using
+concatenation to build up the format string, like so:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%" w "." p "s\n", s
+@end example
+
+@noindent
+This is not particularly easy to read but it does work.
+
+@c @cindex lint checks
+@cindex troubleshooting, fatal errors, @code{printf} format strings
+@cindex POSIX @command{awk}, @code{printf} format strings and
+C programmers may be used to supplying additional modifiers (@samp{h},
+@samp{j}, @samp{l}, @samp{L}, @samp{t}, and @samp{z}) in @code{printf}
+format strings. These are not valid in @command{awk}. Most @command{awk}
+implementations silently ignore them. If @option{--lint} is provided
+on the command line (@pxref{Options}), @command{gawk} warns about their
+use. If @option{--posix} is supplied, their use is a fatal error.
+
+@node Printf Examples
+@subsection Examples Using @code{printf}
+
+The following simple example shows
+how to use @code{printf} to make an aligned table:
+
+@example
+awk '@{ printf "%-10s %s\n", $1, $2 @}' mail-list
+@end example
+
+@noindent
+This command
+prints the names of the people (@code{$1}) in the file
+@file{mail-list} as a string of 10 characters that are left-justified. It also
+prints the phone numbers (@code{$2}) next on the line. This
+produces an aligned two-column table of names and phone numbers,
+as shown here:
+
+@example
+$ @kbd{awk '@{ printf "%-10s %s\n", $1, $2 @}' mail-list}
+@print{} Amelia 555-5553
+@print{} Anthony 555-3412
+@print{} Becky 555-7685
+@print{} Bill 555-1675
+@print{} Broderick 555-0542
+@print{} Camilla 555-2912
+@print{} Fabius 555-1234
+@print{} Julie 555-6699
+@print{} Martin 555-6480
+@print{} Samuel 555-3430
+@print{} Jean-Paul 555-2127
+@end example
+
+In this case, the phone numbers had to be printed as strings because
+the numbers are separated by a dash. Printing the phone numbers as
+numbers would have produced just the first three digits: @samp{555}.
+This would have been pretty confusing.
+
+It wasn't necessary to specify a width for the phone numbers because
+they are last on their lines. They don't need to have spaces
+after them.
+
+The table could be made to look even nicer by adding headings to the
+tops of the columns. This is done using a @code{BEGIN} rule
+(@pxref{BEGIN/END})
+so that the headers are only printed once, at the beginning of
+the @command{awk} program:
+
+@example
+awk 'BEGIN @{ print "Name Number"
+ print "---- ------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+@end example
+
+The preceding example mixes @code{print} and @code{printf} statements in
+the same program. Using just @code{printf} statements can produce the
+same results:
+
+@example
+awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
+ printf "%-10s %s\n", "----", "------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' mail-list
+@end example
+
+@noindent
+Printing each column heading with the same format specification
+used for the column elements ensures that the headings
+are aligned just like the columns.
+
+The fact that the same format specification is used three times can be
+emphasized by storing it in a variable, like this:
+
+@example
+awk 'BEGIN @{ format = "%-10s %s\n"
+ printf format, "Name", "Number"
+ printf format, "----", "------" @}
+ @{ printf format, $1, $2 @}' mail-list
+@end example
+
+
+@node Redirection
+@section Redirecting Output of @code{print} and @code{printf}
+
+@cindex output redirection
+@cindex redirection of output
+@cindex @option{--sandbox} option, output redirection with @code{print}, @code{printf}
+So far, the output from @code{print} and @code{printf} has gone
+to the standard
+output, usually the screen. Both @code{print} and @code{printf} can
+also send their output to other places.
+This is called @dfn{redirection}.
+
+@quotation NOTE
+When @option{--sandbox} is specified (@pxref{Options}),
+redirecting output to files, pipes and coprocesses is disabled.
+@end quotation
+
+A redirection appears after the @code{print} or @code{printf} statement.
+Redirections in @command{awk} are written just like redirections in shell
+commands, except that they are written inside the @command{awk} program.
+
+@c the commas here are part of the see also
+@cindex @code{print} statement, See Also redirection@comma{} of output
+@cindex @code{printf} statement, See Also redirection@comma{} of output
+There are four forms of output redirection: output to a file, output
+appended to a file, output through a pipe to another command, and output
+to a coprocess. We show them all for the @code{print} statement,
+but they work identically for @code{printf}:
+
+@table @code
+@cindex @code{>} (right angle bracket), @code{>} operator (I/O)
+@cindex right angle bracket (@code{>}), @code{>} operator (I/O)
+@cindex operators, input/output
+@item print @var{items} > @var{output-file}
+This redirection prints the items into the output file named
+@var{output-file}. The @value{FN} @var{output-file} can be any
+expression. Its value is changed to a string and then used as a
+@value{FN} (@pxref{Expressions}).
+
+When this type of redirection is used, the @var{output-file} is erased
+before the first output is written to it. Subsequent writes to the same
+@var{output-file} do not erase @var{output-file}, but append to it.
+(This is different from how you use redirections in shell scripts.)
+If @var{output-file} does not exist, it is created. For example, here
+is how an @command{awk} program can write a list of people's names to one
+file named @file{name-list}, and a list of phone numbers to another file
+named @file{phone-list}:
+
+@example
+$ @kbd{awk '@{ print $2 > "phone-list"}
+> @kbd{print $1 > "name-list" @}' mail-list}
+$ @kbd{cat phone-list}
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+$ @kbd{cat name-list}
+@print{} Amelia
+@print{} Anthony
+@dots{}
+@end example
+
+@noindent
+Each output file contains one name or number per line.
+
+@cindex @code{>} (right angle bracket), @code{>>} operator (I/O)
+@cindex right angle bracket (@code{>}), @code{>>} operator (I/O)
+@item print @var{items} >> @var{output-file}
+This redirection prints the items into the pre-existing output file
+named @var{output-file}. The difference between this and the
+single-@samp{>} redirection is that the old contents (if any) of
+@var{output-file} are not erased. Instead, the @command{awk} output is
+appended to the file.
+If @var{output-file} does not exist, then it is created.
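+
+For example, the following sketch appends every input record to a log
+file (the @value{FN} @file{session.log} is purely illustrative):
+
+@example
+@{ print $0 >> "session.log" @}
+@end example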
+
+@cindex @code{|} (vertical bar), @code{|} operator (I/O)
+@cindex pipe, output
+@cindex output, pipes
+@item print @var{items} | @var{command}
+It is possible to send output to another program through a pipe
+instead of into a file. This redirection opens a pipe to
+@var{command}, and writes the values of @var{items} through this pipe
+to another process created to execute @var{command}.
+
+The redirection argument @var{command} is actually an @command{awk}
+expression. Its value is converted to a string whose contents give
+the shell command to be run. For example, the following produces two
+files, one unsorted list of people's names, and one list sorted in reverse
+alphabetical order:
+
+@ignore
+10/2000:
+This isn't the best style, since COMMAND is assigned for each
+record. It's done to avoid overfull hboxes in TeX. Leave it
+alone for now and let's hope no-one notices.
+@end ignore
+
+@example
+awk '@{ print $1 > "names.unsorted"
+ command = "sort -r > names.sorted"
+ print $1 | command @}' mail-list
+@end example
+
+The unsorted list is written with an ordinary redirection, while
+the sorted list is written by piping through the @command{sort} utility.
+
+The next example uses redirection to mail a message to the mailing
+list @samp{bug-system}. This might be useful when trouble is encountered
+in an @command{awk} script run periodically for system maintenance:
+
+@example
+report = "mail bug-system"
+print("Awk script failed:", $0) | report
+print("at record number", FNR, "of", FILENAME) | report
+close(report)
+@end example
+
+The @code{close()} function is called here because it's a good idea to close
+the pipe as soon as all the intended output has been sent to it.
+@DBXREF{Close Files And Pipes}
+for more information.
+
+This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}---it is not necessary to always
+use a string constant. Using a variable is generally a good idea,
+because (if you mean to refer to that same file or command)
+@command{awk} requires that the string value be written identically
+every time.
+
+@cindex coprocesses
+@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
+@cindex operators, input/output
+@cindex differences in @command{awk} and @command{gawk}, input/output operators
+@item print @var{items} |& @var{command}
+This redirection prints the items to the input of @var{command}.
+The difference between this and the
+single-@samp{|} redirection is that the output from @var{command}
+can be read with @code{getline}.
+Thus @var{command} is a @dfn{coprocess}, which works together with,
+but subsidiary to, the @command{awk} program.
+
+This feature is a @command{gawk} extension, and is not available in
+POSIX @command{awk}.
+@DBXREF{Getline/Coprocess}
+for a brief discussion.
+@DBXREF{Two-way I/O}
+for a more complete discussion.
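+
+As a minimal sketch, a coprocess is used something like this
+(@code{some_filter} is only an illustrative command name; buffering
+and other practical details are covered in the later discussion):
+
+@example
+cmd = "some_filter"        # illustrative command name
+print "a request" |& cmd   # write to the coprocess
+cmd |& getline response    # read its reply back
+@end example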
+@end table
+
+Redirecting output using @samp{>}, @samp{>>}, @samp{|}, or @samp{|&}
+asks the system to open a file, pipe, or coprocess only if the particular
+@var{file} or @var{command} you specify has not already been written
+to by your program or if it has been closed since it was last written to.
+
+@cindex troubleshooting, printing
+It is a common error to use @samp{>} redirection for the first @code{print}
+to a file, and then to use @samp{>>} for subsequent output:
+
+@example
+# clear the file
+print "Don't panic" > "guide.txt"
+@dots{}
+# append
+print "Avoid improbability generators" >> "guide.txt"
+@end example
+
+@noindent
+This is indeed how redirections must be used from the shell. But in
+@command{awk}, it isn't necessary. In this kind of case, a program should
+use @samp{>} for all the @code{print} statements, because the output file
+is only opened once. (It happens that if you mix @samp{>} and @samp{>>},
+output is produced in the expected order. However, mixing the operators
+for the same file is definitely poor style, and is confusing to readers
+of your program.)
+
+@cindex differences in @command{awk} and @command{gawk}, implementation limitations
+@cindex implementation issues, @command{gawk}, limits
+@cindex @command{awk}, implementation issues, pipes
+@cindex @command{gawk}, implementation issues, pipes
+@ifnotinfo
+As mentioned earlier
+(@pxref{Getline Notes}),
+many
+@end ifnotinfo
+@ifnottex
+@ifnotdocbook
+Many
+@end ifnotdocbook
+@end ifnottex
+older
+@command{awk} implementations limit the number of pipelines that an @command{awk}
+program may have open to just one! In @command{gawk}, there is no such limit.
+@command{gawk} allows a program to
+open as many pipelines as the underlying operating system permits.
+
+@sidebar Piping into @command{sh}
+@cindex shells, piping commands into
+
+A particularly powerful way to use redirection is to build command lines
+and pipe them into the shell, @command{sh}. For example, suppose you
+have a list of files brought over from a system where all the @value{FN}s
+are stored in uppercase, and you wish to rename them to have names in
+all lowercase. The following program is both simple and efficient:
+
+@c @cindex @command{mv} utility
+@example
+@{ printf("mv %s %s\n", $0, tolower($0)) | "sh" @}
+
+END @{ close("sh") @}
+@end example
+
+The @code{tolower()} function returns its argument string with all
+uppercase characters converted to lowercase
+(@pxref{String Functions}).
+The program builds up a list of command lines,
+using the @command{mv} utility to rename the files.
+It then sends the list to the shell for execution.
+
+@DBXREF{Shell Quoting} for a function that can help in generating
+command lines to be fed to the shell.
+@end sidebar
+
+@node Special FD
+@section Special Files for Standard Pre-Opened Data Streams
+@cindex standard input
+@cindex input, standard
+@cindex standard output
+@cindex output, standard
+@cindex error output
+@cindex standard error
+@cindex file descriptors
+@cindex files, descriptors, See file descriptors
+
+Running programs conventionally have three input and output streams
+already available to them for reading and writing. These are known
+as the @dfn{standard input}, @dfn{standard output}, and @dfn{standard
+error output}. These open streams (and any other open file or pipe)
+are often referred to by the technical term @dfn{file descriptors}.
+
+These streams are, by default, connected to your keyboard and screen, but
+they are often redirected with the shell, via the @samp{<}, @samp{<<},
+@samp{>}, @samp{>>}, @samp{>&}, and @samp{|} operators. Standard error
+is typically used for writing error messages; the reason there are two separate
+streams, standard output and standard error, is so that they can be
+redirected separately.
+
+@cindex differences in @command{awk} and @command{gawk}, error messages
+@cindex error handling
+In traditional implementations of @command{awk}, the only way to write an error
+message to standard error in an @command{awk} program is as follows:
+
+@example
+print "Serious error detected!" | "cat 1>&2"
+@end example
+
+@noindent
+This works by opening a pipeline to a shell command that can access the
+standard error stream that it inherits from the @command{awk} process.
+@c 8/2014: Mike Brennan says not to cite this as inefficient. So, fixed.
+This is far from elegant, and it also requires a
+separate process. So people writing @command{awk} programs often
+don't do this. Instead, they send the error messages to the
+screen, like this:
+
+@example
+print "Serious error detected!" > "/dev/tty"
+@end example
+
+@noindent
+(@file{/dev/tty} is a special file supplied by the operating system
+that is connected to your keyboard and screen. It represents the
+``terminal,''@footnote{The ``tty'' in @file{/dev/tty} stands for
+``Teletype,'' a serial terminal.} which on modern systems is a keyboard
+and screen, not a serial console.)
+This generally has the same effect but not always: although the
+standard error stream is usually the screen, it can be redirected; when
+that happens, writing to the screen is not correct. In fact, if
+@command{awk} is run from a background job, it may not have a
+terminal at all.
+Then opening @file{/dev/tty} fails.
+
+@command{gawk}, BWK @command{awk}, and @command{mawk} provide
+special @value{FN}s for accessing the three standard streams.
+If the @value{FN} matches one of these special names when @command{gawk}
+(or one of the others) redirects input or output, then it directly uses
+the descriptor that the @value{FN} stands for. These special
+@value{FN}s work for all operating systems that @command{gawk}
+has been ported to, not just those that are POSIX-compliant:
+
+@cindex common extensions, @code{/dev/stdin} special file
+@cindex common extensions, @code{/dev/stdout} special file
+@cindex common extensions, @code{/dev/stderr} special file
+@cindex extensions, common@comma{} @code{/dev/stdin} special file
+@cindex extensions, common@comma{} @code{/dev/stdout} special file
+@cindex extensions, common@comma{} @code{/dev/stderr} special file
+@cindex file names, standard streams in @command{gawk}
+@cindex @code{/dev/@dots{}} special files
+@cindex files, @code{/dev/@dots{}} special files
+@cindex @code{/dev/fd/@var{N}} special files (@command{gawk})
+@table @file
+@item /dev/stdin
+The standard input (file descriptor 0).
+
+@item /dev/stdout
+The standard output (file descriptor 1).
+
+@item /dev/stderr
+The standard error output (file descriptor 2).
+@end table
+
+With these facilities,
+the proper way to write an error message then becomes:
+
+@example
+print "Serious error detected!" > "/dev/stderr"
+@end example
+
+@cindex troubleshooting, quotes with file names
+Note the use of quotes around the @value{FN}.
+Like any other redirection, the value must be a string.
+It is a common error to omit the quotes, which leads
+to confusing results.
+
+@command{gawk} does not treat these @value{FN}s as special when
+in POSIX-compatibility mode. However, because BWK @command{awk}
+supports them, @command{gawk} does support them even when
+invoked with the @option{--traditional} option (@pxref{Options}).
+
+@node Special Files
+@section Special @value{FFN}s in @command{gawk}
+@cindex @command{gawk}, file names in
+
+Besides access to standard input, standard output, and standard error,
+@command{gawk} provides access to any open file descriptor.
+Additionally, there are special @value{FN}s reserved for
+TCP/IP networking.
+
+@menu
+* Other Inherited Files:: Accessing other open files with
+ @command{gawk}.
+* Special Network:: Special files for network communications.
+* Special Caveats:: Things to watch out for.
+@end menu
+
+@node Other Inherited Files
+@subsection Accessing Other Open Files With @command{gawk}
+
+Besides the @code{/dev/stdin}, @code{/dev/stdout}, and @code{/dev/stderr}
+special @value{FN}s mentioned earlier, @command{gawk} provides syntax
+for accessing any other inherited open file:
+
+@table @file
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must
+be opened by the program initiating the @command{awk} execution (typically
+the shell). Unless special pains are taken in the shell from which
+@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
+@end table
+
+The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are essentially aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and
+@file{/dev/fd/2}, respectively. However, those names are more self-explanatory.
+
+Note that using @code{close()} on a @value{FN} of the
+form @code{"/dev/fd/@var{N}"}, for file descriptor numbers
+above two, does actually close the given file descriptor.
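+
+As an illustrative sketch, assuming the shell that starts @command{gawk}
+has opened descriptor 3 on some file (for example, with @samp{3> trace.out}),
+the program can write to that descriptor directly:
+
+@example
+@{ print "trace:", $0 > "/dev/fd/3" @}
+@end example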
+
+@node Special Network
+@subsection Special Files for Network Communications
+@cindex networks, support for
+@cindex TCP/IP, support for
+
+@command{gawk} programs
+can open a two-way
+TCP/IP connection, acting as either a client or a server.
+This is done using a special @value{FN} of the form:
+
+@example
+@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}
+@end example
+
+The @var{net-type} is one of @samp{inet}, @samp{inet4}, or @samp{inet6}.
+The @var{protocol} is one of @samp{tcp} or @samp{udp},
+and the other fields represent the other essential pieces of information
+for making a networking connection.
+These @value{FN}s are used with the @samp{|&} operator for communicating
+with a coprocess
+(@pxref{Two-way I/O}).
+This is an advanced feature, mentioned here only for completeness.
+Full discussion is delayed until
+@ref{TCP/IP Networking}.
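+
+For instance, a client connection might look something like the
+following sketch (the host and service shown are purely illustrative;
+see the full discussion for working examples):
+
+@example
+service = "/inet/tcp/0/localhost/daytime"
+service |& getline result
+print result
+close(service)
+@end example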
+
+@node Special Caveats
+@subsection Special @value{FFN} Caveats
+
+Here are some things to bear in mind when using the
+special @value{FN}s that @command{gawk} provides:
+
+@itemize @value{BULLET}
+@cindex compatibility mode (@command{gawk}), file names
+@cindex file names, in compatibility mode
+@item
+Recognition of the @value{FN}s for the three standard pre-opened
+files is disabled only in POSIX mode.
+
+@item
+Recognition of the other special @value{FN}s is disabled if @command{gawk} is in
+compatibility mode (either @option{--traditional} or @option{--posix};
+@pxref{Options}).
+
+@item
+@command{gawk} @emph{always}
+interprets these special @value{FN}s.
+For example, using @samp{/dev/fd/4}
+for output actually writes on file descriptor 4, and not on a new
+file descriptor that is @code{dup()}'ed from file descriptor 4. Most of
+the time this does not matter; however, it is important to @emph{not}
+close any of the files related to file descriptors 0, 1, and 2.
+Doing so results in unpredictable behavior.
+@end itemize
+
+@node Close Files And Pipes
+@section Closing Input and Output Redirections
+@cindex files, output, See output files
+@cindex input files, closing
+@cindex output, files@comma{} closing
+@cindex pipe, closing
+@cindex coprocesses, closing
+@cindex @code{getline} command, coprocesses@comma{} using from
+
+If the same @value{FN} or the same shell command is used with @code{getline}
+more than once during the execution of an @command{awk} program
+(@pxref{Getline}),
+the file is opened (or the command is executed) the first time only.
+At that time, the first record of input is read from that file or command.
+The next time the same file or command is used with @code{getline},
+another record is read from it, and so on.
+
+Similarly, when a file or pipe is opened for output, @command{awk} remembers
+the @value{FN} or command associated with it, and subsequent
+writes to the same file or command are appended to the previous writes.
+The file or pipe stays open until @command{awk} exits.
+
+@cindexawkfunc{close}
+This implies that special steps are necessary in order to read the same
+file again from the beginning, or to rerun a shell command (rather than
+reading more output from the same command). The @code{close()} function
+makes these things possible:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or:
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression. Its
+value must @emph{exactly} match the string that was used to open the file or
+start the command (spaces and other ``irrelevant'' characters
+included). For example, if you open a pipe with this:
+
+@example
+"sort -r names" | getline foo
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r names")
+@end example
+
+Once this function call is executed, the next @code{getline} from that
+file or command, or the next @code{print} or @code{printf} to that
+file or command, reopens the file or reruns the command.
+Because the expression that you use to close a file or pipeline must
+exactly match the expression used to open the file or run the command,
+it is good practice to use a variable to store the @value{FN} or command.
+The previous example becomes the following:
+
+@example
+sortcom = "sort -r names"
+sortcom | getline foo
+@dots{}
+close(sortcom)
+@end example
+
+@noindent
+This helps avoid hard-to-find typographical errors in your @command{awk}
+programs. Here are some of the reasons for closing an output file:
+
+@itemize @value{BULLET}
+@item
+To write a file and read it back later on in the same @command{awk}
+program. Close the file after writing it, then
+begin reading it with @code{getline}.
+
+@item
+To write numerous files, successively, in the same @command{awk}
+program. If the files aren't closed, eventually @command{awk} may exceed a
+system limit on the number of open files in one process. It is best to
+close each one when the program has finished writing it.
+
+@item
+To make a command finish. When output is redirected through a pipe,
+the command reading the pipe normally continues to try to read input
+as long as the pipe is open. Often this means the command cannot
+really do its work until the pipe is closed. For example, if
+output is redirected to the @command{mail} program, the message is not
+actually sent until the pipe is closed.
+
+@item
+To run the same program a second time, with the same arguments.
+This is not the same thing as giving more input to the first run!
+
+For example, suppose a program pipes output to the @command{mail} program.
+If it outputs several lines redirected to this pipe without closing
+it, they make a single message of several lines. By contrast, if the
+program closes the pipe after each line of output, then each line makes
+a separate message.
+@end itemize
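+
+To illustrate the first reason in the preceding list, here is a small
+sketch that writes a scratch file, closes it, and then reads it back
+with @code{getline} (the @value{FN} @file{tmpdata} is only illustrative):
+
+@example
+BEGIN @{
+    print "first line"  > "tmpdata"
+    print "second line" > "tmpdata"
+    close("tmpdata")                       # finish writing
+    while ((getline line < "tmpdata") > 0)
+        print "read back:", line
+    close("tmpdata")                       # allow rereading later
+@}
+@end example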
+
+@cindex differences in @command{awk} and @command{gawk}, @code{close()} function
+@cindex portability, @code{close()} function and
+@cindex @code{close()} function, portability
+If you use more files than the system allows you to have open,
+@command{gawk} attempts to multiplex the available open files among
+your @value{DF}s. @command{gawk}'s ability to do this depends upon the
+facilities of your operating system, so it may not always work. It is
+therefore both good practice and good portability advice to always
+use @code{close()} on your files when you are done with them.
+In fact, if you are using a lot of pipes, it is essential that
+you close commands when done. For example, consider something like this:
+
+@example
+@{
+ @dots{}
+ command = ("grep " $1 " /some/file | my_prog -q " $3)
+ while ((command | getline) > 0) @{
+ @var{process output of} command
+ @}
+ # need close(command) here
+@}
+@end example
+
+This example creates a new pipeline based on data in @emph{each} record.
+Without the call to @code{close()} indicated in the comment, @command{awk}
+creates child processes to run the commands, until it eventually
+runs out of file descriptors for more pipelines.
+
+Even though each command has finished (as indicated by the end-of-file
+return status from @code{getline}), the child process is not
+terminated;@footnote{The technical terminology is rather morbid.
+The finished child is called a ``zombie,'' and cleaning up after
+it is referred to as ``reaping.''}
+@c Good old UNIX: give the marketing guys fits, that's the ticket
+more importantly, the file descriptor for the pipe
+is not closed and released until @code{close()} is called or
+@command{awk} exits.
+
+@code{close()} silently does nothing if given an argument that
+does not represent a file, pipe, or coprocess that was opened with
+a redirection. In such a case, it returns a negative value,
+indicating an error. In addition, @command{gawk} sets @code{ERRNO}
+to a string indicating the error.
+
+Note also that @samp{close(FILENAME)} has no ``magic'' effects on the
+implicit loop that reads through the files named on the command line.
+It is, more likely, a close of a file that was never opened with a
+redirection, so @command{awk} silently does nothing, except return
+a negative value.
+
+@cindex @code{|} (vertical bar), @code{|&} operator (I/O), pipes@comma{} closing
+When using the @samp{|&} operator to communicate with a coprocess,
+it is occasionally useful to be able to close one end of the two-way
+pipe without closing the other.
+This is done by supplying a second argument to @code{close()}.
+As in any other call to @code{close()},
+the first argument is the name of the command or special file used
+to start the coprocess.
+The second argument should be a string, with either of the values
+@code{"to"} or @code{"from"}. Case does not matter.
+As this is an advanced feature, discussion is
+delayed until
+@ref{Two-way I/O},
+which describes it in more detail and gives an example.
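+
+As a brief sketch of the idea (a fuller treatment appears there),
+closing the ``to'' end tells the coprocess that its input is finished,
+so that its output can then be read back:
+
+@example
+cmd = "sort"              # illustrative coprocess
+print "peach" |& cmd
+print "apple" |& cmd
+close(cmd, "to")          # sort now sees end-of-file on its input
+while ((cmd |& getline line) > 0)
+    print "sorted:", line
+close(cmd)
+@end example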
+
+@sidebar Using @code{close()}'s Return Value
+@cindex dark corner, @code{close()} function
+@cindex @code{close()} function, return value
+@cindex return value@comma{} @code{close()} function
+@cindex differences in @command{awk} and @command{gawk}, @code{close()} function
+@cindex Unix @command{awk}, @code{close()} function and
+
+In many older versions of Unix @command{awk}, the @code{close()} function
+is actually a statement.
+@value{DARKCORNER}
+It is a syntax error to try and use the return
+value from @code{close()}:
+
+@example
+command = "@dots{}"
+command | getline info
+retval = close(command) # syntax error in many Unix awks
+@end example
+
+@cindex @command{gawk}, @code{ERRNO} variable in
+@cindex @code{ERRNO} variable, with @command{close()} function
+@command{gawk} treats @code{close()} as a function.
+The return value is @minus{}1 if the argument names something
+that was never opened with a redirection, or if there is
+a system problem closing the file or process.
+In these cases, @command{gawk} sets the predefined variable
+@code{ERRNO} to a string describing the problem.
+
+In @command{gawk},
+when closing a pipe or coprocess (input or output),
+the return value is the exit status of the command.@footnote{
+This is a full 16-bit value as returned by the @code{wait()}
+system call. See the system manual pages for information on
+how to decode this value.}
+Otherwise, it is the return value from the system's @code{close()} or
+@code{fclose()} C functions when closing input or output
+files, respectively.
+This value is zero if the close succeeds, or @minus{}1 if
+it fails.
+
+The POSIX standard is very vague; it says that @code{close()}
+returns zero on success and nonzero otherwise. In general,
+different implementations vary in what they report when closing
+pipes; thus the return value cannot be used portably.
+@value{DARKCORNER}
+In POSIX mode (@pxref{Options}), @command{gawk} just returns zero
+when closing a pipe.
+@end sidebar
+
+
+@node Output Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @code{print} statement prints comma-separated expressions. Each
+expression is separated by the value of @code{OFS} and terminated by
+the value of @code{ORS}. @code{OFMT} provides the conversion format
+for numeric values for the @code{print} statement.
+
+@item
+The @code{printf} statement provides finer-grained control over output,
+with format control letters for different data types and various flags
+that modify the behavior of the format control letters.
+
+@item
+Output from both @code{print} and @code{printf} may be redirected to
+files, pipes, and coprocesses.
+
+@item
+@command{gawk} provides special @value{FN}s for access to standard input,
+output, and error, and for network communications.
+
+@item
+Use @code{close()} to close open file, pipe, and coprocess redirections.
+For coprocesses, it is possible to close only one direction of the
+communications.
+
+@end itemize
+
+@c EXCLUDE START
+@node Output Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite the program:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+@noindent
+from @ref{Output Separators}, by using a new value of @code{OFS}.
+
+@item
+Use the @code{printf} statement to line up the headings and table data
+for the @file{inventory-shipped} example that was covered in @ref{Print}.
+
+@item
+What happens if you forget the double quotes when redirecting
+output, as follows:
+
+@example
+BEGIN @{ print "Serious error detected!" > /dev/stderr @}
+@end example
+
+@end enumerate
+@c EXCLUDE END
+
+
+@node Expressions
+@chapter Expressions
+@cindex expressions
+
+Expressions are the basic building blocks of @command{awk} patterns
+and actions. An expression evaluates to a value that you can print, test,
+or pass to a function. Additionally, an expression
+can assign a new value to a variable or a field by using an assignment operator.
+
+An expression can serve as a pattern or action statement on its own.
+Most other kinds of
+statements contain one or more expressions that specify the data on which to
+operate. As in other languages, expressions in @command{awk} include
+variables, array references, constants, and function calls, as well as
+combinations of these with various operators.
+
+@menu
+* Values:: Constants, Variables, and Regular Expressions.
+* All Operators:: @command{gawk}'s operators.
+* Truth Values and Conditions:: Testing for true and false.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
+@end menu
+
+@node Values
+@section Constants, Variables, and Conversions
+
+Expressions are built up from values and the operations performed
+upon them. This @value{SECTION} describes the elementary objects
+which provide the values used in expressions.
+
+@menu
+* Constants:: String, numeric and regexp constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for later use.
+* Conversion:: The conversion of strings to numbers and vice
+ versa.
+@end menu
+
+@node Constants
+@subsection Constant Expressions
+
+@cindex constants, types of
+
+The simplest type of expression is the @dfn{constant}, which always has
+the same value. There are three types of constants: numeric,
+string, and regular expression.
+
+Each is used in the appropriate context when you need a data
+value that isn't going to change. Numeric constants can
+have different forms, but are internally stored in an identical manner.
+
+@menu
+* Scalar Constants:: Numeric and string constants.
+* Nondecimal-numbers:: What are octal and hex numbers.
+* Regexp Constants:: Regular Expression constants.
+@end menu
+
+@node Scalar Constants
+@subsubsection Numeric and String Constants
+
+@cindex constants, numeric
+@cindex numeric constants
+A @dfn{numeric constant} stands for a number. This number can be an
+integer, a decimal fraction, or a number in scientific (exponential)
+notation.@footnote{The internal representation of all numbers,
+including integers, uses double-precision floating-point numbers.
+On most modern systems, these are in IEEE 754 standard format.
+@xref{Arbitrary Precision Arithmetic}, for much more information.}
+Here are some examples of numeric constants that all
+have the same value:
+
+@example
+105
+1.05e+2
+1050e-1
+@end example
+
+@cindex string constants
+A string constant consists of a sequence of characters enclosed in
+double quotation marks. For example:
+
+@example
+"parrot"
+@end example
+
+@noindent
+@cindex differences in @command{awk} and @command{gawk}, strings
+@cindex strings, length limitations
+represents the string whose contents are @samp{parrot}. Strings in
+@command{gawk} can be of any length, and they can contain any of the possible
+eight-bit character values, including the ASCII @sc{nul} character
+(character code zero).
+Other @command{awk}
+implementations may have difficulty with some character codes.
+
+@node Nondecimal-numbers
+@subsubsection Octal and Hexadecimal Numbers
+@cindex octal numbers
+@cindex hexadecimal numbers
+@cindex numbers, octal
+@cindex numbers, hexadecimal
+
+In @command{awk}, all numbers are in decimal (i.e., base 10). Many other
+programming languages allow you to specify numbers in other bases, often
+octal (base 8) and hexadecimal (base 16).
+In octal, the numbers go 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, and so on.
+Just as @samp{11}, in decimal, is 1 times 10 plus 1, so
+@samp{11}, in octal, is 1 times 8, plus 1. This equals 9 in decimal.
+In hexadecimal, there are 16 digits. Because the everyday decimal
+number system only has ten digits (@samp{0}--@samp{9}), the letters
+@samp{a} through @samp{f} are used to represent the rest.
+(Case in the letters is usually irrelevant; hexadecimal @samp{a} and @samp{A}
+have the same value.)
+Thus, @samp{11}, in
+hexadecimal, is 1 times 16 plus 1, which equals 17 in decimal.
+
+Just by looking at plain @samp{11}, you can't tell what base it's in.
+So, in C, C++, and other languages derived from C,
+@c such as PERL, but we won't mention that....
+there is a special notation to signify the base.
+Octal numbers start with a leading @samp{0},
+and hexadecimal numbers start with a leading @samp{0x} or @samp{0X}:
+
+@table @code
+@item 11
+Decimal value 11.
+
+@item 011
+Octal 11, decimal value 9.
+
+@item 0x11
+Hexadecimal 11, decimal value 17.
+@end table
+
+This example shows the difference:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%d, %d, %d\n", 011, 11, 0x11 @}'}
+@print{} 9, 11, 17
+@end example
+
+Being able to use octal and hexadecimal constants in your programs is most
+useful when working with data that cannot be represented conveniently as
+characters or as regular numbers, such as binary data of various sorts.
+
+@cindex @command{gawk}, octal numbers and
+@cindex @command{gawk}, hexadecimal numbers and
+@command{gawk} allows the use of octal and hexadecimal
+constants in your program text. However, such numbers in the input data
+are not treated differently; doing so by default would break old
+programs.
+(If you really need to do this, use the @option{--non-decimal-data}
+command-line option;
+@pxref{Nondecimal Data}.)
+If you have octal or hexadecimal data,
+you can use the @code{strtonum()} function
+(@pxref{String Functions})
+to convert the data into a number.
+Most of the time, you will want to use octal or hexadecimal constants
+when working with the built-in bit manipulation functions;
+see @DBREF{Bitwise Functions}
+for more information.
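+
+For example, the following sketch should print 17 when given the
+hexadecimal string @samp{0x11} as input data:
+
+@example
+$ @kbd{echo 0x11 | gawk '@{ print strtonum($1) @}'}
+@print{} 17
+@end example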
+
+Unlike some early C implementations, @samp{8} and @samp{9} are not
+valid in octal constants. For example, @command{gawk} treats @samp{018}
+as decimal 18:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "021 is", 021 ; print 018 @}'}
+@print{} 021 is 17
+@print{} 18
+@end example
+
+@cindex compatibility mode (@command{gawk}), octal numbers
+@cindex compatibility mode (@command{gawk}), hexadecimal numbers
+Octal and hexadecimal source code constants are a @command{gawk} extension.
+If @command{gawk} is in compatibility mode
+(@pxref{Options}),
+they are not available.
+
+@sidebar A Constant's Base Does Not Affect Its Value
+
+Once a numeric constant has
+been converted internally into a number,
+@command{gawk} no longer remembers
+what the original form of the constant was; the internal value is
+always used. This has particular consequences for conversion of
+numbers to strings:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "0x11 is <%s>\n", 0x11 @}'}
+@print{} 0x11 is <17>
+@end example
+@end sidebar
+
+@node Regexp Constants
+@subsubsection Regular Expression Constants
+
+@cindex regexp constants
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+A regexp constant is a regular expression description enclosed in
+slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
+@command{awk} programs are constant, but the @samp{~} and @samp{!~}
+matching operators can also match computed or dynamic regexps
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be a more complex expression).
+
+@node Using Constant Regexps
+@subsection Using Regular Expression Constants
+
+@cindex dark corner, regexp constants
+When used on the righthand side of the @samp{~} or @samp{!~}
+operators, a regexp constant merely stands for the regexp that is to be
+matched.
+However, regexp constants (such as @code{/foo/}) may be used like simple expressions.
+When a
+regexp constant appears by itself, it has the same meaning as if it appeared
+in a pattern (i.e., @samp{($0 ~ /foo/)}).
+@value{DARKCORNER}
+@xref{Expression Patterns}.
+This means that the following two code segments:
+
+@example
+if ($0 ~ /barfly/ || $0 ~ /camelot/)
+ print "found"
+@end example
+
+@noindent
+and:
+
+@example
+if (/barfly/ || /camelot/)
+ print "found"
+@end example
+
+@noindent
+are exactly equivalent.
+One rather bizarre consequence of this rule is that the following
+Boolean expression is valid, but does not do what its author probably
+intended:
+
+@example
+# Note that /foo/ is on the left of the ~
+if (/foo/ ~ $1) print "found foo"
+@end example
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+@cindex @command{gawk}, regexp constants and
+@cindex regexp constants, in @command{gawk}
+@noindent
+This code is ``obviously'' testing @code{$1} for a match against the regexp
+@code{/foo/}. But in fact, the expression @samp{/foo/ ~ $1} really means
+@samp{($0 ~ /foo/) ~ $1}. In other words, first match the input record
+against the regexp @code{/foo/}. The result is either zero or one,
+depending upon the success or failure of the match. That result
+is then matched against the first field in the record.
+Because it is unlikely that you would ever really want to make this kind of
+test, @command{gawk} issues a warning when it sees this construct in
+a program.
+Another consequence of this rule is that the assignment statement:
+
+@example
+matches = /foo/
+@end example
+
+@noindent
+assigns either zero or one to the variable @code{matches}, depending
+upon the contents of the current input record.
+
+@cindex differences in @command{awk} and @command{gawk}, regexp constants
+@cindex dark corner, regexp constants, as arguments to user-defined functions
+@cindexgawkfunc{gensub}
+@cindexawkfunc{sub}
+@cindexawkfunc{gsub}
+Constant regular expressions are also used as the first argument for
+the @code{gensub()}, @code{sub()}, and @code{gsub()} functions, as the
+second argument of the @code{match()} function,
+and as the third argument of the @code{split()} and @code{patsplit()} functions
+(@pxref{String Functions}).
+Modern implementations of @command{awk}, including @command{gawk}, allow
+the third argument of @code{split()} to be a regexp constant, but some
+older implementations do not.
+@value{DARKCORNER}
+Because some built-in functions accept regexp constants as arguments,
+it can be confusing when attempting to use regexp constants as arguments
+to user-defined functions (@pxref{User-defined}). For example:
+
+@example
+function mysub(pat, repl, str, global)
+@{
+ if (global)
+ gsub(pat, repl, str)
+ else
+ sub(pat, repl, str)
+ return str
+@}
+
+@{
+ @dots{}
+ text = "hi! hi yourself!"
+ mysub(/hi/, "howdy", text, 1)
+ @dots{}
+@}
+@end example
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+In this example, the programmer wants to pass a regexp constant to the
+user-defined function @code{mysub()}, which in turn passes it on to
+either @code{sub()} or @code{gsub()}. However, what really happens is that
+the @code{pat} parameter is either one or zero, depending upon whether
+or not @code{$0} matches @code{/hi/}.
+@command{gawk} issues a warning when it sees a regexp constant used as
+a parameter to a user-defined function, because passing a truth value in
+this way is probably not what was intended.
+
+@node Variables
+@subsection Variables
+
+@cindex variables, user-defined
+@cindex user-defined, variables
+Variables are ways of storing values at one point in your program for
+use later in another part of your program. They can be manipulated
+entirely within the program text, and they can also be assigned values
+on the @command{awk} command line.
+
+@menu
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line and a
+ summary of command-line syntax. This is an
+ advanced method of input.
+@end menu
+
+@node Using Variables
+@subsubsection Using Variables in a Program
+
+Variables let you give names to values and refer to them later. Variables
+have already been used in many of the examples. The name of a variable
+must be a sequence of letters, digits, or underscores, and it may not begin
+with a digit.
+Here, a @dfn{letter} is any one of the 52 upper- and lowercase
+English letters. Other characters that may be defined as letters
+in non-English locales are not valid in variable names.
+Case is significant in variable names; @code{a} and @code{A}
+are distinct variables.
+
+A variable name is a valid expression by itself; it represents the
+variable's current value. Variables are given new values with
+@dfn{assignment operators}, @dfn{increment operators}, and
+@dfn{decrement operators}.
+@xref{Assignment Ops}.
+In addition, the @code{sub()} and @code{gsub()} functions can
+change a variable's value, and the @code{match()}, @code{split()},
+and @code{patsplit()} functions can change the contents of their
+array parameters. @xref{String Functions}.
+
+@cindex variables, built-in
+@cindex variables, initializing
+A few variables have special built-in meanings, such as @code{FS} (the
+field separator), and @code{NF} (the number of fields in the current input
+record). @DBXREF{Built-in Variables} for a list of the predefined variables.
+These predefined variables can be used and assigned just like all other
+variables, but their values are also used or changed automatically by
+@command{awk}. All predefined variables' names are entirely uppercase.
+
+Variables in @command{awk} can be assigned either numeric or string values.
+The kind of value a variable holds can change over the life of a program.
+By default, variables are initialized to the empty string, which
+is zero if converted to a number. There is no need to explicitly
+initialize a variable in @command{awk},
+which is what you would do in C and in most other traditional languages.
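+
+For example, the following sketch shows an uninitialized variable
+acting as zero or as the empty string, depending on context:
+
+@example
+BEGIN @{
+    print x + 0        # prints 0
+    print "<" x ">"    # prints <>
+@}
+@end example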
+
+@node Assignment Options
+@subsubsection Assigning Variables on the Command Line
+@cindex variables, assigning on command line
+@cindex command line, variables@comma{} assigning on
+
+Any @command{awk} variable can be set by including a @dfn{variable assignment}
+among the arguments on the command line when @command{awk} is invoked
+(@pxref{Other Arguments}).
+Such an assignment has the following form:
+
+@example
+@var{variable}=@var{text}
+@end example
+
+@cindex @option{-v} option
+@noindent
+With it, a variable is set either at the beginning of the
+@command{awk} run or in between input files.
+When the assignment is preceded with the @option{-v} option,
+as in the following:
+
+@example
+-v @var{variable}=@var{text}
+@end example
+
+@noindent
+the variable is set at the very beginning, even before the
+@code{BEGIN} rules execute. The @option{-v} option and its assignment
+must precede all the @value{FN} arguments, as well as the program text.
+(@DBXREF{Options} for more information about
+the @option{-v} option.)
+Otherwise, the variable assignment is performed at a time determined by
+its position among the input file arguments---after the processing of the
+preceding input file argument. For example:
+
+@example
+awk '@{ print $n @}' n=4 inventory-shipped n=2 mail-list
+@end example
+
+@noindent
+prints the value of field number @code{n} for all input records. Before
+the first file is read, the command line sets the variable @code{n}
+equal to four. This causes the fourth field to be printed in lines from
+@file{inventory-shipped}. After the first file has finished,
+but before the second file is started, @code{n} is set to two, so that the
+second field is printed in lines from @file{mail-list}:
+
+@example
+$ @kbd{awk '@{ print $n @}' n=4 inventory-shipped n=2 mail-list}
+@print{} 15
+@print{} 24
+@dots{}
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+@end example
+
+@cindex dark corner, command-line arguments
+Command-line arguments are made available for explicit examination by
+the @command{awk} program in the @code{ARGV} array
+(@pxref{ARGC and ARGV}).
+@command{awk} processes the values of command-line assignments for escape
+sequences
+(@pxref{Escape Sequences}).
+@value{DARKCORNER}
+
+@node Conversion
+@subsection Conversion of Strings and Numbers
+
+Number-to-string and string-to-number conversion are generally
+straightforward. There can be subtleties to be aware of;
+this @value{SECTION} discusses this important facet of @command{awk}.
+
+@menu
+* Strings And Numbers:: How @command{awk} Converts Between Strings And
+ Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
+@end menu
+
+@node Strings And Numbers
+@subsubsection How @command{awk} Converts Between Strings and Numbers
+
+@cindex converting, strings to numbers
+@cindex strings, converting
+@cindex numbers, converting
+@cindex converting, numbers to strings
+Strings are converted to numbers and numbers are converted to strings, if the context
+of the @command{awk} program demands it. For example, if the value of
+either @code{foo} or @code{bar} in the expression @samp{foo + bar}
+happens to be a string, it is converted to a number before the addition
+is performed. If numeric values appear in string concatenation, they
+are converted to strings. Consider the following:
+
+@example
+two = 2; three = 3
+print (two three) + 4
+@end example
+
+@noindent
+This prints the (numeric) value 27. The numeric values of
+the variables @code{two} and @code{three} are converted to strings and
+concatenated together. The resulting string is converted back to the
+number 23, to which 4 is then added.
+
+@cindex null strings, converting numbers to strings
+@cindex type conversion
+If, for some reason, you need to force a number to be converted to a
+string, concatenate that number with the empty string, @code{""}.
+To force a string to be converted to a number, add zero to that string.
+A string is converted to a number by interpreting any numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1,000, and @code{"25fix"}
+has a numeric value of 25.
+Strings that can't be interpreted as valid numbers convert to zero.
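+
+For example, in the following sketch, @code{str} ends up with the string
+value @code{"3"}, and @code{val} with the numeric value 3.14:
+
+@example
+num = 3
+str = num ""           # force conversion to a string
+val = "3.14abc" + 0    # force conversion to a number
+@end example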
+
+@cindex @code{CONVFMT} variable
+The exact manner in which numbers are converted into strings is controlled
+by the @command{awk} predefined variable @code{CONVFMT} (@pxref{Built-in Variables}).
+Numbers are converted using the @code{sprintf()} function
+with @code{CONVFMT} as the format
+specifier
+(@pxref{String Functions}).
+
+@code{CONVFMT}'s default value is @code{"%.6g"}, which creates a value with
+at most six significant digits. For some applications, you might want to
+change it to specify more precision.
+On most modern machines,
+17 digits is usually enough to capture a floating-point number's
+value exactly.@footnote{Pathological cases can require up to
+752 digits (!), but we doubt that you need to worry about this.}
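+
+For example, the following sketch shows @code{CONVFMT} affecting a
+number-to-string conversion performed via concatenation:
+
+@example
+$ @kbd{gawk 'BEGIN @{ CONVFMT = "%2.2f"; a = 3.14159; print (a "") @}'}
+@print{} 3.14
+@end example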
+
+@cindex dark corner, @code{CONVFMT} variable
+Strange results can occur if you set @code{CONVFMT} to a string that doesn't
+tell @code{sprintf()} how to format floating-point numbers in a useful way.
+For example, if you forget the @samp{%} in the format, @command{awk} converts
+all numbers to the same constant string.
+
+As a special case, if a number is an integer, then the result of converting
+it to a string is @emph{always} an integer, no matter what the value of
+@code{CONVFMT} may be. Given the following code fragment:
+
+@example
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+@end example
+
+@noindent
+@code{b} has the value @code{"12"}, not @code{"12.00"}.
+@value{DARKCORNER}
+
+@sidebar Pre-POSIX @command{awk} Used @code{OFMT} for String Conversion
+@cindex POSIX @command{awk}, @code{OFMT} variable and
+@cindex @code{OFMT} variable
+@cindex portability, new @command{awk} vs.@: old @command{awk}
+@cindex @command{awk}, new vs.@: old, @code{OFMT} variable
+Prior to the POSIX standard, @command{awk} used the value
+of @code{OFMT} for converting numbers to strings. @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversion from the semantics of printing. Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
+of cases, old @command{awk} programs do not change their behavior.
+@DBXREF{Print} for more information on the @code{print} statement.
+@end sidebar
+
+@node Locale influences conversions
+@subsubsection Locales Can Influence Conversion
+
+Where you are can matter when it comes to converting between numbers and
+strings. The local character set and language---the @dfn{locale}---can
+affect numeric formats. In particular, for @command{awk} programs,
+it affects the decimal point character and the thousands-separator
+character. The @code{"C"} locale, and most English-language locales,
+use the period character (@samp{.}) as the decimal point and don't
+have a thousands separator. However, many (if not most) European and
+non-English locales use the comma (@samp{,}) as the decimal point
+character. European locales often use either a space or a period as
+the thousands separator, if they have one.
+
+@cindex dark corner, locale's decimal point character
+The POSIX standard says that @command{awk} always uses the period as the decimal
+point when reading the @command{awk} program source code, and for
+command-line variable assignments (@pxref{Other Arguments}). However,
+when interpreting input data, for @code{print} and @code{printf} output,
+and for number-to-string conversion, the local decimal point character
+is used. @value{DARKCORNER} In all cases, numbers in source code and
+in input data cannot have a thousands separator. Here are some examples
+indicating the difference in behavior, on a GNU/Linux system:
+
+@example
+$ @kbd{export POSIXLY_CORRECT=1} @ii{Force POSIX behavior}
+$ @kbd{gawk 'BEGIN @{ printf "%g\n", 3.1415927 @}'}
+@print{} 3.14159
+$ @kbd{LC_ALL=en_DK.utf-8 gawk 'BEGIN @{ printf "%g\n", 3.1415927 @}'}
+@print{} 3,14159
+$ @kbd{echo 4,321 | gawk '@{ print $1 + 1 @}'}
+@print{} 5
+$ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
+@print{} 5,321
+@end example
+
+@noindent
+The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
+the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
+treats @samp{4,321} as 4, while in the Danish locale, it's treated
+as the full number including the fractional part, 4.321.
+
+Some earlier versions of @command{gawk} fully complied with this aspect
+of the standard. However, many users in non-English locales complained
+about this behavior, because their data used a period as the decimal
+point, so the default behavior was restored to use a period as the
+decimal point character. You can use the @option{--use-lc-numeric}
+option (@pxref{Options}) to force @command{gawk} to use the locale's
+decimal point character. (@command{gawk} also uses the locale's decimal
+point character when in POSIX mode, either via @option{--posix}, or the
+@env{POSIXLY_CORRECT} environment variable, as shown previously.)
+
+@ref{table-locale-affects} describes the cases in which the locale's decimal
+point character is used and when a period is used. Some of these
+features have not been described yet.
+
+@float Table,table-locale-affects
+@caption{Locale decimal point versus a period}
+@multitable @columnfractions .15 .20 .45
+@headitem Feature @tab Default @tab @option{--posix} or @option{--use-lc-numeric}
+@item @code{%'g} @tab Use locale @tab Use locale
+@item @code{%g} @tab Use period @tab Use locale
+@item Input @tab Use period @tab Use locale
+@item @code{strtonum()} @tab Use period @tab Use locale
+@end multitable
+@end float
+
+Finally, modern day formal standards and IEEE standard floating-point
+representation can have an unusual but important effect on the way
+@command{gawk} converts some special string values to numbers. The details
+are presented in @ref{POSIX Floating Point Problems}.
+
+@node All Operators
+@section Operators: Doing Something with Values
+
+This @value{SECTION} introduces the @dfn{operators} which make use
+of the values provided by constants and variables.
+
+@menu
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-},
+ etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+@end menu
+
+@node Arithmetic Ops
+@subsection Arithmetic Operators
+@cindex arithmetic operators
+@cindex operators, arithmetic
+@c @cindex addition
+@c @cindex subtraction
+@c @cindex multiplication
+@c @cindex division
+@c @cindex remainder
+@c @cindex quotient
+@c @cindex exponentiation
+
+The @command{awk} language uses the common arithmetic operators when
+evaluating expressions. All of these arithmetic operators follow normal
+precedence rules and work as you would expect them to.
+
+The following example uses a file named @file{grades}, which contains
+a list of student names as well as three test scores per student (it's
+a small class):
+
+@example
+Pat 100 97 58
+Sandy 84 72 93
+Chris 72 92 89
+@end example
+
+@noindent
+This program takes the file @file{grades} and prints the average
+of the scores:
+
+@example
+$ @kbd{awk '@{ sum = $2 + $3 + $4 ; avg = sum / 3}
+> @kbd{print $1, avg @}' grades}
+@print{} Pat 85
+@print{} Sandy 83
+@print{} Chris 84.3333
+@end example
+
+The following list provides the arithmetic operators in @command{awk},
+in order from the highest precedence to the lowest:
+
+@table @code
+@cindex common extensions, @code{**} operator
+@cindex extensions, common@comma{} @code{**} operator
+@cindex POSIX @command{awk}, arithmetic operators and
+@item @var{x} ^ @var{y}
+@itemx @var{x} ** @var{y}
+Exponentiation; @var{x} raised to the @var{y} power. @samp{2 ^ 3} has
+the value eight; the character sequence @samp{**} is equivalent to
+@samp{^}. @value{COMMONEXT}
+
+@item - @var{x}
+Negation.
+
+@item + @var{x}
+Unary plus; the expression is converted to a number.
+
+@item @var{x} * @var{y}
+Multiplication.
+
+@cindex troubleshooting, division
+@cindex division
+@item @var{x} / @var{y}
+Division; because all numbers in @command{awk} are floating-point
+numbers, the result is @emph{not} rounded to an integer---@samp{3 / 4} has
+the value 0.75. (It is a common mistake, especially for C programmers,
+to forget that @emph{all} numbers in @command{awk} are floating point,
+and that division of integer-looking constants produces a real number,
+not an integer.)
+
+@item @var{x} % @var{y}
+Remainder; further discussion is provided in the text, just
+after this list.
+
+@item @var{x} + @var{y}
+Addition.
+
+@item @var{x} - @var{y}
+Subtraction.
+@end table
+
+Unary plus and minus have the same precedence,
+the multiplication operators all have the same precedence, and
+addition and subtraction have the same precedence.
+
+@cindex differences in @command{awk} and @command{gawk}, trunc-mod operation
+@cindex trunc-mod operation
+When computing the remainder of @samp{@var{x} % @var{y}},
+the quotient is rounded toward zero to an integer and
+multiplied by @var{y}. This result is subtracted from @var{x};
+this operation is sometimes known as ``trunc-mod.'' The following
+relation always holds:
+
+@example
+b * int(a / b) + (a % b) == a
+@end example
+
+One possibly undesirable effect of this definition of remainder is that
+@samp{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
+
+@example
+-17 % 8 = -1
+@end example
+
+In other @command{awk} implementations, the signedness of the remainder
+may be machine-dependent.
+@c FIXME !!! what does posix say?
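+
+For instance, a small sketch (using arbitrary sample values) verifies the
+relation and shows the sign of the remainder:
+
+@example
+BEGIN @{
+    a = -17; b = 8
+    print a % b                               # prints -1
+    print (b * int(a / b) + (a % b) == a)     # prints 1; the relation holds
+@}
+@end example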
+
+@cindex portability, @code{**} operator and
+@cindex @code{*} (asterisk), @code{**} operator
+@cindex asterisk (@code{*}), @code{**} operator
+@quotation NOTE
+The POSIX standard only specifies the use of @samp{^}
+for exponentiation.
+For maximum portability, do not use the @samp{**} operator.
+@end quotation
+
+@node Concatenation
+@subsection String Concatenation
+@cindex Kernighan, Brian
+@quotation
+@i{It seemed like a good idea at the time.}
+@author Brian Kernighan
+@end quotation
+
+@cindex string operators
+@cindex operators, string
+@cindex concatenating
+There is only one string operation: concatenation. It does not have a
+specific operator to represent it. Instead, concatenation is performed by
+writing expressions next to one another, with no operator. For example:
+
+@example
+$ @kbd{awk '@{ print "Field number one: " $1 @}' mail-list}
+@print{} Field number one: Amelia
+@print{} Field number one: Anthony
+@dots{}
+@end example
+
+Without the space in the string constant after the @samp{:}, the line
+runs together. For example:
+
+@example
+$ @kbd{awk '@{ print "Field number one:" $1 @}' mail-list}
+@print{} Field number one:Amelia
+@print{} Field number one:Anthony
+@dots{}
+@end example
+
+@cindex troubleshooting, string concatenation
+Because string concatenation does not have an explicit operator, it is
+often necessary to ensure that it happens at the right time by using
+parentheses to enclose the items to concatenate. For example,
+you might expect that the
+following code fragment concatenates @code{file} and @code{name}:
+
+@example
+file = "file"
+name = "name"
+print "something meaningful" > file name
+@end example
+
+@cindex Brian Kernighan's @command{awk}
+@cindex @command{mawk} utility
+@noindent
+This produces a syntax error with some versions of Unix
+@command{awk}.@footnote{It happens that BWK
+@command{awk}, @command{gawk}, and @command{mawk} all ``get it right,''
+but you should not rely on this.}
+It is necessary to use the following:
+
+@example
+print "something meaningful" > (file name)
+@end example
+
+@cindex order of evaluation, concatenation
+@cindex evaluation order, concatenation
+@cindex side effects
+Parentheses should be used around concatenation in all but the
+most common contexts, such as on the righthand side of @samp{=}.
+Be careful about the kinds of expressions used in string concatenation.
+In particular, the order of evaluation of expressions used for concatenation
+is undefined in the @command{awk} language. Consider this example:
+
+@example
+BEGIN @{
+ a = "don't"
+ print (a " " (a = "panic"))
+@}
+@end example
+
+@noindent
+It is not defined whether the second assignment to @code{a} happens
+before or after the value of @code{a} is retrieved for producing the
+concatenated value. The result could be either @samp{don't panic},
+or @samp{panic panic}.
+@c see test/nasty.awk for a worse example
+
+The precedence of concatenation, when mixed with other operators, is often
+counter-intuitive. Consider this example:
+
+@ignore
+> To: bug-gnu-utils@@gnu.org
+> CC: arnold@@gnu.org
+> Subject: gawk 3.0.4 bug with {print -12 " " -24}
+> From: Russell Schulz <Russell_Schulz@locutus.ofB.ORG>
+> Date: Tue, 8 Feb 2000 19:56:08 -0700
+>
+> gawk 3.0.4 on NT gives me:
+>
+> prompt> cat bad.awk
+> BEGIN { print -12 " " -24; }
+>
+> prompt> gawk -f bad.awk
+> -12-24
+>
+> when I would expect
+>
+> -12 -24
+>
+> I have not investigated the source, or other implementations. The
+> bug is there on my NT and DOS versions 2.15.6 .
+@end ignore
+
+@example
+$ @kbd{awk 'BEGIN @{ print -12 " " -24 @}'}
+@print{} -12-24
+@end example
+
+This ``obviously'' is concatenating @minus{}12, a space, and @minus{}24.
+But where did the space disappear to?
+The answer lies in the combination of operator precedences and
+@command{awk}'s automatic conversion rules. To get the desired result,
+write the program this way:
+
+@example
+$ @kbd{awk 'BEGIN @{ print -12 " " (-24) @}'}
+@print{} -12 -24
+@end example
+
+This forces @command{awk} to treat the @samp{-} on the @samp{-24} as unary.
+Otherwise, it's parsed as follows:
+
+@display
+ @minus{}12 (@code{"@ "} @minus{} 24)
+@result{} @minus{}12 (0 @minus{} 24)
+@result{} @minus{}12 (@minus{}24)
+@result{} @minus{}12@minus{}24
+@end display
+
+As mentioned earlier,
+when mixing concatenation with other operators, @emph{parenthesize}. Otherwise,
+you're never quite sure what you'll get.
+
+@node Assignment Ops
+@subsection Assignment Expressions
+@cindex assignment operators
+@cindex operators, assignment
+@cindex expressions, assignment
+@cindex @code{=} (equals sign), @code{=} operator
+@cindex equals sign (@code{=}), @code{=} operator
+An @dfn{assignment} is an expression that stores a (usually different)
+value into a variable. For example, let's assign the value one to the variable
+@code{z}:
+
+@example
+z = 1
+@end example
+
+After this expression is executed, the variable @code{z} has the value one.
+Whatever old value @code{z} had before the assignment is forgotten.
+
+Assignments can also store string values. For example, the
+following stores
+the value @code{"this food is good"} in the variable @code{message}:
+
+@example
+thing = "food"
+predicate = "good"
+message = "this " thing " is " predicate
+@end example
+
+@noindent
+@cindex side effects, assignment expressions
+This also illustrates string concatenation.
+The @samp{=} sign is called an @dfn{assignment operator}. It is the
+simplest assignment operator because the value of the righthand
+operand is stored unchanged.
+Most operators (addition, concatenation, and so on) have no effect
+except to compute a value. If the value isn't used, there's no reason to
+use the operator. An assignment operator is different; it does
+produce a value, but even if you ignore it, the assignment still
+makes itself felt through the alteration of the variable. We call this
+a @dfn{side effect}.
+
+@cindex lvalues/rvalues
+@cindex rvalues/lvalues
+@cindex assignment operators, lvalues/rvalues
+@cindex operators, assignment
+The lefthand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields}) or
+an array element (@pxref{Arrays}).
+These are all called @dfn{lvalues},
+which means they can appear on the lefthand side of an assignment operator.
+The righthand operand may be any expression; it produces the new value
+that the assignment stores in the specified variable, field, or array
+element. (Such values are called @dfn{rvalues}.)
+
+@cindex variables, types of
+It is important to note that variables do @emph{not} have permanent types.
+A variable's type is simply the type of whatever value was last assigned
+to it. In the following program fragment, the variable
+@code{foo} has a numeric value at first, and a string value later on:
+
+@example
+foo = 1
+print foo
+foo = "bar"
+print foo
+@end example
+
+@noindent
+When the second assignment gives @code{foo} a string value, the fact that
+it previously had a numeric value is forgotten.
+
+String values that do not begin with a digit have a numeric value of
+zero. After executing the following code, the value of @code{foo} is five:
+
+@example
+foo = "a string"
+foo = foo + 5
+@end example
+
+@quotation NOTE
+Using a variable as a number and then later as a string
+can be confusing and is poor programming style. The previous two examples
+illustrate how @command{awk} works, @emph{not} how you should write your
+programs!
+@end quotation
+
+An assignment is an expression, so it has a value---the same value that
+is assigned. Thus, @samp{z = 1} is an expression with the value one.
+One consequence of this is that you can write multiple assignments together,
+such as:
+
+@example
+x = y = z = 5
+@end example
+
+@noindent
+This example stores the value five in all three variables
+(@code{x}, @code{y}, and @code{z}).
+It does so because the
+value of @samp{z = 5}, which is five, is stored into @code{y} and then
+the value of @samp{y = z = 5}, which is five, is stored into @code{x}.
+
+Assignments may be used anywhere an expression is called for. For
+example, it is valid to write @samp{x != (y = 1)} to set @code{y} to one,
+and then test whether @code{x} equals one. But this style tends to make
+programs hard to read; such nesting of assignments should be avoided,
+except perhaps in a one-shot program.
+
+@cindex @code{+} (plus sign), @code{+=} operator
+@cindex plus sign (@code{+}), @code{+=} operator
+Aside from @samp{=}, there are several other assignment operators that
+do arithmetic with the old value of the variable. For example, the
+operator @samp{+=} computes a new value by adding the righthand value
+to the old value of the variable. Thus, the following assignment adds
+five to the value of @code{foo}:
+
+@example
+foo += 5
+@end example
+
+@noindent
+This is equivalent to the following:
+
+@example
+foo = foo + 5
+@end example
+
+@noindent
+Use whichever makes the meaning of your program clearer.
+
+There are situations where using @samp{+=} (or any assignment operator)
+is @emph{not} the same as simply repeating the lefthand operand in the
+righthand expression. For example:
+
+@cindex Rankin, Pat
+@example
+# Thanks to Pat Rankin for this example
+BEGIN @{
+ foo[rand()] += 5
+ for (x in foo)
+ print x, foo[x]
+
+ bar[rand()] = bar[rand()] + 5
+ for (x in bar)
+ print x, bar[x]
+@}
+@end example
+
+@cindex operators, assignment, evaluation order
+@cindex assignment operators, evaluation order
+@noindent
+The indices of @code{bar} are practically guaranteed to be different, because
+@code{rand()} returns different values each time it is called.
+(Arrays and the @code{rand()} function haven't been covered yet.
+@xref{Arrays},
+and
+@ifnotdocbook
+@DBPXREF{Numeric Functions}
+@end ifnotdocbook
+@ifdocbook
+@DBREF{Numeric Functions}
+@end ifdocbook
+for more information).
+This example illustrates an important fact about assignment
+operators: the lefthand expression is only evaluated @emph{once}.
+
+It is up to the implementation as to which expression is evaluated
+first, the lefthand or the righthand.
+Consider this example:
+
+@example
+i = 1
+a[i += 2] = i + 1
+@end example
+
+@noindent
+The value of @code{a[3]} could be either two or four.
+
+@ref{table-assign-ops} lists the arithmetic assignment operators. In each
+case, the righthand operand is an expression whose value is converted
+to a number.
+
+@cindex @code{-} (hyphen), @code{-=} operator
+@cindex hyphen (@code{-}), @code{-=} operator
+@cindex @code{*} (asterisk), @code{*=} operator
+@cindex asterisk (@code{*}), @code{*=} operator
+@cindex @code{/} (forward slash), @code{/=} operator
+@cindex forward slash (@code{/}), @code{/=} operator
+@cindex @code{%} (percent sign), @code{%=} operator
+@cindex percent sign (@code{%}), @code{%=} operator
+@cindex @code{^} (caret), @code{^=} operator
+@cindex caret (@code{^}), @code{^=} operator
+@cindex @code{*} (asterisk), @code{**=} operator
+@cindex asterisk (@code{*}), @code{**=} operator
+@float Table,table-assign-ops
+@caption{Arithmetic assignment operators}
+@multitable @columnfractions .30 .70
+@headitem Operator @tab Effect
+@item @var{lvalue} @code{+=} @var{increment} @tab Add @var{increment} to the value of @var{lvalue}
+@item @var{lvalue} @code{-=} @var{decrement} @tab Subtract @var{decrement} from the value of @var{lvalue}
+@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiply the value of @var{lvalue} by @var{coefficient}
+@item @var{lvalue} @code{/=} @var{divisor} @tab Divide the value of @var{lvalue} by @var{divisor}
+@item @var{lvalue} @code{%=} @var{modulus} @tab Set @var{lvalue} to its remainder by @var{modulus}
+@cindex common extensions, @code{**=} operator
+@cindex extensions, common@comma{} @code{**=} operator
+@cindex @command{awk} language, POSIX version
+@cindex POSIX @command{awk}
+@item @var{lvalue} @code{^=} @var{power} @tab Raise @var{lvalue} to the power @var{power}
+@item @var{lvalue} @code{**=} @var{power} @tab Raise @var{lvalue} to the power @var{power} @value{COMMONEXT}
+@end multitable
+@end float
+
+@cindex POSIX @command{awk}, @code{**=} operator and
+@cindex portability, @code{**=} operator and
+@quotation NOTE
+Only the @samp{^=} operator is specified by POSIX.
+For maximum portability, do not use the @samp{**=} operator.
+@end quotation
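+
+As an informal sketch (with made-up values), the following fragment runs
+through several of these operators in turn; the comments show the value
+of @code{x} after each step:
+
+@example
+BEGIN @{
+    x = 10
+    x += 2      # x is now 12
+    x -= 4      # x is now 8
+    x *= 3      # x is now 24
+    x /= 6      # x is now 4
+    x %= 3      # x is now 1
+    x ^= 5      # x is still 1
+    print x     # prints 1
+@}
+@end example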
+
+@sidebar Syntactic Ambiguities Between @samp{/=} and Regular Expressions
+@cindex dark corner, regexp constants, @code{/=} operator and
+@cindex @code{/} (forward slash), @code{/=} operator, vs. @code{/=@dots{}/} regexp constant
+@cindex forward slash (@code{/}), @code{/=} operator, vs. @code{/=@dots{}/} regexp constant
+@cindex regexp constants, @code{/=@dots{}/}, @code{/=} operator and
+
+@c derived from email from "Nelson H. F. Beebe" <beebe@math.utah.edu>
+@c Date: Mon, 1 Sep 1997 13:38:35 -0600 (MDT)
+
+@cindex dark corner, @code{/=} operator vs. @code{/=@dots{}/} regexp constant
+@cindex ambiguity, syntactic: @code{/=} operator vs. @code{/=@dots{}/} regexp constant
+@cindex syntactic ambiguity: @code{/=} operator vs. @code{/=@dots{}/} regexp constant
+@cindex @code{/=} operator vs. @code{/=@dots{}/} regexp constant
+There is a syntactic ambiguity between the @code{/=} assignment
+operator and regexp constants whose first character is an @samp{=}.
+@value{DARKCORNER}
+This is most notable in some commercial @command{awk} versions.
+For example:
+
+@example
+$ @kbd{awk /==/ /dev/null}
+@error{} awk: syntax error at source line 1
+@error{} context is
+@error{} >>> /= <<<
+@error{} awk: bailing out at source line 1
+@end example
+
+@noindent
+A workaround is:
+
+@example
+awk '/[=]=/' /dev/null
+@end example
+
+@command{gawk} does not have this problem; BWK @command{awk}
+and @command{mawk} also do not.
+@end sidebar
+
+@node Increment Ops
+@subsection Increment and Decrement Operators
+
+@cindex increment operators
+@cindex operators, decrement/increment
+@dfn{Increment} and @dfn{decrement operators} increase or decrease the value of
+a variable by one. An assignment operator can do the same thing, so
+the increment operators add no power to the @command{awk} language; however, they
+are convenient abbreviations for very common operations.
+
+@cindex side effects
+@cindex @code{+} (plus sign), @code{++} operator
+@cindex plus sign (@code{+}), @code{++} operator
+@cindex side effects, decrement/increment operators
+The operator used for adding one is written @samp{++}. It can be used to increment
+a variable either before or after taking its value.
+To @dfn{pre-increment} a variable @code{v}, write @samp{++v}. This adds
+one to the value of @code{v}---that new value is also the value of the
+expression. (The assignment expression @samp{v += 1} is completely equivalent.)
+Writing the @samp{++} after the variable specifies @dfn{post-increment}. This
+increments the variable value just the same; the difference is that the
+value of the increment expression itself is the variable's @emph{old}
+value. Thus, if @code{foo} has the value four, then the expression @samp{foo++}
+has the value four, but it changes the value of @code{foo} to five.
+In other words, the operator returns the old value of the variable,
+but with the side effect of incrementing it.
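+
+For example, here is a minimal illustration of the difference between
+the two forms:
+
+@example
+BEGIN @{
+    foo = 4
+    print foo++     # prints 4; foo is now 5
+    print ++foo     # prints 6; foo is now 6
+@}
+@end example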
+
+The post-increment @samp{foo++} is nearly the same as writing @samp{(foo
++= 1) - 1}. It is not perfectly equivalent because all numbers in
+@command{awk} are floating point---in floating point, @samp{foo + 1 - 1} does
+not necessarily equal @code{foo}. But the difference is minute as
+long as you stick to numbers that are fairly small (less than
+@iftex
+@math{10^{12}}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+10^12).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+10<superscript>12</superscript>). @c
+@end docbook
+
+@cindex @code{$} (dollar sign), incrementing fields and arrays
+@cindex dollar sign (@code{$}), incrementing fields and arrays
+Fields and array elements are incremented
+just like variables. (Use @samp{$(i++)} when you want to do a field reference
+and a variable increment at the same time. The parentheses are necessary
+because of the precedence of the field reference operator @samp{$}.)
+
+@cindex decrement operators
+The decrement operator @samp{--} works just like @samp{++}, except that
+it subtracts one instead of adding it. As with @samp{++}, it can be used before
+the lvalue to pre-decrement or after it to post-decrement.
+Following is a summary of increment and decrement expressions:
+
+@table @code
+@cindex @code{+} (plus sign), @code{++} operator
+@cindex plus sign (@code{+}), @code{++} operator
+@item ++@var{lvalue}
+Increment @var{lvalue}, returning the new value as the
+value of the expression.
+
+@item @var{lvalue}++
+Increment @var{lvalue}, returning the @emph{old} value of @var{lvalue}
+as the value of the expression.
+
+@cindex @code{-} (hyphen), @code{--} operator
+@cindex hyphen (@code{-}), @code{--} operator
+@item --@var{lvalue}
+Decrement @var{lvalue}, returning the new value as the
+value of the expression.
+(This expression is
+like @samp{++@var{lvalue}}, but instead of adding, it subtracts.)
+
+@item @var{lvalue}--
+Decrement @var{lvalue}, returning the @emph{old} value of @var{lvalue}
+as the value of the expression.
+(This expression is
+like @samp{@var{lvalue}++}, but instead of adding, it subtracts.)
+@end table
+
+@sidebar Operator Evaluation Order
+@cindex precedence
+@cindex operators, precedence
+@cindex portability, operators
+@cindex evaluation order
+@cindex Marx, Groucho
+@quotation
+@i{Doctor, doctor! It hurts when I do this!@*
+So don't do that!}
+@author Groucho Marx
+@end quotation
+
+@noindent
+What happens for something like the following?
+
+@example
+b = 6
+print b += b++
+@end example
+
+@noindent
+Or something even stranger?
+
+@example
+b = 6
+b += ++b + b++
+print b
+@end example
+
+@cindex side effects
+In other words, when do the various side effects prescribed by the
+postfix operators (@samp{b++}) take effect?
+When side effects happen is @dfn{implementation defined}.
+In other words, it is up to the particular version of @command{awk}.
+The result for the first example may be 12 or 13, and for the second, it
+may be 22 or 23.
+
+In short, doing things like this is not recommended and definitely
+not anything that you can rely upon for portability.
+You should avoid such things in your own programs.
+@c You'll sleep better at night and be able to look at yourself
+@c in the mirror in the morning.
+@end sidebar
+
+@node Truth Values and Conditions
+@section Truth Values and Conditions
+
+In certain contexts, expression values also serve as ``truth values''
+(i.e., they determine what should happen next as the program runs). This
+@value{SECTION} describes how @command{awk} defines ``true'' and ``false''
+and how values are compared.
+
+@menu
+* Truth Values:: What is ``true'' and what is ``false''.
+* Typing and Comparison:: How variables acquire types and how this
+ affects comparison of numbers and strings with
+ @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions using boolean
+ operators @samp{||} (``or''), @samp{&&}
+ (``and'') and @samp{!} (``not'').
+* Conditional Exp:: Conditional expressions select between two
+ subexpressions under control of a third
+ subexpression.
+@end menu
+
+@node Truth Values
+@subsection True and False in @command{awk}
+@cindex truth values
+@cindex logical false/true
+@cindex false, logical
+@cindex true, logical
+
+@cindex null strings
+Many programming languages have a special representation for the concepts
+of ``true'' and ``false.'' Such languages usually use the special
+constants @code{true} and @code{false}, or perhaps their uppercase
+equivalents.
+However, @command{awk} is different.
+It borrows a very simple concept of true and
+false from C. In @command{awk}, any nonzero numeric value @emph{or} any
+nonempty string value is true. Any other value (zero or the null
+string, @code{""}) is false. The following program prints @samp{A strange
+truth value} three times:
+
+@example
+BEGIN @{
+ if (3.1415927)
+ print "A strange truth value"
+ if ("Four Score And Seven Years Ago")
+ print "A strange truth value"
+ if (j = 57)
+ print "A strange truth value"
+@}
+@end example
+
+@cindex dark corner, @code{"0"} is actually true
+There is a surprising consequence of the ``nonzero or non-null'' rule:
+the string constant @code{"0"} is actually true, because it is non-null.
+@value{DARKCORNER}
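+
+For example, the following one-liner should print @samp{true}:
+
+@example
+$ @kbd{awk 'BEGIN @{ if ("0") print "true"; else print "false" @}'}
+@print{} true
+@end example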
+
+@node Typing and Comparison
+@subsection Variable Typing and Comparison Expressions
+@quotation
+@i{The Guide is definitive. Reality is frequently inaccurate.}
+@author Douglas Adams, @cite{The Hitchhiker's Guide to the Galaxy}
+@end quotation
+
+@cindex comparison expressions
+@cindex expressions, comparison
+@cindex expressions, matching, See comparison expressions
+@cindex matching, expressions, See comparison expressions
+@cindex relational operators, See comparison operators
+@cindex operators, relational, See operators@comma{} comparison
+@cindex variable typing
+@cindex variables, types of, comparison expressions and
+Unlike other programming languages, @command{awk} variables do not have a
+fixed type. Instead, they can be either a number or a string, depending
+upon the value that is assigned to them.
+We look now at how variables are typed, and how @command{awk}
+compares variables.
+
+@menu
+* Variable Typing:: String type versus numeric type.
+* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.
+@end menu
+
+@node Variable Typing
+@subsubsection String Type versus Numeric Type
+
+@cindex numeric, strings
+@cindex strings, numeric
+@cindex POSIX @command{awk}, numeric strings and
+The POSIX standard introduced
+the concept of a @dfn{numeric string}, which is simply a string that looks
+like a number---for example, @code{@w{" +2"}}. This concept is used
+for determining the type of a variable.
+The type of the variable is important because the types of two variables
+determine how they are compared.
+Variable typing follows these rules:
+
+
+@itemize @value{BULLET}
+@item
+A numeric constant or the result of a numeric operation has the @var{numeric}
+attribute.
+
+@item
+A string constant or the result of a string operation has the @var{string}
+attribute.
+
+@item
+Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
+@code{ENVIRON} elements, and the elements of an array created by
+@code{match()}, @code{split()}, and @code{patsplit()} that are numeric
+strings have the @var{strnum} attribute. Otherwise, they have
+the @var{string} attribute. Uninitialized variables also have the
+@var{strnum} attribute.
+
+@item
+Attributes propagate across assignments but are not changed by
+any use.
+@c (Although a use may cause the entity to acquire an additional
+@c value such that it has both a numeric and string value, this leaves the
+@c attribute unchanged.)
+@c This is important but not relevant
+@end itemize
+
+The last rule is particularly important. In the following program,
+@code{a} has numeric type, even though it is later used in a string
+operation:
+
+@example
+BEGIN @{
+ a = 12.345
+ b = a " is a cute number"
+ print b
+@}
+@end example
+
+When two operands are compared, either string comparison or numeric comparison
+may be used. This depends upon the attributes of the operands, according to the
+following symmetric matrix:
+
+@c thanks to Karl Berry, kb@cs.umb.edu, for major help with TeX tables
+@tex
+\centerline{
+\vbox{\bigskip % space above the table (about 1 linespace)
+% Because we have vertical rules, we can't let TeX insert interline space
+% in its usual way.
+\offinterlineskip
+%
+% Define the table template. & separates columns, and \cr ends the
+% template (and each row). # is replaced by the text of that entry on
+% each row. The template for the first column breaks down like this:
+% \strut -- a way to make each line have the height and depth
+% of a normal line of type, since we turned off interline spacing.
+% \hfil -- infinite glue; has the effect of right-justifying in this case.
+% # -- replaced by the text (for instance, `STRNUM', in the last row).
+% \quad -- about the width of an `M'. Just separates the columns.
+%
+% The second column (\vrule#) is what generates the vertical rule that
+% spans table rows.
+%
+% The doubled && before the next entry means `repeat the following
+% template as many times as necessary on each line' -- in our case, twice.
+%
+% The template itself, \quad#\hfil, left-justifies with a little space before.
+%
+\halign{\strut\hfil#\quad&\vrule#&&\quad#\hfil\cr
+ &&STRING &NUMERIC &STRNUM\cr
+% The \omit tells TeX to skip inserting the template for this column on
+% this particular row. In this case, we only want a little extra space
+% to separate the heading row from the rule below it. the depth 2pt --
+% `\vrule depth 2pt' is that little space.
+\omit &depth 2pt\cr
+% This is the horizontal rule below the heading. Since it has nothing to
+% do with the columns of the table, we use \noalign to get it in there.
+\noalign{\hrule}
+% Like above, this time a little more space.
+\omit &depth 4pt\cr
+% The remaining rows have nothing special about them.
+STRING &&string &string &string\cr
+NUMERIC &&string &numeric &numeric\cr
+STRNUM &&string &numeric &numeric\cr
+}}}
+@end tex
+@ifnottex
+@ifnotdocbook
+@display
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+@end display
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<informaltable>
+<tgroup cols="4">
+<colspec colname="1" align="left"/>
+<colspec colname="2" align="left"/>
+<colspec colname="3" align="left"/>
+<colspec colname="4" align="left"/>
+<thead>
+<row>
+<entry/>
+<entry>STRING</entry>
+<entry>NUMERIC</entry>
+<entry>STRNUM</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><emphasis role="bold">STRING</emphasis></entry>
+<entry>string</entry>
+<entry>string</entry>
+<entry>string</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">NUMERIC</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">STRNUM</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
+
+The basic idea is that user input that looks numeric---and @emph{only}
+user input---should be treated as numeric, even though it is actually
+made of characters and is therefore also a string.
+Thus, for example, the string constant @w{@code{" +3.14"}},
+when it appears in program source code,
+is a string---even though it looks numeric---and
+is @emph{never} treated as a number for comparison
+purposes.
+
+In short, when one operand is a ``pure'' string, such as a string
+constant, then a string comparison is performed. Otherwise, a
+numeric comparison is performed.
+
+This point bears additional emphasis: All user input is made of characters,
+and so is first and foremost of @var{string} type; input strings
+that look numeric are additionally given the @var{strnum} attribute.
+Thus, the six-character input string @w{@samp{ +3.14}} receives the
+@var{strnum} attribute. In contrast, the eight characters
+@w{@code{" +3.14"}} appearing in program text comprise a string constant.
+The following examples print @samp{1} when the comparison between
+the two different constants is true, @samp{0} otherwise:
+
+@c 22.9.2014: Tested with mawk and BWK awk, got same results.
+@example
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == " +3.14") @}'} @ii{True}
+@print{} 1
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "+3.14") @}'} @ii{False}
+@print{} 0
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "3.14") @}'} @ii{False}
+@print{} 0
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == 3.14) @}'} @ii{True}
+@print{} 1
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == " +3.14") @}'} @ii{False}
+@print{} 0
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "+3.14") @}'} @ii{True}
+@print{} 1
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "3.14") @}'} @ii{False}
+@print{} 0
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
+@print{} 1
+@end example
+
+@node Comparison Operators
+@subsubsection Comparison Operators
+
+@dfn{Comparison expressions} compare strings or numbers for
+relationships such as equality. They are written using @dfn{relational
+operators}, which are a superset of those in C.
+@ref{table-relational-ops} describes them.
+
+@cindex @code{<} (left angle bracket), @code{<} operator
+@cindex left angle bracket (@code{<}), @code{<} operator
+@cindex @code{<} (left angle bracket), @code{<=} operator
+@cindex left angle bracket (@code{<}), @code{<=} operator
+@cindex @code{>} (right angle bracket), @code{>=} operator
+@cindex right angle bracket (@code{>}), @code{>=} operator
+@cindex @code{>} (right angle bracket), @code{>} operator
+@cindex right angle bracket (@code{>}), @code{>} operator
+@cindex @code{=} (equals sign), @code{==} operator
+@cindex equals sign (@code{=}), @code{==} operator
+@cindex @code{!} (exclamation point), @code{!=} operator
+@cindex exclamation point (@code{!}), @code{!=} operator
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@cindex @code{in} operator
+@float Table,table-relational-ops
+@caption{Relational operators}
+@multitable @columnfractions .25 .75
+@headitem Expression @tab Result
+@item @var{x} @code{<} @var{y} @tab True if @var{x} is less than @var{y}
+@item @var{x} @code{<=} @var{y} @tab True if @var{x} is less than or equal to @var{y}
+@item @var{x} @code{>} @var{y} @tab True if @var{x} is greater than @var{y}
+@item @var{x} @code{>=} @var{y} @tab True if @var{x} is greater than or equal to @var{y}
+@item @var{x} @code{==} @var{y} @tab True if @var{x} is equal to @var{y}
+@item @var{x} @code{!=} @var{y} @tab True if @var{x} is not equal to @var{y}
+@item @var{x} @code{~} @var{y} @tab True if the string @var{x} matches the regexp denoted by @var{y}
+@item @var{x} @code{!~} @var{y} @tab True if the string @var{x} does not match the regexp denoted by @var{y}
+@item @var{subscript} @code{in} @var{array} @tab True if the array @var{array} has an element with the subscript @var{subscript}
+@end multitable
+@end float
+
+Comparison expressions have the value one if true and zero if false.
+When comparing operands of mixed types, numeric operands are converted
+to strings using the value of @code{CONVFMT}
+(@pxref{Conversion}).
+
+Strings are compared
+by comparing the first character of each, then the second character of each,
+and so on. Thus, @code{"10"} is less than @code{"9"}. If there are two
+strings where one is a prefix of the other, the shorter string is less than
+the longer one. Thus, @code{"abc"} is less than @code{"abcd"}.
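+
+Both cases can be checked directly:
+
+@example
+$ @kbd{awk 'BEGIN @{ print ("10" < "9"); print ("abc" < "abcd") @}'}
+@print{} 1
+@print{} 1
+@end example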
+
+@cindex troubleshooting, @code{==} operator
+It is very easy to accidentally mistype the @samp{==} operator and
+leave off one of the @samp{=} characters. The result is still valid
+@command{awk} code, but the program does not do what is intended:
+
+@example
+if (a = b) # oops! should be a == b
+ @dots{}
+else
+ @dots{}
+@end example
+
+@noindent
+Unless @code{b} happens to be zero or the null string, the @code{if}
+part of the test always succeeds. Because the operators are
+so similar, this kind of error is very difficult to spot when
+scanning the source code.
+
+The following list of expressions illustrates the kinds of comparisons
+@command{awk} performs, as well as what the result of each comparison is:
+
+@table @code
+@item 1.5 <= 2.0
+Numeric comparison (true)
+
+@item "abc" >= "xyz"
+String comparison (false)
+
+@item 1.5 != " +2"
+String comparison (true)
+
+@item "1e2" < "3"
+String comparison (true)
+
+@item a = 2; b = "2"
+@itemx a == b
+String comparison (true)
+
+@item a = 2; b = " +2"
+@itemx a == b
+String comparison (false)
+@end table
+
+In this example:
+
+@example
+$ @kbd{echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'}
+@print{} false
+@end example
+
+@cindex comparison expressions, string vs.@: regexp
+@c @cindex string comparison vs.@: regexp comparison
+@c @cindex regexp comparison vs.@: string comparison
+@noindent
+the result is @samp{false} because both @code{$1} and @code{$2}
+are user input. They are numeric strings---therefore both have
+the @var{strnum} attribute, dictating a numeric comparison.
+The purpose of the comparison rules and the use of numeric strings is
+to attempt to produce the behavior that is ``least surprising,'' while
+still ``doing the right thing.''
+
+String comparisons and regular expression comparisons are very different.
+For example:
+
+@example
+x == "foo"
+@end example
+
+@noindent
+has the value one, or is true if the variable @code{x}
+is precisely @samp{foo}. By contrast:
+
+@example
+x ~ /foo/
+@end example
+
+@noindent
+has the value one if @code{x} contains @samp{foo}, such as
+@code{"Oh, what a fool am I!"}.
+
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+The righthand operand of the @samp{~} and @samp{!~} operators may be
+either a regexp constant (@code{/}@dots{}@code{/}) or an ordinary
+expression. In the latter case, the value of the expression as a string is used as a
+dynamic regexp (@pxref{Regexp Usage}; also
+@pxref{Computed Regexps}).
+
+@cindex @command{awk}, regexp constants and
+@cindex regexp constants
+A constant regular
+expression in slashes by itself is also an expression.
+@code{/@var{regexp}/} is an abbreviation for the following comparison expression:
+
+@example
+$0 ~ /@var{regexp}/
+@end example
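+
+For instance, these two rules are equivalent ways of printing the first
+field of every record containing @samp{li}:
+
+@example
+/li/       @{ print $1 @}
+$0 ~ /li/  @{ print $1 @}
+@end example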
+
+One special place where @code{/foo/} is @emph{not} an abbreviation for
+@samp{$0 ~ /foo/} is when it is the righthand operand of @samp{~} or
+@samp{!~}.
+@xref{Using Constant Regexps},
+where this is discussed in more detail.
+
+@node POSIX String Comparison
+@subsubsection String Comparison with POSIX Rules
+
+The POSIX standard says that string comparison is performed based
+on the locale's @dfn{collating order}. This is the order in which
+characters sort, as defined by the locale (for more discussion,
+@pxref{Locales}). This order is usually very different
+from the results obtained when doing straight character-by-character
+comparison.@footnote{Technically, string comparison is supposed
+to behave the same way as if the strings are compared with the C
+@code{strcoll()} function.}
+
+Because this behavior differs considerably from existing practice,
+@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
+locale:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf("ABC < abc = %s\n",}
+> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'}
+@print{} ABC < abc = TRUE
+$ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
+> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'}
+@print{} ABC < abc = FALSE
+@end example
+
+
+@node Boolean Ops
+@subsection Boolean Expressions
+@cindex and Boolean-logic operator
+@cindex or Boolean-logic operator
+@cindex not Boolean-logic operator
+@cindex expressions, Boolean
+@cindex Boolean expressions
+@cindex operators, Boolean, See Boolean expressions
+@cindex Boolean operators, See Boolean expressions
+@cindex logical operators, See Boolean expressions
+@cindex operators, logical, See Boolean expressions
+
+A @dfn{Boolean expression} is a combination of comparison expressions or
+matching expressions, using the Boolean operators ``or''
+(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
+parentheses to control nesting. The truth value of the Boolean expression is
+computed by combining the truth values of the component expressions.
+Boolean expressions are also referred to as @dfn{logical expressions}.
+The terms are equivalent.
+
+Boolean expressions can be used wherever comparison and matching
+expressions can be used. They can be used in @code{if}, @code{while},
+@code{do}, and @code{for} statements
+(@pxref{Statements}).
+They have numeric values (one if true, zero if false) that come into play
+if the result of the Boolean expression is stored in a variable or
+used in arithmetic.
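+
+For example, the numeric value of a comparison can be used directly
+in arithmetic:
+
+@example
+$ @kbd{awk 'BEGIN @{ x = (2 > 1); print x, x + 10 @}'}
+@print{} 1 11
+@end example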
+
+In addition, every Boolean expression is also a valid pattern, so
+you can use one as a pattern to control the execution of rules.
+The Boolean operators are:
+
+@table @code
+@item @var{boolean1} && @var{boolean2}
+True if both @var{boolean1} and @var{boolean2} are true. For example,
+the following statement prints the current input record if it contains
+both @samp{edu} and @samp{li}:
+
+@example
+if ($0 ~ /edu/ && $0 ~ /li/) print
+@end example
+
+@cindex side effects, Boolean operators
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is true. This can make a difference when @var{boolean2} contains
+expressions that have side effects. In the case of @samp{$0 ~ /foo/ &&
+($2 == bar++)}, the variable @code{bar} is not incremented if there is
+no substring @samp{foo} in the record.
+
+@item @var{boolean1} || @var{boolean2}
+True if at least one of @var{boolean1} or @var{boolean2} is true.
+For example, the following statement prints all records in the input
+that contain @emph{either} @samp{edu} or
+@samp{li}:
+
+@example
+if ($0 ~ /edu/ || $0 ~ /li/) print
+@end example
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is false. This can make a difference when @var{boolean2} contains
+expressions that have side effects.
+(Thus, this test never really distinguishes records that contain both
+@samp{edu} and @samp{li}---as soon as @samp{edu} is matched,
+the full test succeeds.)
+
+@item ! @var{boolean}
+True if @var{boolean} is false. For example,
+the following program prints @samp{no home!} in
+the unusual event that the @env{HOME} environment
+variable is not defined:
+
+@example
+BEGIN @{ if (! ("HOME" in ENVIRON))
+ print "no home!" @}
+@end example
+
+(The @code{in} operator is described in
+@ref{Reference to Elements}.)
+@end table
+
+@cindex short-circuit operators
+@cindex operators, short-circuit
+@cindex @code{&} (ampersand), @code{&&} operator
+@cindex ampersand (@code{&}), @code{&&} operator
+@cindex @code{|} (vertical bar), @code{||} operator
+@cindex vertical bar (@code{|}), @code{||} operator
+The @samp{&&} and @samp{||} operators are called @dfn{short-circuit}
+operators because of the way they work. Evaluation of the full expression
+is ``short-circuited'' if the result can be determined part way through
+its evaluation.
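+
+A small sketch (using a made-up counter variable) shows the effect;
+neither @code{count++} below is ever evaluated:
+
+@example
+BEGIN @{
+    count = 0
+    result1 = (0 && count++)        # right side skipped; result1 is 0
+    result2 = (1 || count++)        # right side skipped; result2 is 1
+    print result1, result2, count   # prints 0 1 0
+@}
+@end example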
+
+@cindex line continuations
+Statements that end with @samp{&&} or @samp{||} can be continued simply
+by putting a newline after them. But you cannot put a newline in front
+of either of these operators without using backslash continuation
+(@pxref{Statements/Lines}).
+
+@cindex @code{!} (exclamation point), @code{!} operator
+@cindex exclamation point (@code{!}), @code{!} operator
+@cindex newlines
+@cindex variables, flag
+@cindex flag variables
+The actual value of an expression using the @samp{!} operator is
+either one or zero, depending upon the truth value of the expression it
+is applied to.
+The @samp{!} operator is often useful for changing the sense of a flag
+variable from false to true and back again. For example, the following
+program is one way to print lines in between special bracketing lines:
+
+@example
+$1 == "START" @{ interested = ! interested; next @}
+interested @{ print @}
+$1 == "END" @{ interested = ! interested; next @}
+@end example
+
+@noindent
+The variable @code{interested}, as with all @command{awk} variables, starts
+out initialized to zero, which is also false. When a line is seen whose
+first field is @samp{START}, the value of @code{interested} is toggled
+to true, using @samp{!}. The next rule prints lines as long as
+@code{interested} is true. When a line is seen whose first field is
+@samp{END}, @code{interested} is toggled back to false.@footnote{This
+program has a bug; it prints lines starting with @samp{END}. How
+would you fix it?}
+
+@ignore
+Scott Deifik points out that this program isn't robust against
+bogus input data, but the point is to illustrate the use of `!',
+so we'll leave well enough alone.
+@end ignore
+
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
+@cindex @code{next} statement
+@quotation NOTE
+The @code{next} statement is discussed in
+@ref{Next Statement}.
+@code{next} tells @command{awk} to skip the rest of the rules, get the
+next record, and start processing the rules over again at the top.
+The reason it's there is to avoid printing the bracketing
+@samp{START} and @samp{END} lines.
+@end quotation
+
+@node Conditional Exp
+@subsection Conditional Expressions
+@cindex conditional expressions
+@cindex expressions, conditional
+@cindex expressions, selecting
+
+A @dfn{conditional expression} is a special kind of expression that has
+three operands. It allows you to use one expression's value to select
+one of two other expressions.
+The conditional expression is the same as in the C language,
+as shown here:
+
+@example
+@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
+@end example
+
+@noindent
+There are three subexpressions. The first, @var{selector}, is always
+computed first. If it is ``true'' (not zero or not null), then
+@var{if-true-exp} is computed next and its value becomes the value of
+the whole expression. Otherwise, @var{if-false-exp} is computed next
+and its value becomes the value of the whole expression.
+For example, the following expression produces the absolute value of @code{x}:
+
+@example
+x >= 0 ? x : -x
+@end example
+
+@cindex side effects, conditional expressions
+Each time the conditional expression is computed, only one of
+@var{if-true-exp} and @var{if-false-exp} is used; the other is ignored.
+This is important when the expressions have side effects. For example,
+this conditional expression examines element @code{i} of either array
+@code{a} or array @code{b}, and increments @code{i}:
+
+@example
+x == y ? a[i++] : b[i++]
+@end example
+
+@noindent
+This is guaranteed to increment @code{i} exactly once, because each time
+only one of the two increment expressions is executed
+and the other is not.
+@xref{Arrays},
+for more information about arrays.
+
+@cindex differences in @command{awk} and @command{gawk}, line continuations
+@cindex line continuations, @command{gawk}
+@cindex @command{gawk}, line continuation in
+As a minor @command{gawk} extension,
+a statement that uses @samp{?:} can be continued simply
+by putting a newline after either character.
+However, putting a newline in front
+of either character does not work without using backslash continuation
+(@pxref{Statements/Lines}).
+If @option{--posix} is specified
+(@pxref{Options}), this extension is disabled.
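+
+For instance, with this extension a long conditional expression may be
+wrapped after the @samp{?} or the @samp{:} (the variable names here are
+purely illustrative):
+
+@example
+grade = (score >= 90) ?
+            "excellent" :
+            "needs work"
+@end example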
+
+@node Function Calls
+@section Function Calls
+@cindex function calls
+
+A @dfn{function} is a name for a particular calculation.
+This enables you to
+ask for it by name at any point in the program. For
+example, the function @code{sqrt()} computes the square root of a number.
+
+@cindex functions, built-in
+A fixed set of functions are @dfn{built-in}, which means they are
+available in every @command{awk} program. The @code{sqrt()} function is one
+of these. @DBXREF{Built-in} for a list of built-in
+functions and their descriptions. In addition, you can define
+functions for use in your program.
+@DBXREF{User-defined}
+for instructions on how to do this.
+Finally, @command{gawk} lets you write functions in C or C++
+that may be called from your program (@pxref{Dynamic Extensions}).
+
+@cindex arguments, in function calls
+The way to use a function is with a @dfn{function call} expression,
+which consists of the function name followed immediately by a list of
+@dfn{arguments} in parentheses. The arguments are expressions that
+provide the raw materials for the function's calculations.
+When there is more than one argument, they are separated by commas. If
+there are no arguments, just write @samp{()} after the function name.
+The following examples show function calls with and without arguments:
+
+@example
+sqrt(x^2 + y^2) @ii{one argument}
+atan2(y, x) @ii{two arguments}
+rand() @ii{no arguments}
+@end example
+
+@cindex troubleshooting, function call syntax
+@quotation CAUTION
+Do not put any space between the function name and the opening parenthesis!
+A user-defined function name looks just like the name of a
+variable---a space would make the expression look like concatenation of
+a variable with an expression inside parentheses.
+With built-in functions, space before the parenthesis is harmless, but
+it is best not to get into the habit of using space to avoid mistakes
+with user-defined functions.
+@end quotation
+
+Each function expects a particular number
+of arguments. For example, the @code{sqrt()} function must be called with
+a single argument, the number whose square root to compute:
+
+@example
+sqrt(@var{argument})
+@end example
+
+Some of the built-in functions have one or
+more optional arguments.
+If those arguments are not supplied, the functions
+use a reasonable default value.
+@DBXREF{Built-in} for full details. If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables. Such local variables act like the
+empty string if referenced where a string value is required,
+and like zero if referenced where a numeric value is required
+(@pxref{User-defined}).
+
+As an advanced feature, @command{gawk} provides indirect function calls,
+which is a way to choose the function to call at runtime, instead of
+when you write the source code to your program. We defer discussion of
+this feature until later; see @ref{Indirect Calls}.
+
+@cindex side effects, function calls
+Like every other expression, the function call has a value, often
+called the @dfn{return value}, which is computed by the function
+based on the arguments you give it. In this example, the return value
+of @samp{sqrt(@var{argument})} is the square root of @var{argument}.
+The following program reads numbers, one number per line, and prints
+the square root of each one:
+
+@example
+$ @kbd{awk '@{ print "The square root of", $1, "is", sqrt($1) @}'}
+@kbd{1}
+@print{} The square root of 1 is 1
+@kbd{3}
+@print{} The square root of 3 is 1.73205
+@kbd{5}
+@print{} The square root of 5 is 2.23607
+@kbd{Ctrl-d}
+@end example
+
+A function can also have side effects, such as assigning
+values to certain variables or doing I/O.
+This program shows how the @code{match()} function
+(@pxref{String Functions})
+changes the variables @code{RSTART} and @code{RLENGTH}:
+
+@example
+@{
+ if (match($1, $2))
+ print RSTART, RLENGTH
+ else
+ print "no match"
+@}
+@end example
+
+@noindent
+Here is a sample run:
+
+@example
+$ @kbd{awk -f matchit.awk}
+@kbd{aaccdd c+}
+@print{} 3 2
+@kbd{foo bar}
+@print{} no match
+@kbd{abcdefg e}
+@print{} 5 1
+@end example
+
+@node Precedence
+@section Operator Precedence (How Operators Nest)
+@cindex precedence
+@cindex operators, precedence
+
+@dfn{Operator precedence} determines how operators are grouped when
+different operators appear close by in one expression. For example,
+@samp{*} has higher precedence than @samp{+}; thus, @samp{a + b * c}
+means to multiply @code{b} and @code{c}, and then add @code{a} to the
+product (i.e., @samp{a + (b * c)}).
+
+The normal precedence of the operators can be overruled by using parentheses.
+Think of the precedence rules as saying where the
+parentheses are assumed to be. In
+fact, it is wise to always use parentheses whenever there is an unusual
+combination of operators, because other people who read the program may
+not remember what the precedence is in this case.
+Even experienced programmers occasionally forget the exact rules,
+which leads to mistakes.
+Explicit parentheses help prevent
+any such mistakes.
+
+When operators of equal precedence are used together, the leftmost
+operator groups first, except for the assignment, conditional, and
+exponentiation operators, which group in the opposite order.
+Thus, @samp{a - b + c} groups as @samp{(a - b) + c} and
+@samp{a = b = c} groups as @samp{a = (b = c)}.
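+
+For example, the right-to-left grouping of exponentiation is easy to see:
+
+@example
+$ @kbd{awk 'BEGIN @{ print 2 ^ 3 ^ 2, (2 ^ 3) ^ 2 @}'}
+@print{} 512 64
+@end example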
+
+Normally the precedence of prefix unary operators does not matter,
+because there is only one way to interpret
+them: innermost first. Thus, @samp{$++i} means @samp{$(++i)} and
+@samp{++$x} means @samp{++($x)}. However, when another operator follows
+the operand, then the precedence of the unary operators can matter.
+@samp{$x^2} means @samp{($x)^2}, but @samp{-x^2} means
+@samp{-(x^2)}, because @samp{-} has lower precedence than @samp{^},
+whereas @samp{$} has higher precedence.
+Also, operators cannot be combined in a way that violates the
+precedence rules; for example, @samp{$$0++--} is not a valid
+expression because the first @samp{$} has higher precedence than the
+@samp{++}; to avoid the problem the expression can be rewritten as
+@samp{$($0++)--}.
+
+This list presents @command{awk}'s operators, in order of highest
+to lowest precedence:
+
+@c @asis for docbook to come out right
+@table @asis
+@item @code{(}@dots{}@code{)}
+Grouping.
+
+@cindex @code{$} (dollar sign), @code{$} field operator
+@cindex dollar sign (@code{$}), @code{$} field operator
+@item @code{$}
+Field reference.
+
+@cindex @code{+} (plus sign), @code{++} operator
+@cindex plus sign (@code{+}), @code{++} operator
+@cindex @code{-} (hyphen), @code{--} operator
+@cindex hyphen (@code{-}), @code{--} operator
+@item @code{++ --}
+Increment, decrement.
+
+@cindex @code{^} (caret), @code{^} operator
+@cindex caret (@code{^}), @code{^} operator
+@cindex @code{*} (asterisk), @code{**} operator
+@cindex asterisk (@code{*}), @code{**} operator
+@item @code{^ **}
+Exponentiation. These operators group right-to-left.
+
+@cindex @code{+} (plus sign), @code{+} operator
+@cindex plus sign (@code{+}), @code{+} operator
+@cindex @code{-} (hyphen), @code{-} operator
+@cindex hyphen (@code{-}), @code{-} operator
+@cindex @code{!} (exclamation point), @code{!} operator
+@cindex exclamation point (@code{!}), @code{!} operator
+@item @code{+ - !}
+Unary plus, minus, logical ``not.''
+
+@cindex @code{*} (asterisk), @code{*} operator, as multiplication operator
+@cindex asterisk (@code{*}), @code{*} operator, as multiplication operator
+@cindex @code{/} (forward slash), @code{/} operator
+@cindex forward slash (@code{/}), @code{/} operator
+@cindex @code{%} (percent sign), @code{%} operator
+@cindex percent sign (@code{%}), @code{%} operator
+@item @code{* / %}
+Multiplication, division, remainder.
+
+@cindex @code{+} (plus sign), @code{+} operator
+@cindex plus sign (@code{+}), @code{+} operator
+@cindex @code{-} (hyphen), @code{-} operator
+@cindex hyphen (@code{-}), @code{-} operator
+@item @code{+ -}
+Addition, subtraction.
+
+@item String concatenation
+There is no special symbol for concatenation.
+The operands are simply written side by side
+(@pxref{Concatenation}).
+
+@cindex @code{<} (left angle bracket), @code{<} operator
+@cindex left angle bracket (@code{<}), @code{<} operator
+@cindex @code{<} (left angle bracket), @code{<=} operator
+@cindex left angle bracket (@code{<}), @code{<=} operator
+@cindex @code{>} (right angle bracket), @code{>=} operator
+@cindex right angle bracket (@code{>}), @code{>=} operator
+@cindex @code{>} (right angle bracket), @code{>} operator
+@cindex right angle bracket (@code{>}), @code{>} operator
+@cindex @code{=} (equals sign), @code{==} operator
+@cindex equals sign (@code{=}), @code{==} operator
+@cindex @code{!} (exclamation point), @code{!=} operator
+@cindex exclamation point (@code{!}), @code{!=} operator
+@cindex @code{>} (right angle bracket), @code{>>} operator (I/O)
+@cindex right angle bracket (@code{>}), @code{>>} operator (I/O)
+@cindex operators, input/output
+@cindex @code{|} (vertical bar), @code{|} operator (I/O)
+@cindex vertical bar (@code{|}), @code{|} operator (I/O)
+@cindex operators, input/output
+@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
+@cindex vertical bar (@code{|}), @code{|&} operator (I/O)
+@cindex operators, input/output
+@item @code{< <= == != > >= >> | |&}
+Relational and redirection.
+The relational operators and the redirections have the same precedence
+level. Characters such as @samp{>} serve both as relationals and as
+redirections; the context distinguishes between the two meanings.
+
+@cindex @code{print} statement, I/O operators in
+@cindex @code{printf} statement, I/O operators in
+Note that the I/O redirection operators in @code{print} and @code{printf}
+statements belong to the statement level, not to expressions. The
+redirection does not produce an expression that could be the operand of
+another operator. As a result, it does not make sense to use a
+redirection operator near another operator of lower precedence without
+parentheses. Such combinations (e.g., @samp{print foo > a ? b : c})
+result in syntax errors.
+The correct way to write this statement is @samp{print foo > (a ? b : c)}.
+
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@item @code{~ !~}
+Matching, nonmatching.
+
+@cindex @code{in} operator
+@item @code{in}
+Array membership.
+
+@cindex @code{&} (ampersand), @code{&&} operator
+@cindex ampersand (@code{&}), @code{&&} operator
+@item @code{&&}
+Logical ``and''.
+
+@cindex @code{|} (vertical bar), @code{||} operator
+@cindex vertical bar (@code{|}), @code{||} operator
+@item @code{||}
+Logical ``or''.
+
+@cindex @code{?} (question mark), @code{?:} operator
+@cindex question mark (@code{?}), @code{?:} operator
+@item @code{?:}
+Conditional. This operator groups right-to-left.
+
+@cindex @code{+} (plus sign), @code{+=} operator
+@cindex plus sign (@code{+}), @code{+=} operator
+@cindex @code{-} (hyphen), @code{-=} operator
+@cindex hyphen (@code{-}), @code{-=} operator
+@cindex @code{*} (asterisk), @code{*=} operator
+@cindex asterisk (@code{*}), @code{*=} operator
+@cindex @code{*} (asterisk), @code{**=} operator
+@cindex asterisk (@code{*}), @code{**=} operator
+@cindex @code{/} (forward slash), @code{/=} operator
+@cindex forward slash (@code{/}), @code{/=} operator
+@cindex @code{%} (percent sign), @code{%=} operator
+@cindex percent sign (@code{%}), @code{%=} operator
+@cindex @code{^} (caret), @code{^=} operator
+@cindex caret (@code{^}), @code{^=} operator
+@item @code{= += -= *= /= %= ^= **=}
+Assignment. These operators group right-to-left.
+@end table
+
+@cindex POSIX @command{awk}, @code{**} operator and
+@cindex portability, operators, not in POSIX @command{awk}
+@quotation NOTE
+The @samp{|&}, @samp{**}, and @samp{**=} operators are not specified by POSIX.
+For maximum portability, do not use them.
+@end quotation
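+
+For example, to square a value portably, use @samp{^} rather than
+@samp{**}. The following two rules (a small sketch) behave identically
+in @command{gawk}, but only the first is acceptable to all
+@command{awk} implementations:
+
+@example
+@{ print $1 ^ 2 @}    # portable
+@{ print $1 ** 2 @}   # not specified by POSIX; avoid
+@end example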
+
+@node Locales
+@section Where You Are Makes a Difference
+@cindex locale, definition of
+
+Modern systems support the notion of @dfn{locales}: a way to tell the
+system about the local character set and language. The ISO C standard
+defines a default @code{"C"} locale, which is an environment that is
+typical of what many C programmers are used to.
+
+Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (@pxref{Ranges and Locales}).
+
+Locales can affect record splitting. For the normal case of @samp{RS =
+"\n"}, the locale is largely irrelevant. For other single-character
+record separators, setting @samp{LC_ALL=C} in the environment will
+give you much better performance when reading records. Otherwise,
+@command{gawk} has to make several function calls, @emph{per input
+character}, to find the record terminator.
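+
+For example, assuming a Bourne-style shell and a data file named
+@file{records.txt} (a hypothetical name) that uses @samp{;} as the
+record separator, the locale can be forced for just the one command:
+
+@example
+LC_ALL=C gawk 'BEGIN @{ RS = ";" @} @{ print NR, $0 @}' records.txt
+@end example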
+
+Locales can affect how dates and times are formatted (@pxref{Time
+Functions}). For example, a common way to abbreviate the date September
+4, 2015, in the United States is ``9/4/15.'' In many countries in
+Europe, however, it is abbreviated ``4.9.15.'' Thus, the @samp{%x}
+specification in a @code{"US"} locale might produce @samp{9/4/15},
+while in a @code{"EUROPE"} locale, it might produce @samp{4.9.15}.
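+
+For example, you can see what @samp{%x} produces in your current locale
+with a one-line @command{gawk} program; the output shown here assumes a
+United States locale and, of course, depends on the current date:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print strftime("%x") @}'}
+@print{} 09/04/15
+@end example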
+
+According to POSIX, string comparison is also affected by locales (similar
+to regular expressions). The details are presented in @ref{POSIX String
+Comparison}.
+
+Finally, the locale affects the value of the decimal point character
+used when @command{gawk} parses input data. This is discussed in detail
+in @ref{Conversion}.
+
+@node Expressions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Expressions are the basic elements of computation in programs. They are
+built from constants, variables, function calls, and combinations of the
+various kinds of values with operators.
+
+@item
+@command{awk} supplies three kinds of constants: numeric, string, and
+regexp. @command{gawk} lets you specify numeric constants in octal
+and hexadecimal (bases 8 and 16) as well as decimal (base 10).
+In certain contexts, a standalone regexp constant such as @code{/foo/}
+has the same meaning as @samp{$0 ~ /foo/}.
+
+@item
+Variables hold values between uses in computations. A number of built-in
+variables provide information to your @command{awk} program, and a number
+of others let you control how @command{awk} behaves.
+
+@item
+Numbers are automatically converted to strings, and strings to numbers,
+as needed by @command{awk}. Numeric values are converted as if they were
+formatted with @code{sprintf()} using the format in @code{CONVFMT}.
+Locales can influence the conversions.
+
+@item
+@command{awk} provides the usual arithmetic operators (addition,
+subtraction, multiplication, division, modulus), and unary plus and minus.
+It also provides comparison operators, boolean operators, array membership
+testing, and regexp
+matching operators. String concatenation is accomplished by placing
+two expressions next to each other; there is no explicit operator.
+The three-operand @samp{?:} operator provides an ``if-else'' test within
+expressions.
+
+@item
+Assignment operators provide convenient shorthands for common arithmetic
+operations.
+
+@item
+In @command{awk}, a value is considered to be true if it is nonzero
+@emph{or} non-null. Otherwise, the value is false.
+
+@item
+A variable's type is set upon each assignment and may change over its
+lifetime. The type determines how it behaves in comparisons (string
+or numeric).
+
+@item
+Function calls return a value which may be used as part of a larger
+expression. Expressions used to pass parameter values are fully
+evaluated before the function is called. @command{awk} provides
+built-in and user-defined functions; this is described in
+@ref{Functions}.
+
+@item
+Operator precedence specifies the order in which operations are performed,
+unless explicitly overridden by parentheses. @command{awk}'s operator
+precedence is compatible with that of C.
+
+@item
+Locales can affect the format of data as output by an @command{awk}
+program, and occasionally the format for data read as input.
+
+@end itemize
+
+
+@node Patterns and Actions
+@chapter Patterns, Actions, and Variables
+@cindex patterns
+
+As you have already seen, each @command{awk} statement consists of
+a pattern with an associated action. This @value{CHAPTER} describes how
+you build patterns and actions, what kinds of things you can do within
+actions, and @command{awk}'s predefined variables.
+
+The pattern-action rules and the statements available for use
+within actions form the core of @command{awk} programming.
+In a sense, everything covered
+up to here has been the foundation
+that programs are built on top of. Now it's time to start
+building something useful.
+
+@menu
+* Pattern Overview:: What goes into a pattern.
+* Using Shell Variables:: How to use shell variables with @command{awk}.
+* Action Overview:: What goes into an action.
+* Statements:: Describes the various control statements in
+ detail.
+* Built-in Variables:: Summarizes the predefined variables.
+* Pattern Action Summary:: Patterns and Actions summary.
+@end menu
+
+@node Pattern Overview
+@section Pattern Elements
+
+@menu
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* BEGINFILE/ENDFILE:: Two special patterns for advanced control.
+* Empty:: The empty pattern, which matches every record.
+@end menu
+
+@cindex patterns, types of
+Patterns in @command{awk} control the execution of rules---a rule is
+executed when its pattern matches the current input record.
+The following is a summary of the types of @command{awk} patterns:
+
+@table @code
+@item /@var{regular expression}/
+A regular expression. It matches when the text of the
+input record fits the regular expression.
+(@xref{Regexp}.)
+
+@item @var{expression}
+A single expression. It matches when its value
+is nonzero (if a number) or non-null (if a string).
+(@xref{Expression Patterns}.)
+
+@item @var{begpat}, @var{endpat}
+A pair of patterns separated by a comma, specifying a @dfn{range} of records.
+The range includes both the initial record that matches @var{begpat} and
+the final record that matches @var{endpat}.
+(@xref{Ranges}.)
+
+@item BEGIN
+@itemx END
+Special patterns for you to supply startup or cleanup actions for your
+@command{awk} program.
+(@xref{BEGIN/END}.)
+
+@item BEGINFILE
+@itemx ENDFILE
+Special patterns for you to supply startup or cleanup actions to be
+done on a per-file basis.
+(@xref{BEGINFILE/ENDFILE}.)
+
+@item @var{empty}
+The empty pattern matches every input record.
+(@xref{Empty}.)
+@end table
+
+@node Regexp Patterns
+@subsection Regular Expressions as Patterns
+@cindex patterns, expressions as
+@cindex regular expressions, as patterns
+
+Regular expressions are one of the first kinds of patterns presented
+in this book.
+This kind of pattern is simply a regexp constant in the pattern part of
+a rule. Its meaning is @samp{$0 ~ /@var{pattern}/}.
+The pattern matches when the input record matches the regexp.
+For example:
+
+@example
+/foo|bar|baz/ @{ buzzwords++ @}
+END @{ print buzzwords, "buzzwords seen" @}
+@end example
+
+@node Expression Patterns
+@subsection Expressions as Patterns
+@cindex expressions, as patterns
+
+Any @command{awk} expression is valid as an @command{awk} pattern.
+The pattern matches if the expression's value is nonzero (if a
+number) or non-null (if a string).
+The expression is reevaluated each time the rule is tested against a new
+input record. If the expression uses fields such as @code{$1}, the
+value depends directly on the new input record's text; otherwise, it
+depends on only what has happened so far in the execution of the
+@command{awk} program.
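+
+For example, the following sketch uses an expression involving only
+@code{NR}, so the outcome of the pattern depends on how many records
+have been read so far, not on the current record's text; it prints
+every third input record:
+
+@example
+NR % 3 == 0 @{ print @}
+@end example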
+
+@cindex comparison expressions, as patterns
+@cindex patterns, comparison expressions as
+Comparison expressions, using the comparison operators described in
+@ref{Typing and Comparison},
+are a very common kind of pattern.
+Regexp matching and nonmatching are also very common expressions.
+The left operand of the @samp{~} and @samp{!~} operators is a string.
+The right operand is either a constant regular expression enclosed in
+slashes (@code{/@var{regexp}/}), or any expression whose string value
+is used as a dynamic regular expression
+(@pxref{Computed Regexps}).
+The following example prints the second field of each input record
+whose first field is precisely @samp{li}:
+
+@cindex @code{/} (forward slash), patterns and
+@cindex forward slash (@code{/}), patterns and
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@example
+$ @kbd{awk '$1 == "li" @{ print $2 @}' mail-list}
+@end example
+
+@noindent
+(There is no output, because there is no person with the exact name @samp{li}.)
+Contrast this with the following regular expression match, which
+accepts any record with a first field that contains @samp{li}:
+
+@example
+$ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
+@print{} 555-5553
+@print{} 555-6699
+@end example
+
+@cindex regexp constants, as patterns
+@cindex patterns, regexp constants as
+A regexp constant as a pattern is also a special case of an expression
+pattern. The expression @code{/li/} has the value one if @samp{li}
+appears in the current input record. Thus, as a pattern, @code{/li/}
+matches any record containing @samp{li}.
+
+@cindex Boolean expressions, as patterns
+Boolean expressions are also commonly used as patterns.
+Whether the pattern
+matches an input record depends on whether its subexpressions match.
+For example, the following command prints all the records in
+@file{mail-list} that contain both @samp{edu} and @samp{li}:
+
+@example
+$ @kbd{awk '/edu/ && /li/' mail-list}
+@print{} Samuel 555-3430 samuel.lanceolis@@shu.edu A
+@end example
+
+The following command prints all records in
+@file{mail-list} that contain @emph{either} @samp{edu} or @samp{li}
+(or both, of course):
+
+@example
+$ @kbd{awk '/edu/ || /li/' mail-list}
+@print{} Amelia 555-5553 amelia.zodiacusque@@gmail.com F
+@print{} Broderick 555-0542 broderick.aliquotiens@@yahoo.com R
+@print{} Fabius 555-1234 fabius.undevicesimus@@ucb.edu F
+@print{} Julie 555-6699 julie.perscrutabor@@skeeve.com F
+@print{} Samuel 555-3430 samuel.lanceolis@@shu.edu A
+@print{} Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@end example
+
+The following command prints all records in
+@file{mail-list} that do @emph{not} contain the string @samp{li}:
+
+@example
+$ @kbd{awk '! /li/' mail-list}
+@print{} Anthony 555-3412 anthony.asserturo@@hotmail.com A
+@print{} Becky 555-7685 becky.algebrarum@@gmail.com A
+@print{} Bill 555-1675 bill.drowning@@hotmail.com A
+@print{} Camilla 555-2912 camilla.infusarum@@skynet.be R
+@print{} Fabius 555-1234 fabius.undevicesimus@@ucb.edu F
+@print{} Martin 555-6480 martin.codicibus@@hotmail.com A
+@print{} Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
+@end example
+
+@cindex @code{BEGIN} pattern, Boolean patterns and
+@cindex @code{END} pattern, Boolean patterns and
+@cindex @code{BEGINFILE} pattern, Boolean patterns and
+@cindex @code{ENDFILE} pattern, Boolean patterns and
+The subexpressions of a Boolean operator in a pattern can be constant regular
+expressions, comparisons, or any other @command{awk} expressions. Range
+patterns are not expressions, so they cannot appear inside Boolean
+patterns. Likewise, the special patterns @code{BEGIN}, @code{END},
+@code{BEGINFILE}, and @code{ENDFILE},
+which never match any input record, are not expressions and cannot
+appear inside Boolean patterns.
+
+The precedence of the different operators that can appear in
+patterns is described in @ref{Precedence}.
+
+@node Ranges
+@subsection Specifying Record Ranges with Patterns
+
+@cindex range patterns
+@cindex patterns, ranges in
+@cindex lines, matching ranges of
+@cindex @code{,} (comma), in range patterns
+@cindex comma (@code{,}), in range patterns
+A @dfn{range pattern} is made of two patterns separated by a comma, in
+the form @samp{@var{begpat}, @var{endpat}}. It is used to match ranges of
+consecutive input records. The first pattern, @var{begpat}, controls
+where the range begins, while @var{endpat} controls where
+the pattern ends. For example, the following:
+
+@example
+awk '$1 == "on", $1 == "off"' myfile
+@end example
+
+@noindent
+prints every record in @file{myfile} between @samp{on}/@samp{off} pairs, inclusive.
+
+A range pattern starts out by matching @var{begpat} against every
+input record. When a record matches @var{begpat}, the range pattern is
+@dfn{turned on} and the range pattern matches this record as well. As long as
+the range pattern stays turned on, it automatically matches every input
+record read. The range pattern also matches @var{endpat} against every
+input record; when this succeeds, the range pattern is @dfn{turned off} again
+for the following record. Then the range pattern goes back to checking
+@var{begpat} against each record.
+
+@cindex @code{if} statement, actions@comma{} changing
+The record that turns on the range pattern and the one that turns it
+off both match the range pattern. If you don't want to operate on
+these records, you can write @code{if} statements in the rule's action
+to distinguish them from the records you are interested in.
+
+It is possible for a pattern to be turned on and off by the same
+record. If the record satisfies both conditions, then the action is
+executed for just that record.
+For example, suppose there is text between two identical markers (e.g.,
+the @samp{%} symbol), each on its own line, that should be ignored.
+A first attempt would be to
+combine a range pattern that describes the delimited text with the
+@code{next} statement
+(not discussed yet, @pxref{Next Statement}).
+This causes @command{awk} to skip any further processing of the current
+record and start over again with the next input record. Such a program
+looks like this:
+
+@example
+/^%$/,/^%$/ @{ next @}
+ @{ print @}
+@end example
+
+@noindent
+@cindex lines, skipping between markers
+@c @cindex flag variables
+This program fails because the range pattern is both turned on and turned off
+by the first line, which just has a @samp{%} on it. To accomplish this task,
+write the program in the following manner, using a flag:
+
+@cindex @code{!} (exclamation point), @code{!} operator
+@example
+/^%$/ @{ skip = ! skip; next @}
+skip == 1 @{ next @} # skip lines with `skip' set
+@end example
+
+In a range pattern, the comma (@samp{,}) has the lowest precedence of
+all the operators (i.e., it is evaluated last). Thus, the following
+program attempts to combine a range pattern with another, simpler test:
+
+@example
+echo Yes | awk '/1/,/2/ || /Yes/'
+@end example
+
+The intent of this program is @samp{(/1/,/2/) || /Yes/}.
+However, @command{awk} interprets this as @samp{/1/, (/2/ || /Yes/)}.
+This cannot be changed or worked around; range patterns do not combine
+with other patterns:
+
+@example
+$ @kbd{echo Yes | gawk '(/1/,/2/) || /Yes/'}
+@error{} gawk: cmd. line:1: (/1/,/2/) || /Yes/
+@error{} gawk: cmd. line:1: ^ syntax error
+@end example
+
+@cindex range patterns, line continuation and
+As a minor point of interest, although it is poor style,
+POSIX allows you to put a newline after the comma in
+a range pattern. @value{DARKCORNER}
+
+@node BEGIN/END
+@subsection The @code{BEGIN} and @code{END} Special Patterns
+
+@cindex @code{BEGIN} pattern
+@cindex @code{END} pattern
+All the patterns described so far are for matching input records.
+The @code{BEGIN} and @code{END} special patterns are different.
+They supply startup and cleanup actions for @command{awk} programs.
+@code{BEGIN} and @code{END} rules must have actions; there is no default
+action for these rules because there is no current record when they run.
+@code{BEGIN} and @code{END} rules are often referred to as
+``@code{BEGIN} and @code{END} blocks'' by longtime @command{awk}
+programmers.
+
+@menu
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+@end menu
+
+@node Using BEGIN/END
+@subsubsection Startup and Cleanup Actions
+
+@cindex @code{BEGIN} pattern
+@cindex @code{END} pattern
+A @code{BEGIN} rule is executed once only, before the first input record
+is read. Likewise, an @code{END} rule is executed once only, after all the
+input is read. For example:
+
+@example
+$ @kbd{awk '}
+> @kbd{BEGIN @{ print "Analysis of \"li\"" @}}
+> @kbd{/li/ @{ ++n @}}
+> @kbd{END @{ print "\"li\" appears in", n, "records." @}' mail-list}
+@print{} Analysis of "li"
+@print{} "li" appears in 4 records.
+@end example
+
+@cindex @code{BEGIN} pattern, operators and
+@cindex @code{END} pattern, operators and
+This program finds the number of records in the input file @file{mail-list}
+that contain the string @samp{li}. The @code{BEGIN} rule prints a title
+for the report. There is no need to use the @code{BEGIN} rule to
+initialize the counter @code{n} to zero, as @command{awk} does this
+automatically (@pxref{Variables}).
+The second rule increments the variable @code{n} every time a
+record containing the pattern @samp{li} is read. The @code{END} rule
+prints the value of @code{n} at the end of the run.
+
+The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
+or with Boolean operators (indeed, they cannot be used with any operators).
+An @command{awk} program may have multiple @code{BEGIN} and/or @code{END}
+rules. They are executed in the order in which they appear: all the @code{BEGIN}
+rules at startup and all the @code{END} rules at termination.
+@code{BEGIN} and @code{END} rules may be intermixed with other rules.
+This feature was added in the 1987 version of @command{awk} and is included
+in the POSIX standard.
+The original (1978) version of @command{awk}
+required the @code{BEGIN} rule to be placed at the beginning of the
+program, the @code{END} rule to be placed at the end, and only allowed one of
+each.
+This is no longer required, but it is a good idea to follow this template
+in terms of program organization and readability.
+
+Multiple @code{BEGIN} and @code{END} rules are useful for writing
+library functions, because each library file can have its own @code{BEGIN} and/or
+@code{END} rule to do its own initialization and/or cleanup.
+The order in which library functions are named on the command line
+controls the order in which their @code{BEGIN} and @code{END} rules are
+executed. Therefore, you have to be careful when writing such rules in
+library files so that the order in which they are executed doesn't matter.
+@DBXREF{Options} for more information on
+using library functions.
+@xref{Library Functions},
+for a number of useful library functions.
+
+If an @command{awk} program has only @code{BEGIN} rules and no
+other rules, then the program exits after the @code{BEGIN} rule is
+run.@footnote{The original version of @command{awk} kept
+reading and ignoring input until the end of the file was seen.} However, if an
+@code{END} rule exists, then the input is read, even if there are
+no other rules in the program. This is necessary in case the @code{END}
+rule checks the @code{FNR} and @code{NR} variables.
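+
+For example, the following minimal program has only an @code{END} rule,
+so the entire input is still read, allowing the rule to report how many
+records were seen:
+
+@example
+$ @kbd{awk 'END @{ print NR, "records" @}' mail-list}
+@print{} 11 records
+@end example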
+
+@node I/O And BEGIN/END
+@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules
+
+@cindex input/output, from @code{BEGIN} and @code{END}
+There are several (sometimes subtle) points to be aware of when doing I/O
+from a @code{BEGIN} or @code{END} rule.
+The first has to do with the value of @code{$0} in a @code{BEGIN}
+rule. Because @code{BEGIN} rules are executed before any input is read,
+there simply is no input record, and therefore no fields, when
+executing @code{BEGIN} rules. References to @code{$0} and the fields
+yield a null string or zero, depending upon the context. One way
+to give @code{$0} a real value is to execute a @code{getline} command
+without a variable (@pxref{Getline}).
+Another way is simply to assign a value to @code{$0}.
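+
+As a small sketch of the second approach, assigning a string to
+@code{$0} in a @code{BEGIN} rule also rebuilds the fields and sets
+@code{NF}:
+
+@example
+$ @kbd{gawk 'BEGIN @{ $0 = "a b c"; print NF, $2 @}'}
+@print{} 3 b
+@end example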
+
+@cindex Brian Kernighan's @command{awk}
+@cindex differences in @command{awk} and @command{gawk}, @code{BEGIN}/@code{END} patterns
+@cindex POSIX @command{awk}, @code{BEGIN}/@code{END} patterns
+@cindex @code{print} statement, @code{BEGIN}/@code{END} patterns and
+@cindex @code{BEGIN} pattern, @code{print} statement and
+@cindex @code{END} pattern, @code{print} statement and
+The second point is similar to the first but from the other direction.
+Traditionally, due largely to implementation issues, @code{$0} and
+@code{NF} were @emph{undefined} inside an @code{END} rule.
+The POSIX standard specifies that @code{NF} is available in an @code{END}
+rule. It contains the number of fields from the last input record.
+Most probably due to an oversight, the standard does not say that @code{$0}
+is also preserved, although logically one would think that it should be.
+In fact, all of BWK @command{awk}, @command{mawk}, and @command{gawk}
+preserve the value of @code{$0} for use in @code{END} rules. Be aware,
+however, that some other implementations and many older versions
+of Unix @command{awk} do not.
+
+The third point follows from the first two. The meaning of @samp{print}
+inside a @code{BEGIN} or @code{END} rule is the same as always:
+@samp{print $0}. If @code{$0} is the null string, then this prints an
+empty record. Many longtime @command{awk} programmers use an unadorned
+@samp{print} in @code{BEGIN} and @code{END} rules, to mean @samp{@w{print ""}},
+relying on @code{$0} being null. Although one might generally get away with
+this in @code{BEGIN} rules, it is a very bad idea in @code{END} rules,
+at least in @command{gawk}. It is also poor style, because if an empty
+line is needed in the output, the program should print one explicitly.
+
+@cindex @code{next} statement, @code{BEGIN}/@code{END} patterns and
+@cindex @code{nextfile} statement, @code{BEGIN}/@code{END} patterns and
+@cindex @code{BEGIN} pattern, @code{next}/@code{nextfile} statements and
+@cindex @code{END} pattern, @code{next}/@code{nextfile} statements and
+Finally, the @code{next} and @code{nextfile} statements are not allowed
+in a @code{BEGIN} rule, because the implicit
+read-a-record-and-match-against-the-rules loop has not started yet. Similarly, those statements
+are not valid in an @code{END} rule, because all the input has been read.
+(@DBXREF{Next Statement} and
+@ifnotdocbook
+@DBPXREF{Nextfile Statement}.)
+@end ifnotdocbook
+@ifdocbook
+@DBREF{Nextfile Statement}.)
+@end ifdocbook
+
+@node BEGINFILE/ENDFILE
+@subsection The @code{BEGINFILE} and @code{ENDFILE} Special Patterns
+@cindex @code{BEGINFILE} pattern
+@cindex @code{ENDFILE} pattern
+@cindex differences in @command{awk} and @command{gawk}, @code{BEGINFILE}/@code{ENDFILE} patterns
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+
+Two special kinds of rule, @code{BEGINFILE} and @code{ENDFILE}, give
+you ``hooks'' into @command{gawk}'s command-line file processing loop.
+As with the @code{BEGIN} and @code{END} rules
+@ifnottex
+@ifnotdocbook
+(@pxref{BEGIN/END}),
+@end ifnotdocbook
+@end ifnottex
+@iftex
+(see the previous section),
+@end iftex
+@ifdocbook
+(see the previous section),
+@end ifdocbook
+all @code{BEGINFILE} rules in a program are merged, in the order they are
+read by @command{gawk}, and all @code{ENDFILE} rules are merged as well.
+
+The body of the @code{BEGINFILE} rules is executed just before
+@command{gawk} reads the first record from a file. @code{FILENAME}
+is set to the name of the current file, and @code{FNR} is set to zero.
+
+The @code{BEGINFILE} rule provides you the opportunity to accomplish two tasks
+that would otherwise be difficult or impossible to perform:
+
+@itemize @value{BULLET}
+@item
+You can test if the file is readable. Normally, it is a fatal error if a
+file named on the command line cannot be opened for reading. However,
+you can bypass the fatal error and move on to the next file on the
+command line.
+
+@cindex @command{gawk}, @code{ERRNO} variable in
+@cindex @code{ERRNO} variable, with @code{BEGINFILE} pattern
+@cindex @code{nextfile} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
+You do this by checking if the @code{ERRNO} variable is not the empty
+string; if so, then @command{gawk} was not able to open the file. In
+this case, your program can execute the @code{nextfile} statement
+(@pxref{Nextfile Statement}). This causes @command{gawk} to skip
+the file entirely. Otherwise, @command{gawk} exits with the usual
+fatal error.
+
+@item
+If you have written extensions that modify the record handling (by
+inserting an ``input parser,'' @pxref{Input Parsers}), you can invoke
+them at this point, before @command{gawk} has started processing the file.
+(This is a @emph{very} advanced feature, currently used only by the
+@uref{http://gawkextlib.sourceforge.net, @code{gawkextlib} project}.)
+@end itemize
+
+The @code{ENDFILE} rule is called when @command{gawk} has finished processing
+the last record in an input file. For the last input file,
+it will be called before any @code{END} rules.
+The @code{ENDFILE} rule is executed even for empty input files.
+
+Normally, an error that occurs while reading input in the normal input
+processing loop is fatal. However, if an @code{ENDFILE}
+rule is present, the error becomes non-fatal, and instead @code{ERRNO}
+is set. This makes it possible to catch and process I/O errors at the
+level of the @command{awk} program.
+
+@cindex @code{next} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
+The @code{next} statement (@pxref{Next Statement}) is not allowed inside
+either a @code{BEGINFILE} or an @code{ENDFILE} rule. The @code{nextfile}
+statement is allowed only inside a
+@code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
+
+@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
+The @code{getline} statement (@pxref{Getline}) is restricted inside
+both @code{BEGINFILE} and @code{ENDFILE}: only redirected
+forms of @code{getline} are allowed.
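+
+Putting these pieces together, the following sketch (the file names
+given on the command line are up to you) skips any file that cannot be
+opened and reports a per-file record count for the rest:
+
+@example
+BEGINFILE @{
+    if (ERRNO != "") @{
+        print "skipping", FILENAME > "/dev/stderr"
+        nextfile
+    @}
+@}
+ENDFILE @{ print FILENAME, FNR, "records" @}
+@end example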
+
+@code{BEGINFILE} and @code{ENDFILE} are @command{gawk} extensions.
+In most other @command{awk} implementations, or if @command{gawk} is in
+compatibility mode (@pxref{Options}), they are not special.
+
+@c FIXME: For 4.2 maybe deal with this?
+@ignore
+Date: Tue, 17 May 2011 02:06:10 PDT
+From: rankin@pactechdata.com (Pat Rankin)
+Message-Id: <110517015127.20240f4a@pactechdata.com>
+Subject: BEGINFILE
+To: arnold@skeeve.com
+
+ The documentation for BEGINFILE states that FNR is 0, which seems
+pretty obvious. It doesn't mention what the value of $0 is, and that's
+not obvious. I think setting it to null before starting the BEGINFILE
+action would be preferable to leaving whatever was there in the last
+record of the previous file.
+
+ ENDFILE can retain the last record in $0. I guess it has to if
+the END rule's actions see that value too. But the beginning of a new
+file doesn't just mean that the old one has been closed; the old file
+is being superseded, so leaving the old data around feels wrong to me.
+[If the user wants to keep it on hand, he or she can use an ENDFILE
+rule to grab it before moving on to the next file.]
+@end ignore
+
+@node Empty
+@subsection The Empty Pattern
+
+@cindex empty pattern
+@cindex patterns, empty
+An empty (i.e., nonexistent) pattern is considered to match @emph{every}
+input record. For example, the program:
+
+@example
+awk '@{ print $1 @}' mail-list
+@end example
+
+@noindent
+prints the first field of every record.
+
+@node Using Shell Variables
+@section Using Shell Variables in Programs
+@cindex shells, variables
+@cindex @command{awk} programs, shell variables in
+@c @cindex shell and @command{awk} interaction
+
+@command{awk} programs are often used as components in larger
+programs written in shell.
+For example, it is very common to use a shell variable to
+hold a pattern that the @command{awk} program searches for.
+There are two ways to get the value of the shell variable
+into the body of the @command{awk} program.
+
+@cindex shells, quoting
+A common method is to use shell quoting to substitute
+the variable's value into the program inside the script.
+For example, consider the following program:
+
+@example
+printf "Enter search pattern: "
+read pattern
+awk "/$pattern/ "'@{ nmatches++ @}
+ END @{ print nmatches, "found" @}' /path/to/data
+@end example
+
+@noindent
+The @command{awk} program consists of two pieces of quoted text
+that are concatenated together to form the program.
+The first part is double quoted, which allows substitution of
+the @code{pattern} shell variable inside the quotes.
+The second part is single quoted.
+
+Variable substitution via quoting works, but can be potentially
+messy. It requires a good understanding of the shell's quoting rules
+(@pxref{Quoting}),
+and it's often difficult to correctly
+match up the quotes when reading the program.
+
+A better method is to use @command{awk}'s variable assignment feature
+(@pxref{Assignment Options})
+to assign the shell variable's value to an @command{awk} variable.
+Then use dynamic regexps to match the pattern
+(@pxref{Computed Regexps}).
+The following shows how to redo the
+previous example using this technique:
+
+@example
+printf "Enter search pattern: "
+read pattern
+awk -v pat="$pattern" '$0 ~ pat @{ nmatches++ @}
+ END @{ print nmatches, "found" @}' /path/to/data
+@end example
+
+@noindent
+Now, the @command{awk} program is just one single-quoted string.
+The assignment @samp{-v pat="$pattern"} still requires double quotes,
+in case there is whitespace in the value of @code{$pattern}.
+The @command{awk} variable @code{pat} could be named @code{pattern}
+too, but that would be more confusing. Using a variable also
+provides more flexibility, as the variable can be used anywhere inside
+the program---for printing, as an array subscript, or for any other
+use---without requiring the quoting tricks at every point in the program.
+
+@node Action Overview
+@section Actions
+@c @cindex action, definition of
+@c @cindex curly braces
+@c @cindex action, curly braces
+@c @cindex action, separating statements
+@cindex actions
+
+An @command{awk} program or script consists of a series of
+rules and function definitions interspersed. (Functions are
+described later. @xref{User-defined}.)
+A rule contains a pattern and an action, either of which (but not
+both) may be omitted. The purpose of the @dfn{action} is to tell
+@command{awk} what to do once a match for the pattern is found. Thus,
+in outline, an @command{awk} program generally looks like this:
+
+@display
+[@var{pattern}] @code{@{ @var{action} @}}
+ @var{pattern} [@code{@{ @var{action} @}}]
+@dots{}
+@code{function @var{name}(@var{args}) @{ @dots{} @}}
+@dots{}
+@end display
+
+@cindex @code{@{@}} (braces), actions and
+@cindex braces (@code{@{@}}), actions and
+@cindex separators, for statements in actions
+@cindex newlines, separating statements in actions
+@cindex @code{;} (semicolon), separating statements in actions
+@cindex semicolon (@code{;}), separating statements in actions
+An action consists of one or more @command{awk} @dfn{statements}, enclosed
+in braces (@samp{@{@r{@dots{}}@}}). Each statement specifies one
+thing to do. The statements are separated by newlines or semicolons.
+The braces around an action must be used even if the action
+contains only one statement, or if it contains no statements at
+all. However, if you omit the action entirely, omit the braces as
+well. An omitted action is equivalent to @samp{@{ print $0 @}}:
+
+@example
+/foo/ @{ @} @ii{match @code{foo}, do nothing --- empty action}
+/foo/ @ii{match @code{foo}, print the record --- omitted action}
+@end example
+
+The following types of statements are supported in @command{awk}:
+
+@table @asis
+@cindex side effects, statements
+@item Expressions
+Call functions or assign values to variables
+(@pxref{Expressions}). Executing
+this kind of statement simply computes the value of the expression.
+This is useful when the expression has side effects
+(@pxref{Assignment Ops}).
+
+@item Control statements
+Specify the control flow of @command{awk}
+programs. The @command{awk} language gives you C-like constructs
+(@code{if}, @code{for}, @code{while}, and @code{do}) as well as a few
+special ones (@pxref{Statements}).
+
+@item Compound statements
+Enclose one or more statements in braces. A compound statement
+is used in order to put several statements together in the body of an
+@code{if}, @code{while}, @code{do}, or @code{for} statement.
+
+@item Input statements
+Use the @code{getline} command
+(@pxref{Getline}).
+Also supplied in @command{awk} are the @code{next}
+statement (@pxref{Next Statement})
+and the @code{nextfile} statement
+(@pxref{Nextfile Statement}).
+
+@item Output statements
+Such as @code{print} and @code{printf}.
+@xref{Printing}.
+
+@item Deletion statements
+For deleting array elements.
+@xref{Delete}.
+@end table
+
+@node Statements
+@section Control Statements in Actions
+@cindex control statements
+@cindex statements, control, in actions
+@cindex actions, control statements in
+
+@dfn{Control statements}, such as @code{if}, @code{while}, and so on,
+control the flow of execution in @command{awk} programs. Most of @command{awk}'s
+control statements are patterned after similar statements in C.
+
+@cindex compound statements@comma{} control statements and
+@cindex statements, compound@comma{} control statements and
+@cindex body, in actions
+@cindex @code{@{@}} (braces), statements, grouping
+@cindex braces (@code{@{@}}), statements, grouping
+@cindex newlines, separating statements in actions
+@cindex @code{;} (semicolon), separating statements in actions
+@cindex semicolon (@code{;}), separating statements in actions
+All the control statements start with special keywords, such as @code{if}
+and @code{while}, to distinguish them from simple expressions.
+Many control statements contain other statements. For example, the
+@code{if} statement contains another statement that may or may not be
+executed. The contained statement is called the @dfn{body}.
+To include more than one statement in the body, group them into a
+single @dfn{compound statement} with braces, separating them with
+newlines or semicolons.
+
+@menu
+* If Statement:: Conditionally execute some @command{awk}
+ statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Switch Statement:: Switch/case evaluation for conditional
+ execution of statements based on a value.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost enclosing
+ loop.
+* Next Statement:: Stop processing the current input record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @command{awk}.
+@end menu
+
+@node If Statement
+@subsection The @code{if}-@code{else} Statement
+
+@cindex @code{if} statement
+The @code{if}-@code{else} statement is @command{awk}'s decision-making
+statement. It looks like this:
+
+@display
+@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}]
+@end display
+
+@noindent
+The @var{condition} is an expression that controls what the rest of the
+statement does. If the @var{condition} is true, @var{then-body} is
+executed; otherwise, @var{else-body} is executed.
+The @code{else} part of the statement is
+optional. The condition is considered false if its value is zero or
+the null string; otherwise, the condition is true.
+Refer to the following:
+
+@example
+if (x % 2 == 0)
+ print "x is even"
+else
+ print "x is odd"
+@end example
+
+In this example, if the expression @samp{x % 2 == 0} is true (i.e.,
+if the value of @code{x} is evenly divisible by two), then the first
+@code{print} statement is executed; otherwise, the second @code{print}
+statement is executed.
+If the @code{else} keyword appears on the same line as @var{then-body} and
+@var{then-body} is not a compound statement (i.e., not surrounded by
+braces), then a semicolon must separate @var{then-body} from
+the @code{else}.
+To illustrate this, the previous example can be rewritten as:
+
+@example
+if (x % 2 == 0) print "x is even"; else
+ print "x is odd"
+@end example
+
+@noindent
+If the @samp{;} is left out, @command{awk} can't interpret the statement and
+it produces a syntax error. Don't actually write programs this way,
+because a human reader might fail to see the @code{else} if it is not
+the first thing on its line.
+
+@node While Statement
+@subsection The @code{while} Statement
+@cindex @code{while} statement
+@cindex loops
+@cindex loops, @code{while}
+@cindex loops, See Also @code{while} statement
+
+In programming, a @dfn{loop} is a part of a program that can
+be executed two or more times in succession.
+The @code{while} statement is the simplest looping statement in
+@command{awk}. It repeatedly executes a statement as long as a condition is
+true. For example:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@cindex body, in loops
+@noindent
+@var{body} is a statement called the @dfn{body} of the loop,
+and @var{condition} is an expression that controls how long the loop
+keeps running.
+The first thing the @code{while} statement does is test the @var{condition}.
+If the @var{condition} is true, it executes the statement @var{body}.
+@ifinfo
+(The @var{condition} is true when the value
+is not zero and not a null string.)
+@end ifinfo
+After @var{body} has been executed,
+@var{condition} is tested again, and if it is still true, @var{body}
+executes again. This process repeats until the @var{condition} is no longer
+true. If the @var{condition} is initially false, the body of the loop
+never executes and @command{awk} continues with the statement following
+the loop.
+This example prints the first three fields of each record, one per line:
+
+@example
+awk '
+@{
+ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
+@}' inventory-shipped
+@end example
+
+@noindent
+The body of this loop is a compound statement enclosed in braces,
+containing two statements.
+The loop works in the following manner: first, the value of @code{i} is set to one.
+Then, the @code{while} statement tests whether @code{i} is less than or equal to
+three. This is true when @code{i} equals one, so the @code{i}-th
+field is printed. Then the @samp{i++} increments the value of @code{i}
+and the loop repeats. The loop terminates when @code{i} reaches four.
+
+A newline is not required between the condition and the
+body; however, using one makes the program clearer unless the body is a
+compound statement or else is very simple. The newline after the open-brace
+that begins the compound statement is not required either, but the
+program is harder to read without it.
+
+@node Do Statement
+@subsection The @code{do}-@code{while} Statement
+@cindex @code{do}-@code{while} statement
+@cindex loops, @code{do}-@code{while}
+
+The @code{do} loop is a variation of the @code{while} looping statement.
+The @code{do} loop executes the @var{body} once and then repeats the
+@var{body} as long as the @var{condition} is true. It looks like this:
+
+@example
+do
+ @var{body}
+while (@var{condition})
+@end example
+
+Even if the @var{condition} is false at the start, the @var{body}
+executes at least once (and only once, unless executing @var{body}
+makes @var{condition} true). Contrast this with the corresponding
+@code{while} statement:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+This statement does not execute @var{body} even once if the @var{condition}
+is false to begin with.
+The following is an example of a @code{do} statement:
+
+@example
+@{
+ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
+@}
+@end example
+
+@noindent
+This program prints each input record 10 times. However, it isn't a very
+realistic example, because in this case an ordinary @code{while} would do
+just as well. This situation reflects actual experience; only
+occasionally is there a real use for a @code{do} statement.
+
+@node For Statement
+@subsection The @code{for} Statement
+@cindex @code{for} statement
+@cindex loops, @code{for}, iterative
+
+The @code{for} statement makes it more convenient to count iterations of a
+loop. The general form of the @code{for} statement looks like this:
+
+@example
+for (@var{initialization}; @var{condition}; @var{increment})
+ @var{body}
+@end example
+
+@noindent
+The @var{initialization}, @var{condition}, and @var{increment} parts are
+arbitrary @command{awk} expressions, and @var{body} stands for any
+@command{awk} statement.
+
+The @code{for} statement starts by executing @var{initialization}.
+Then, as long
+as the @var{condition} is true, it repeatedly executes @var{body} and then
+@var{increment}. Typically, @var{initialization} sets a variable to
+either zero or one, @var{increment} adds one to it, and @var{condition}
+compares it against the desired number of iterations.
+For example:
+
+@example
+awk '
+@{
+ for (i = 1; i <= 3; i++)
+ print $i
+@}' inventory-shipped
+@end example
+
+@noindent
+This prints the first three fields of each input record, with one field per
+line.
+
+It isn't possible to
+set more than one variable in the
+@var{initialization} part without using a multiple assignment statement
+such as @samp{x = y = 0}. This makes sense only if all the initial values
+are equal. (But it is possible to initialize additional variables by writing
+their assignments as separate statements preceding the @code{for} loop.)
+
+@c @cindex comma operator, not supported
+The same is true of the @var{increment} part. Incrementing additional
+variables requires separate statements at the end of the loop.
+The C compound expression, using C's comma operator, is useful in
+this context but it is not supported in @command{awk}.
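+
+For example, to walk two indices from opposite ends of a record
+(something C would do with its comma operator), a sketch in
+@command{awk} initializes the second variable in a separate statement
+before the loop and updates it at the end of the body:
+
+@example
+@{
+    j = NF
+    for (i = 1; i < j; i++) @{
+        print $i, $j
+        j--
+    @}
+@}
+@end example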
+
+Most often, @var{increment} is an increment expression, as in the previous
+example. But this is not required; it can be any expression
+whatsoever. For example, the following statement prints all the powers of two
+between 1 and 100:
+
+@example
+for (i = 1; i <= 100; i *= 2)
+ print i
+@end example
+
+If there is nothing to be done, any of the three expressions in the
+parentheses following the @code{for} keyword may be omitted. Thus,
+@w{@samp{for (; x > 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the
+@var{condition} is omitted, it is treated as true, effectively
+yielding an @dfn{infinite loop} (i.e., a loop that never terminates).
+
+In most cases, a @code{for} loop is an abbreviation for a @code{while}
+loop, as shown here:
+
+@example
+@var{initialization}
+while (@var{condition}) @{
+ @var{body}
+ @var{increment}
+@}
+@end example
+
+@cindex loops, @code{continue} statements and
+@noindent
+The only exception is when the @code{continue} statement
+(@pxref{Continue Statement}) is used
+inside the loop. Changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.
+
+The @command{awk} language has a @code{for} statement in addition to a
+@code{while} statement because a @code{for} loop is often both less work to
+type and more natural to think of. Counting the number of iterations is
+very common in loops. It can be easier to think of this counting as part
+of looping rather than as something to do inside the loop.
+
+@cindex @code{in} operator
+There is an alternative version of the @code{for} loop, for iterating over
+all the indices of an array:
+
+@example
+for (i in array)
+ @var{do something with} array[i]
+@end example
+
+@noindent
+@DBXREF{Scanning an Array}
+for more information on this version of the @code{for} loop.
+
+@node Switch Statement
+@subsection The @code{switch} Statement
+@cindex @code{switch} statement
+@cindex @code{case} keyword
+@cindex @code{default} keyword
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+If @command{gawk} is in compatibility mode (@pxref{Options}),
+it is not available.
+
+The @code{switch} statement allows the evaluation of an expression and
+the execution of statements based on a @code{case} match. Case statements
+are checked for a match in the order they are defined. If no suitable
+@code{case} is found, the @code{default} section is executed, if supplied.
+
+Each @code{case} contains a single constant, be it numeric, string, or
+regexp. The @code{switch} expression is evaluated, and then each
+@code{case}'s constant is compared against the result in turn. The type of constant
+determines the comparison: numeric or string do the usual comparisons.
+A regexp constant does a regular expression match against the string
+value of the original expression. The general form of the @code{switch}
+statement looks like this:
+
+@example
+switch (@var{expression}) @{
+case @var{value or regular expression}:
+ @var{case-body}
+default:
+ @var{default-body}
+@}
+@end example
+
+Control flow in
+the @code{switch} statement works as it does in C. Once a match to a given
+case is made, the case statement bodies execute until a @code{break},
+@code{continue}, @code{next}, @code{nextfile}, or @code{exit} is encountered,
+or the end of the @code{switch} statement itself. For example:
+
+@example
+while ((c = getopt(ARGC, ARGV, "aksx")) != -1) @{
+ switch (c) @{
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ @}
+@}
+@end example
+
+Note that if none of the statements specified here halt execution
+of a matched @code{case} statement, execution falls through to the
+next @code{case} until execution halts. In this example, the
+@code{case} for @code{"?"} falls through to the @code{default}
+case, which is to call a function named @code{usage()}.
+(The @code{getopt()} function being called here is
+described in @ref{Getopt Function}.)
+
+@node Break Statement
+@subsection The @code{break} Statement
+@cindex @code{break} statement
+@cindex loops, exiting
+@cindex loops, @code{break} statement and
+
+The @code{break} statement jumps out of the innermost @code{for},
+@code{while}, or @code{do} loop that encloses it. The following example
+finds the smallest divisor of any integer, and also identifies prime
+numbers:
+
+@example
+# find smallest divisor of num
+@{
+ num = $1
+ for (div = 2; div * div <= num; div++) @{
+ if (num % div == 0)
+ break
+ @}
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
+@}
+@end example
+
+When the remainder is zero in the first @code{if} statement, @command{awk}
+immediately @dfn{breaks out} of the containing @code{for} loop. This means
+that @command{awk} proceeds immediately to the statement following the loop
+and continues processing. (This is very different from the @code{exit}
+statement, which stops the entire @command{awk} program.
+@xref{Exit Statement}.)
+
+The following program illustrates how the @var{condition} of a @code{for}
+or @code{while} statement could be replaced with a @code{break} inside
+an @code{if}:
+
+@example
+# find smallest divisor of num
+@{
+ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div * div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
+ @}
+@}
+@end example
+
+The @code{break} statement is also used to break out of the
+@code{switch} statement.
+This is discussed in @ref{Switch Statement}.
+
+@c @cindex @code{break}, outside of loops
+@c @cindex historical features
+@c @cindex @command{awk} language, POSIX version
+@cindex POSIX @command{awk}, @code{break} statement and
+@cindex dark corner, @code{break} statement
+@cindex @command{gawk}, @code{break} statement in
+@cindex Brian Kernighan's @command{awk}
+The @code{break} statement has no meaning when
+used outside the body of a loop or @code{switch}.
+However, although it was never documented,
+historical implementations of @command{awk} treated the @code{break}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement}).
+@value{DARKCORNER}
+Recent versions of BWK @command{awk} no longer allow this usage,
+nor does @command{gawk}.
+
+@node Continue Statement
+@subsection The @code{continue} Statement
+
+@cindex @code{continue} statement
+Similar to @code{break}, the @code{continue} statement is used only inside
+@code{for}, @code{while}, and @code{do} loops. It skips
+over the rest of the loop body, causing the next cycle around the loop
+to begin immediately. Contrast this with @code{break}, which jumps out
+of the loop altogether.
+
+The @code{continue} statement in a @code{for} loop directs @command{awk} to
+skip the rest of the body of the loop and resume execution with the
+increment-expression of the @code{for} statement. The following program
+illustrates this fact:
+
+@example
+BEGIN @{
+ for (x = 0; x <= 20; x++) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ @}
+ print ""
+@}
+@end example
+
+@noindent
+This program prints all the numbers from 0 to 20---except for 5, for
+which the @code{printf} is skipped. Because the increment @samp{x++}
+is not skipped, @code{x} does not remain stuck at 5. Contrast the
+@code{for} loop from the previous example with the following @code{while} loop:
+
+@example
+BEGIN @{
+ x = 0
+ while (x <= 20) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ x++
+ @}
+ print ""
+@}
+@end example
+
+@noindent
+This program loops forever once @code{x} reaches 5, because
+the increment (@samp{x++}) is never reached.
+
+@c @cindex @code{continue}, outside of loops
+@c @cindex historical features
+@c @cindex @command{awk} language, POSIX version
+@cindex POSIX @command{awk}, @code{continue} statement and
+@cindex dark corner, @code{continue} statement
+@cindex @command{gawk}, @code{continue} statement in
+@cindex Brian Kernighan's @command{awk}
+The @code{continue} statement has no special meaning with respect to the
+@code{switch} statement, nor does it have any meaning when used outside the
+body of a loop. Historical versions of @command{awk} treated a @code{continue}
+statement outside a loop the same way they treated a @code{break}
+statement outside a loop: as if it were a @code{next}
+statement
+(@pxref{Next Statement}).
+@value{DARKCORNER}
+Recent versions of BWK @command{awk} no longer work this way, nor
+does @command{gawk}.
+
+@node Next Statement
+@subsection The @code{next} Statement
+@cindex @code{next} statement
+
+The @code{next} statement forces @command{awk} to immediately stop processing
+the current record and go on to the next record. This means that no
+further rules are executed for the current record, and the rest of the
+current rule's action isn't executed.
+
+Contrast this with the effect of the @code{getline} function
+(@pxref{Getline}). That also causes
+@command{awk} to read the next record immediately, but it does not alter the
+flow of control in any way (i.e., the rest of the current action executes
+with a new input record).
+
+@cindex @command{awk} programs, execution of
+At the highest level, @command{awk} program execution is a loop that reads
+an input record and then tests each rule's pattern against it. If you
+think of this loop as a @code{for} statement whose body contains the
+rules, then the @code{next} statement is analogous to a @code{continue}
+statement. It skips to the end of the body of this implicit loop and
+executes the increment (which reads another record).
+
+For example, suppose an @command{awk} program works only on records
+with four fields, and it shouldn't fail when given bad input. To avoid
+complicating the rest of the program, write a ``weed out'' rule near
+the beginning, in the following manner:
+
+@example
+NF != 4 @{
+ printf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) > "/dev/stderr"
+ next
+@}
+@end example
+
+@noindent
+Because of the @code{next} statement,
+the program's subsequent rules won't see the bad record. The error
+message is redirected to the standard error output stream, as error
+messages should be.
+For more detail, see
+@ref{Special Files}.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in any @code{END} rules is executed.
+@xref{BEGIN/END}.
+
+The @code{next} statement is not allowed inside @code{BEGINFILE} and
+@code{ENDFILE} rules. @xref{BEGINFILE/ENDFILE}.
+
+@c @cindex @command{awk} language, POSIX version
+@c @cindex @code{next}, inside a user-defined function
+@cindex @code{BEGIN} pattern, @code{next}/@code{nextfile} statements and
+@cindex @code{END} pattern, @code{next}/@code{nextfile} statements and
+@cindex POSIX @command{awk}, @code{next}/@code{nextfile} statements and
+@cindex @code{next} statement, user-defined functions and
+@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
+According to the POSIX standard, the behavior is undefined if the
+@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@command{gawk} treats it as a syntax error. Although POSIX does not disallow it,
+most other @command{awk} implementations don't allow the @code{next}
+statement inside function bodies (@pxref{User-defined}). Just as with any
+other @code{next} statement, a @code{next} statement inside a function
+body reads the next record and starts processing it with the first rule
+in the program.
+
+@node Nextfile Statement
+@subsection The @code{nextfile} Statement
+@cindex @code{nextfile} statement
+
+The @code{nextfile} statement
+is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{nextfile} statement instructs @command{awk} to stop processing the
+current @value{DF}.
+
+Upon execution of the @code{nextfile} statement,
+@code{FILENAME} is
+updated to the name of the next @value{DF} listed on the command line,
+@code{FNR} is reset to one,
+and processing
+starts over with the first rule in the program.
+If the @code{nextfile} statement causes the end of the input to be reached,
+then the code in any @code{END} rules is executed. An exception to this is
+when @code{nextfile} is invoked during execution of any statement in an
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
+
+The @code{nextfile} statement is useful when there are many @value{DF}s
+to process but it isn't necessary to process every record in every file.
+Without @code{nextfile},
+in order to move on to the next @value{DF}, a program
+would have to continue scanning the unwanted records. The @code{nextfile}
+statement accomplishes this much more efficiently.
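+
+For example, the following sketch prints just the first record of each
+@value{DF} named on the command line and then immediately moves on to
+the next one:
+
+@example
+FNR == 1 @{
+    print FILENAME ":", $0
+    nextfile
+@}
+@end example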
+
+In @command{gawk}, execution of @code{nextfile} causes additional things
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
+
+With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
+rule to skip over a file that would otherwise cause @command{gawk}
+to exit with a fatal error. In this case, @code{ENDFILE} rules are not
+executed. @xref{BEGINFILE/ENDFILE}.
+
+Although it might seem that @samp{close(FILENAME)} would accomplish
+the same as @code{nextfile}, this isn't true. @code{close()} is
+reserved for closing files, pipes, and coprocesses that are
+opened with redirections. It is not related to the main processing that
+@command{awk} does with the files listed in @code{ARGV}.
+
+@quotation NOTE
+For many years, @code{nextfile} was a
+common extension. In September 2012, it was accepted for
+inclusion into the POSIX standard.
+See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
+@end quotation
+
+@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
+@cindex @code{nextfile} statement, user-defined functions and
+@cindex Brian Kernighan's @command{awk}
+@cindex @command{mawk} utility
+The current version of BWK @command{awk} and @command{mawk}
+also support @code{nextfile}. However, they don't allow the
+@code{nextfile} statement inside function bodies (@pxref{User-defined}).
+@command{gawk} does; a @code{nextfile} inside a function body reads the
+next record and starts processing it with the first rule in the program,
+just as any other @code{nextfile} statement.
+
+@node Exit Statement
+@subsection The @code{exit} Statement
+
+@cindex @code{exit} statement
+The @code{exit} statement causes @command{awk} to immediately stop
+executing the current rule and to stop processing input; any remaining input
+is ignored. The @code{exit} statement is written as follows:
+
+@display
+@code{exit} [@var{return code}]
+@end display
+
+@cindex @code{BEGIN} pattern, @code{exit} statement and
+@cindex @code{END} pattern, @code{exit} statement and
+When an @code{exit} statement is executed from a @code{BEGIN} rule, the
+program stops processing everything immediately. No input records are
+read. However, if an @code{END} rule is present,
+as part of executing the @code{exit} statement,
+the @code{END} rule is executed
+(@pxref{BEGIN/END}).
+If @code{exit} is used in the body of an @code{END} rule, it causes
+the program to stop immediately.
+
+An @code{exit} statement that is not part of a @code{BEGIN} or @code{END}
+rule stops the execution of any further automatic rules for the current
+record, skips reading any remaining input records, and executes the
+@code{END} rule if there is one. @command{gawk} also skips
+any @code{ENDFILE} rules; they do not execute.
+
+In such a case,
+if you don't want the @code{END} rule to do its job, set a variable
+to nonzero before the @code{exit} statement and check that variable in
+the @code{END} rule.
+@DBXREF{Assert Function}
+for an example that does this.
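+
+Here is a minimal sketch of that technique; the validity check is
+made up for illustration:
+
+@example
+NF != 3 @{                    # hypothetical validity check
+    print "bad record:", $0 > "/dev/stderr"
+    died = 1
+    exit 1
+@}
+
+END @{
+    if (died)
+        exit 1               # skip the normal summary
+    print NR, "records processed"
+@}
+@end example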
+
+@cindex dark corner, @code{exit} statement
+If an argument is supplied to @code{exit}, its value is used as the exit
+status code for the @command{awk} process. If no argument is supplied,
+@code{exit} causes @command{awk} to return a ``success'' status.
+In the case where an argument
+is supplied to a first @code{exit} statement, and then @code{exit} is
+called a second time from an @code{END} rule with no argument,
+@command{awk} uses the previously supplied exit value. @value{DARKCORNER}
+@DBXREF{Exit Status} for more information.
+
+@cindex programming conventions, @code{exit} statement
+For example, suppose an error condition occurs that is difficult or
+impossible to handle. Conventionally, programs report this by
+exiting with a nonzero status. An @command{awk} program can do this
+using an @code{exit} statement with a nonzero argument, as shown
+in the following example:
+
+@example
+BEGIN @{
+ if (("date" | getline date_now) <= 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ @}
+ print "current date is", date_now
+ close("date")
+@}
+@end example
+
+@quotation NOTE
+For full portability, exit values should be between zero and 126, inclusive.
+Negative values, and values of 127 or greater, may not produce consistent
+results across different operating systems.
+@end quotation
+
+
+@node Built-in Variables
+@section Predefined Variables
+@cindex predefined variables
+@cindex variables, predefined
+
+Most @command{awk} variables are available to use for your own
+purposes; they never change unless your program assigns values to
+them, and they never affect anything unless your program examines them.
+However, a few variables in @command{awk} have special built-in meanings.
+@command{awk} examines some of these automatically, so that you can
+tell @command{awk} how to do certain things. Others are set
+automatically by @command{awk}, so that they carry information from the
+internal workings of @command{awk} to your program.
+
+@cindex @command{gawk}, predefined variables and
+This @value{SECTION} documents all of @command{gawk}'s predefined variables,
+most of which are also documented in the @value{CHAPTER}s describing
+their areas of activity.
+
+@menu
+* User-modified:: Built-in variables that you change to control
+ @command{awk}.
+* Auto-set:: Built-in variables where @command{awk} gives
+ you information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}.
+@end menu
+
+@node User-modified
+@subsection Built-In Variables That Control @command{awk}
+@cindex predefined variables, user-modifiable
+@cindex user-modifiable variables
+
+The following is an alphabetical list of variables that you can change to
+control how @command{awk} does certain things.
+
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special. (Any exceptions are noted
+in the description of each variable.)
+
+@table @code
+@cindex @code{BINMODE} variable
+@cindex binary input/output
+@cindex input/output, binary
+@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
+@item BINMODE #
+On non-POSIX systems, this variable specifies use of binary mode
+for all I/O. Numeric values of one, two, or three specify that input
+files, output files, or all files, respectively, should use binary I/O.
+A numeric value less than zero is treated as zero, and a numeric value
+greater than three is treated as three. Alternatively, string values
+of @code{"r"} or @code{"w"} specify that input files and output files,
+respectively, should use binary I/O. A string value of @code{"rw"} or
+@code{"wr"} indicates that all files should use binary I/O. Any other
+string value is treated the same as @code{"rw"}, but causes @command{gawk}
+to generate a warning message. @code{BINMODE} is described in more
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions})
+also supports this variable, but only using numeric values.
+
+@cindex @code{CONVFMT} variable
+@cindex POSIX @command{awk}, @code{CONVFMT} variable and
+@cindex numbers, converting, to strings
+@cindex strings, converting, numbers to
+@item @code{CONVFMT}
+This string controls conversion of numbers to
+strings (@pxref{Conversion}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf()} function
+(@pxref{String Functions}).
+Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the POSIX standard.
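+
+For example, here is a minimal sketch of its effect upon conversion
+via string concatenation:
+
+@example
+BEGIN @{
+    x = 2 / 3
+    print "default: " x      # "%.6g" yields 0.666667
+    CONVFMT = "%.2f"
+    print "rounded: " x      # now yields 0.67
+@}
+@end example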
+
+@cindex @command{gawk}, @code{FIELDWIDTHS} variable in
+@cindex @code{FIELDWIDTHS} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{FIELDWIDTHS} variable
+@cindex field separators, @code{FIELDWIDTHS} variable and
+@cindex separators, field, @code{FIELDWIDTHS} variable and
+@item FIELDWIDTHS #
+A space-separated list of columns that tells @command{gawk}
+how to split input with fixed columnar boundaries.
+Assigning a value to @code{FIELDWIDTHS}
+overrides the use of @code{FS} and @code{FPAT} for field splitting.
+@DBXREF{Constant Size} for more information.
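+
+For example, the following sketch splits a record into fields that
+are four, five, and three characters wide (the input is made up):
+
+@example
+$ @kbd{echo 1234abcdeXYZ |}
+> @kbd{gawk 'BEGIN @{ FIELDWIDTHS = "4 5 3" @} @{ print $2 @}'}
+@print{} abcde
+@end example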
+
+@cindex @command{gawk}, @code{FPAT} variable in
+@cindex @code{FPAT} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{FPAT} variable
+@cindex field separators, @code{FPAT} variable and
+@cindex separators, field, @code{FPAT} variable and
+@item FPAT #
+A regular expression (as a string) that tells @command{gawk}
+to create the fields based on text that matches the regular expression.
+Assigning a value to @code{FPAT}
+overrides the use of @code{FS} and @code{FIELDWIDTHS} for field splitting.
+@DBXREF{Splitting By Content} for more information.
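+
+For example, the following sketch treats each run of digits in the
+record as a separate field:
+
+@example
+$ @kbd{echo abc123def456 |}
+> @kbd{gawk 'BEGIN @{ FPAT = "[0-9]+" @} @{ print $1, $2 @}'}
+@print{} 123 456
+@end example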
+
+@cindex @code{FS} variable
+@cindex separators, field
+@cindex field separators
+@item FS
+The input field separator (@pxref{Field Separators}).
+The value is a single-character string or a multicharacter regular
+expression that matches the separations between fields in an input
+record. If the value is the null string (@code{""}), then each
+character in the record becomes a separate field.
+(This behavior is a @command{gawk} extension. POSIX @command{awk} does not
+specify the behavior when @code{FS} is the null string.
+Nonetheless, some other versions of @command{awk} also treat
+@code{""} specially.)
+
+@cindex POSIX @command{awk}, @code{FS} variable and
+The default value is @w{@code{" "}}, a string consisting of a single
+space. As a special exception, this value means that any
+sequence of spaces, TABs, and/or newlines is a single separator.@footnote{In
+POSIX @command{awk}, newline does not count as whitespace.} It also causes
+spaces, TABs, and newlines at the beginning and end of a record to be ignored.
+
+You can set the value of @code{FS} on the command line using the
+@option{-F} option:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+@cindex @command{gawk}, field separators and
+If @command{gawk} is using @code{FIELDWIDTHS} or @code{FPAT}
+for field splitting,
+assigning a value to @code{FS} causes @command{gawk} to return to
+the normal, @code{FS}-based field splitting. An easy way to do this
+is to simply say @samp{FS = FS}, perhaps with an explanatory comment.
+
+@cindex @command{gawk}, @code{IGNORECASE} variable in
+@cindex @code{IGNORECASE} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{IGNORECASE} variable
+@cindex case sensitivity, and string comparisons
+@cindex case sensitivity, and regexps
+@cindex regular expressions, case sensitivity
+@item IGNORECASE #
+If @code{IGNORECASE} is nonzero or non-null, then all string comparisons
+and all regular expression matching are case independent. Thus, regexp
+matching with @samp{~} and @samp{!~}, as well as the @code{gensub()},
+@code{gsub()}, @code{index()}, @code{match()}, @code{patsplit()},
+@code{split()}, and @code{sub()}
+functions, record termination with @code{RS}, and field splitting with
+@code{FS} and @code{FPAT}, all ignore case when doing their particular regexp operations.
+However, the value of @code{IGNORECASE} does @emph{not} affect array subscripting
+and it does not affect field splitting when using a single-character
+field separator.
+@xref{Case-sensitivity}.
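+
+For example, this small sketch matches a lowercase regexp against
+mixed-case input:
+
+@example
+$ @kbd{echo Hello World |}
+> @kbd{gawk 'BEGIN @{ IGNORECASE = 1 @} /hello/ @{ print "matched" @}'}
+@print{} matched
+@end example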
+
+@cindex @command{gawk}, @code{LINT} variable in
+@cindex @code{LINT} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{LINT} variable
+@cindex lint checking
+@item LINT #
+When this variable is true (nonzero or non-null), @command{gawk}
+behaves as if the @option{--lint} command-line option is in effect
+(@pxref{Options}).
+With a value of @code{"fatal"}, lint warnings become fatal errors.
+With a value of @code{"invalid"}, only warnings about things that are
+actually invalid are issued. (This is not fully implemented yet.)
+Any other true value prints nonfatal warnings.
+Assigning a false value to @code{LINT} turns off the lint warnings.
+
+This variable is a @command{gawk} extension. It is not special
+in other @command{awk} implementations. Unlike the other special variables,
+changing @code{LINT} does affect the production of lint warnings,
+even if @command{gawk} is in compatibility mode. Much as
+the @option{--lint} and @option{--traditional} options independently
+control different aspects of @command{gawk}'s behavior, the control
+of lint warnings during program execution is independent of the flavor
+of @command{awk} being executed.
+
+@cindex @code{OFMT} variable
+@cindex numbers, converting, to strings
+@cindex strings, converting, numbers to
+@item OFMT
+Controls conversion of numbers to
+strings (@pxref{Conversion}) for
+printing with the @code{print} statement. It works by being passed
+as the first argument to the @code{sprintf()} function
+(@pxref{String Functions}).
+Its default value is @code{"%.6g"}. Earlier versions of @command{awk}
+used @code{OFMT} to specify the format for converting numbers to
+strings in general expressions; this is now done by @code{CONVFMT}.
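+
+For example, here is a minimal sketch showing that @code{OFMT}
+affects only numbers printed directly by @code{print}:
+
+@example
+BEGIN @{
+    x = 2 / 3
+    print x                  # OFMT's "%.6g" yields 0.666667
+    OFMT = "%.2f"
+    print x                  # now prints 0.67
+@}
+@end example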
+
+@cindex @code{sprintf()} function, @code{OFMT} variable and
+@cindex @code{print} statement, @code{OFMT} variable and
+@cindex @code{OFS} variable
+@cindex separators, field
+@cindex field separators
+@item OFS
+This is the output field separator (@pxref{Output Separators}). It is
+output between the fields printed by a @code{print} statement. Its
+default value is @w{@code{" "}}, a string consisting of a single space.
+
+@cindex @code{ORS} variable
+@item ORS
+The output record separator. It is output at the end of every
+@code{print} statement. Its default value is @code{"\n"}, the newline
+character. (@xref{Output Separators}.)
+
+@cindex @code{PREC} variable
+@item PREC #
+The working precision of arbitrary-precision floating-point numbers,
+53 bits by default (@pxref{Setting precision}).
+
+@cindex @code{ROUNDMODE} variable
+@item ROUNDMODE #
+The rounding mode to use for arbitrary-precision arithmetic on
+numbers, by default @code{"N"} (@samp{roundTiesToEven} in
+the IEEE 754 standard; @pxref{Setting the rounding mode}).
+
+@cindex @code{RS} variable
+@cindex separators, for records
+@cindex record separators
+@item @code{RS}
+The input record separator. Its default value is a string
+containing a single newline character, which means that an input record
+consists of a single line of text.
+It can also be the null string, in which case records are separated by
+runs of blank lines.
+If it is a regexp, records are separated by
+matches of the regexp in the input text.
+(@xref{Records}.)
+
+The ability for @code{RS} to be a regular expression
+is a @command{gawk} extension.
+In most other @command{awk} implementations,
+or if @command{gawk} is in compatibility mode
+(@pxref{Options}),
+just the first character of @code{RS}'s value is used.
+
+@cindex @code{SUBSEP} variable
+@cindex separators, subscript
+@cindex subscript separators
+@item @code{SUBSEP}
+The subscript separator. It has the default value of
+@code{"\034"} and is used to separate the parts of the indices of a
+multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}}
+really accesses @code{foo["A\034B"]}
+(@pxref{Multidimensional}).
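+
+For example, this small sketch shows the stored index directly:
+
+@example
+BEGIN @{
+    foo["A", "B"] = "hello"
+    for (combined in foo)
+        print (combined == ("A" SUBSEP "B"))   # prints 1
+@}
+@end example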
+
+@cindex @command{gawk}, @code{TEXTDOMAIN} variable in
+@cindex @code{TEXTDOMAIN} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{TEXTDOMAIN} variable
+@cindex internationalization, localization
+@item TEXTDOMAIN #
+Used for internationalization of programs at the
+@command{awk} level. It sets the default text domain for specially
+marked string constants in the source text, as well as for the
+@code{dcgettext()}, @code{dcngettext()}, and @code{bindtextdomain()} functions
+(@pxref{Internationalization}).
+The default value of @code{TEXTDOMAIN} is @code{"messages"}.
+@end table
+
+@node Auto-set
+@subsection Built-In Variables That Convey Information
+
+@cindex predefined variables, conveying information
+@cindex variables, predefined conveying information
+The following is an alphabetical list of variables that @command{awk}
+sets automatically on certain occasions in order to provide
+information to your program.
+
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special:
+
+@c @asis for docbook
+@table @asis
+@cindex @code{ARGC}/@code{ARGV} variables
+@cindex arguments, command-line
+@cindex command line, arguments
+@item @code{ARGC}, @code{ARGV}
+The command-line arguments available to @command{awk} programs are stored in
+an array called @code{ARGV}. @code{ARGC} is the number of command-line
+arguments present. @xref{Other Arguments}.
+Unlike most @command{awk} arrays,
+@code{ARGV} is indexed from 0 to @code{ARGC} @minus{} 1.
+In the following example:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{for (i = 0; i < ARGC; i++)}
+> @kbd{print ARGV[i]}
+> @kbd{@}' inventory-shipped mail-list}
+@print{} awk
+@print{} inventory-shipped
+@print{} mail-list
+@end example
+
+@noindent
+@code{ARGV[0]} contains @samp{awk}, @code{ARGV[1]}
+contains @samp{inventory-shipped}, and @code{ARGV[2]} contains
+@samp{mail-list}. The value of @code{ARGC} is three, one more than the
+index of the last element in @code{ARGV}, because the elements are numbered
+from zero.
+
+@cindex programming conventions, @code{ARGC}/@code{ARGV} variables
+The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
+the array from 0 to @code{ARGC} @minus{} 1, are derived from the C language's
+method of accessing command-line arguments.
+
+@cindex dark corner, value of @code{ARGV[0]}
+The value of @code{ARGV[0]} can vary from system to system.
+Also, you should note that the program text is @emph{not} included in
+@code{ARGV}, nor are any of @command{awk}'s command-line options.
+@DBXREF{ARGC and ARGV} for information
+about how @command{awk} uses these variables.
+@value{DARKCORNER}
+
+@cindex @code{ARGIND} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{ARGIND} variable
+@item @code{ARGIND #}
+The index in @code{ARGV} of the current file being processed.
+Every time @command{gawk} opens a new @value{DF} for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the @value{FN}.
+When @command{gawk} is processing the input files,
+@samp{FILENAME == ARGV[ARGIND]} is always true.
+
+@cindex files, processing@comma{} @code{ARGIND} variable and
+This variable is useful in file processing; it allows you to tell how far
+along you are in the list of @value{DF}s as well as to distinguish between
+successive instances of the same @value{FN} on the command line.
+
+@cindex file names, distinguishing
+While you can change the value of @code{ARGIND} within your @command{awk}
+program, @command{gawk} automatically sets it to a new value when it
+opens the next file.
+
+@cindex @code{ENVIRON} array
+@cindex environment variables, in @code{ENVIRON} array
+@item @code{ENVIRON}
+An associative array containing the values of the environment. The array
+indices are the environment variable names; the elements are the values of
+the particular environment variables. For example,
+@code{ENVIRON["HOME"]} might be @code{/home/arnold}.
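+
+For example, here is a short sketch that prints one environment
+variable if it is present:
+
+@example
+BEGIN @{
+    if ("HOME" in ENVIRON)
+        print "home directory is", ENVIRON["HOME"]
+@}
+@end example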
+
+For POSIX @command{awk}, changing this array does not affect the
+environment passed on to any programs that @command{awk} may spawn via
+redirection or the @code{system()} function.
+
+However, beginning with version 4.2, if not in POSIX
+compatibility mode, @command{gawk} does update its own environment when
+@code{ENVIRON} is changed, thus changing the environment seen by programs
+that it creates. You should therefore be especially careful if you
+modify @code{ENVIRON["PATH"]}, which is the search path for finding
+executable programs.
+
+This can also affect the running @command{gawk} program, since some of the
+built-in functions may pay attention to certain environment variables.
+The most notable instance of this is @code{mktime()} (@pxref{Time
+Functions}), which pays attention to the value of the @env{TZ} environment
+variable on many systems.
+
+Some operating systems may not have environment variables.
+On such systems, the @code{ENVIRON} array is empty (except for
+@w{@code{ENVIRON["AWKPATH"]}} and
+@w{@code{ENVIRON["AWKLIBPATH"]}};
+@DBPXREF{AWKPATH Variable} and
+@ifdocbook
+@DBREF{AWKLIBPATH Variable}).
+@end ifdocbook
+@ifnotdocbook
+@pxref{AWKLIBPATH Variable}).
+@end ifnotdocbook
+
+@cindex @command{gawk}, @code{ERRNO} variable in
+@cindex @code{ERRNO} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{ERRNO} variable
+@cindex error handling, @code{ERRNO} variable and
+@item @code{ERRNO #}
+If a system error occurs during a redirection for @code{getline}, during
+a read for @code{getline}, or during a @code{close()} operation, then
+@code{ERRNO} contains a string describing the error.
+
+In addition, @command{gawk} clears @code{ERRNO} before opening each
+command-line input file. This enables checking if the file is readable
+inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}).
+
+Otherwise, @code{ERRNO} works similarly to the C variable @code{errno}.
+Except for the case just mentioned, @command{gawk} @emph{never} clears
+it (sets it to zero or @code{""}). Thus, you should only expect its
+value to be meaningful when an I/O operation returns a failure value,
+such as @code{getline} returning @minus{}1. You are, of course, free
+to clear it yourself before doing an I/O operation.
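+
+For example, here is a minimal sketch that checks @code{ERRNO} after
+a failed @code{getline} (the @value{FN} is made up):
+
+@example
+BEGIN @{
+    if ((getline line < "/no/such/file") < 0)
+        print "cannot read /no/such/file:", ERRNO > "/dev/stderr"
+@}
+@end example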
+
+@cindex @code{FILENAME} variable
+@cindex dark corner, @code{FILENAME} variable
+@item @code{FILENAME}
+The name of the current input file. When no @value{DF}s are listed
+on the command line, @command{awk} reads from the standard input and
+@code{FILENAME} is set to @code{"-"}. @code{FILENAME} changes each
+time a new file is read (@pxref{Reading Files}). Inside a @code{BEGIN}
+rule, the value of @code{FILENAME} is @code{""}, because there are no input
+files being processed yet.@footnote{Some early implementations of Unix
+@command{awk} initialized @code{FILENAME} to @code{"-"}, even if there
+were @value{DF}s to be processed. This behavior was incorrect and should
+not be relied upon in your programs.} @value{DARKCORNER} Note, though,
+that using @code{getline} (@pxref{Getline}) inside a @code{BEGIN} rule
+can give @code{FILENAME} a value.
+
+@cindex @code{FNR} variable
+@item @code{FNR}
+The current record number in the current file. @command{awk} increments
+@code{FNR} each time it reads a new record (@pxref{Records}).
+@command{awk} resets @code{FNR} to zero each time it starts a new
+input file.
+
+@cindex @code{NF} variable
+@item @code{NF}
+The number of fields in the current input record.
+@code{NF} is set each time a new record is read, when a new field is
+created, or when @code{$0} changes (@pxref{Fields}).
+
+Unlike most of the variables described in this @value{SUBSECTION},
+assigning a value to @code{NF} has the potential to affect
+@command{awk}'s internal workings. In particular, assignments
+to @code{NF} can be used to create or remove fields from the
+current record. @xref{Changing Fields}.
+
+@cindex @code{FUNCTAB} array
+@cindex @command{gawk}, @code{FUNCTAB} array in
+@cindex differences in @command{awk} and @command{gawk}, @code{FUNCTAB} variable
+@item @code{FUNCTAB #}
+An array whose indices and corresponding values are the names of all
+the built-in, user-defined, and extension functions in the program.
+
+@quotation NOTE
+Attempting to use the @code{delete} statement with the @code{FUNCTAB}
+array causes a fatal error. Any attempt to assign to an element of
+@code{FUNCTAB} also causes a fatal error.
+@end quotation
+
+@cindex @code{NR} variable
+@item @code{NR}
+The number of input records @command{awk} has processed since
+the beginning of the program's execution
+(@pxref{Records}).
+@command{awk} increments @code{NR} each time it reads a new record.
+
+@cindex @command{gawk}, @code{PROCINFO} array in
+@cindex @code{PROCINFO} array
+@cindex differences in @command{awk} and @command{gawk}, @code{PROCINFO} array
+@item @code{PROCINFO #}
+The elements of this array provide access to information about the
+running @command{awk} program.
+The following elements (listed alphabetically)
+are guaranteed to be available:
+
+@table @code
+@cindex effective group ID of @command{gawk} user
+@item PROCINFO["egid"]
+The value of the @code{getegid()} system call.
+
+@item PROCINFO["euid"]
+@cindex effective user ID of @command{gawk} user
+The value of the @code{geteuid()} system call.
+
+@item PROCINFO["FS"]
+This is
+@code{"FS"} if field splitting with @code{FS} is in effect,
+@code{"FIELDWIDTHS"} if field splitting with @code{FIELDWIDTHS} is in effect,
+or @code{"FPAT"} if field matching with @code{FPAT} is in effect.
+
+@item PROCINFO["identifiers"]
+@cindex program identifiers
+A subarray, indexed by the names of all identifiers used in the text of
+the AWK program. An @dfn{identifier} is simply the name of a variable
+(be it scalar or array), built-in function, user-defined function, or
+extension function. For each identifier, the value of the element is
+one of the following:
+
+@table @code
+@item "array"
+The identifier is an array.
+
+@item "builtin"
+The identifier is a built-in function.
+
+@item "extension"
+The identifier is an extension function loaded via
+@code{@@load} or @option{-l}.
+
+@item "scalar"
+The identifier is a scalar.
+
+@item "untyped"
+The identifier is untyped (could be used as a scalar or array,
+@command{gawk} doesn't know yet).
+
+@item "user"
+The identifier is a user-defined function.
+@end table
+
+@noindent
+The values indicate what @command{gawk} knows about the identifiers
+after it has finished parsing the program; they are @emph{not} updated
+while the program runs.
+
+@item PROCINFO["gid"]
+@cindex group ID of @command{gawk} user
+The value of the @code{getgid()} system call.
+
+@item PROCINFO["pgrpid"]
+@cindex process group ID of @command{gawk} process
+The process group ID of the current process.
+
+@item PROCINFO["pid"]
+@cindex process ID of @command{gawk} process
+The process ID of the current process.
+
+@item PROCINFO["ppid"]
+@cindex parent process ID of @command{gawk} process
+The parent process ID of the current process.
+
+@item PROCINFO["sorted_in"]
+If this element exists in @code{PROCINFO}, its value controls the
+order in which array indices will be processed by
+@samp{for (@var{indx} in @var{array})} loops.
+This is an advanced feature, so we defer the
+full description until later; see
+@ref{Scanning an Array}.
+
+@item PROCINFO["strftime"]
+The default time format string for @code{strftime()}.
+Assigning a new value to this element changes the default.
+@xref{Time Functions}.
+
+@item PROCINFO["uid"]
+The value of the @code{getuid()} system call.
+
+@item PROCINFO["version"]
+@cindex version of @command{gawk}
+@cindex @command{gawk} version
+The version of @command{gawk}.
+@end table
+
+The following additional elements in the array
+are available to provide information about the MPFR and GMP libraries
+if your version of @command{gawk} supports arbitrary-precision arithmetic
+(@pxref{Arbitrary Precision Arithmetic}):
+
+@table @code
+@cindex version of GNU MPFR library
+@item PROCINFO["mpfr_version"]
+The version of the GNU MPFR library.
+
+@item PROCINFO["gmp_version"]
+@cindex version of GNU MP library
+The version of the GNU MP library.
+
+@item PROCINFO["prec_max"]
+@cindex maximum precision supported by MPFR library
+The maximum precision supported by MPFR.
+
+@item PROCINFO["prec_min"]
+@cindex minimum precision supported by MPFR library
+The minimum precision required by MPFR.
+@end table
+
+The following additional elements in the array are available to provide
+information about the version of the extension API, if your version
+of @command{gawk} supports dynamic loading of extension functions
+(@pxref{Dynamic Extensions}):
+
+@table @code
+@item PROCINFO["api_major"]
+@cindex version of @command{gawk} extension API
+@cindex extension API, version number
+The major version of the extension API.
+
+@item PROCINFO["api_minor"]
+The minor version of the extension API.
+@end table
+
+@cindex supplementary groups of @command{gawk} process
+On some systems, there may be elements in the array, @code{"group1"}
+through @code{"group@var{N}"} for some @var{N}. @var{N} is the number of
+supplementary groups that the process has. Use the @code{in} operator
+to test for these elements
+(@pxref{Reference to Elements}).
+
+@cindex @command{gawk}, @code{PROCINFO} array in
+@cindex @code{PROCINFO} array, uses
+The @code{PROCINFO} array has the following additional uses:
+
+@itemize @value{BULLET}
+@item
+It may be used to provide a timeout when reading from any
+open input file, pipe, or coprocess.
+@DBXREF{Read Timeout} for more information.
+
+@item
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
+@end itemize
+
+@cindex @code{RLENGTH} variable
+@item @code{RLENGTH}
+The length of the substring matched by the
+@code{match()} function
+(@pxref{String Functions}).
+@code{RLENGTH} is set by invoking the @code{match()} function. Its value
+is the length of the matched string, or @minus{}1 if no match is found.
+
+@cindex @code{RSTART} variable
+@item @code{RSTART}
+The start-index in characters of the substring that is matched by the
+@code{match()} function
+(@pxref{String Functions}).
+@code{RSTART} is set by invoking the @code{match()} function. Its value
+is the position in the string where the matched substring starts, or zero
+if no match was found.
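+
+For example, a small sketch:
+
+@example
+BEGIN @{
+    if (match("foobar", /bar/))
+        print RSTART, RLENGTH    # prints 4 3
+@}
+@end example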
+
+@cindex @command{gawk}, @code{RT} variable in
+@cindex @code{RT} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{RT} variable
+@item @code{RT #}
+The input text that matched the text denoted by @code{RS},
+the record separator. It is set every time a record is read.
+
+@cindex @command{gawk}, @code{SYMTAB} array in
+@cindex @code{SYMTAB} array
+@cindex differences in @command{awk} and @command{gawk}, @code{SYMTAB} variable
+@item @code{SYMTAB #}
+An array whose indices are the names of all defined global variables and
+arrays in the program. @code{SYMTAB} makes @command{gawk}'s symbol table
+visible to the @command{awk} programmer. It is built as @command{gawk}
+parses the program and is complete before the program starts to run.
+
+The array may be used for indirect access to read or write the value of
+a variable:
+
+@example
+foo = 5
+SYMTAB["foo"] = 4
+print foo # prints 4
+@end example
+
+@noindent
+The @code{isarray()} function (@pxref{Type Functions}) may be used to test
+if an element in @code{SYMTAB} is an array.
+Also, you may not use the @code{delete} statement with the
+@code{SYMTAB} array.
+
+You may use an index for @code{SYMTAB} that is not a predefined identifier:
+
+@example
+SYMTAB["xxx"] = 5
+print SYMTAB["xxx"]
+@end example
+
+@noindent
+This works as expected: in this case @code{SYMTAB} acts just like
+a regular array. The only difference is that you can't then delete
+@code{SYMTAB["xxx"]}.
+
+@cindex Schorr, Andrew
+The @code{SYMTAB} array is more interesting than it looks. Andrew Schorr
+points out that it effectively gives @command{awk} data pointers. Consider his
+example:
+
+@example
+# Indirect multiply of any variable by amount, return result
+
+function multiply(variable, amount)
+@{
+ return SYMTAB[variable] *= amount
+@}
+@end example
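+
+@noindent
+You might then use it like this (a small sketch):
+
+@example
+BEGIN @{
+    answer = 10.5
+    multiply("answer", 4)
+    print "The answer is", answer    # prints 42
+@}
+@end example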
+
+@quotation NOTE
+In order to avoid severe time-travel paradoxes,@footnote{Not to mention difficult
+implementation issues.} neither @code{FUNCTAB} nor @code{SYMTAB}
+are available as elements within the @code{SYMTAB} array.
+@end quotation
+@end table
+
+@sidebar Changing @code{NR} and @code{FNR}
+@cindex @code{NR} variable, changing
+@cindex @code{FNR} variable, changing
+@cindex dark corner, @code{FNR}/@code{NR} variables
+@command{awk} increments @code{NR} and @code{FNR}
+each time it reads a record, instead of setting them to the absolute
+value of the number of records read. This means that a program can
+change these variables and their new values are incremented for
+each record.
+@value{DARKCORNER}
+The following example shows this:
+
+@example
+$ @kbd{echo '1}
+> @kbd{2}
+> @kbd{3}
+> @kbd{4' | awk 'NR == 2 @{ NR = 17 @}}
+> @kbd{@{ print NR @}'}
+@print{} 1
+@print{} 17
+@print{} 18
+@print{} 19
+@end example
+
+@noindent
+Before @code{FNR} was added to the @command{awk} language
+(@pxref{V7/SVR3.1}),
+many @command{awk} programs used this feature to track the number of
+records in a file by resetting @code{NR} to zero when @code{FILENAME}
+changed.
+@end sidebar
+
+@node ARGC and ARGV
+@subsection Using @code{ARGC} and @code{ARGV}
+@cindex @code{ARGC}/@code{ARGV} variables, how to use
+@cindex arguments, command-line
+@cindex command line, arguments
+
+@DBREF{Auto-set}
+presented the following program describing the information contained in @code{ARGC}
+and @code{ARGV}:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{for (i = 0; i < ARGC; i++)}
+> @kbd{print ARGV[i]}
+> @kbd{@}' inventory-shipped mail-list}
+@print{} awk
+@print{} inventory-shipped
+@print{} mail-list
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @samp{awk}, @code{ARGV[1]}
+contains @samp{inventory-shipped}, and @code{ARGV[2]} contains
+@samp{mail-list}.
+Notice that the @command{awk} program is not entered in @code{ARGV}. The
+other command-line options, with their arguments, are also not
+entered. This includes variable assignments done with the @option{-v}
+option (@pxref{Options}).
+Normal variable assignments on the command line @emph{are}
+treated as arguments and do show up in the @code{ARGV} array.
+Given the following program in a file named @file{showargs.awk}:
+
+@example
+BEGIN @{
+ printf "A=%d, B=%d\n", A, B
+ for (i = 0; i < ARGC; i++)
+ printf "\tARGV[%d] = %s\n", i, ARGV[i]
+@}
+END @{ printf "A=%d, B=%d\n", A, B @}
+@end example
+
+@noindent
+Running it produces the following:
+
+@example
+$ @kbd{awk -v A=1 -f showargs.awk B=2 /dev/null}
+@print{} A=1, B=0
+@print{} ARGV[0] = awk
+@print{} ARGV[1] = B=2
+@print{} ARGV[2] = /dev/null
+@print{} A=1, B=2
+@end example
+
+A program can alter @code{ARGC} and the elements of @code{ARGV}.
+Each time @command{awk} reaches the end of an input file, it uses the next
+element of @code{ARGV} as the name of the next input file. By storing a
+different string there, a program can change which files are read.
+Use @code{"-"} to represent the standard input. Storing
+additional elements and incrementing @code{ARGC} causes
+additional files to be read.
+
+If the value of @code{ARGC} is decreased, that eliminates input files
+from the end of the list. By recording the old value of @code{ARGC}
+elsewhere, a program can treat the eliminated arguments as
+something other than @value{FN}s.
+
+To eliminate a file from the middle of the list, store the null string
+(@code{""}) into @code{ARGV} in place of the file's name. As a
+special feature, @command{awk} ignores @value{FN}s that have been
+replaced with the null string.
+Another option is to
+use the @code{delete} statement to remove elements from
+@code{ARGV} (@pxref{Delete}).
+
+All of these actions are typically done in the @code{BEGIN} rule,
+before actual processing of the input begins.
+@DBXREF{Split Program} and
+@ifnotdocbook
+@DBPXREF{Tee Program}
+@end ifnotdocbook
+@ifdocbook
+@DBREF{Tee Program}
+@end ifdocbook
+for examples
+of each way of removing elements from @code{ARGV}.
+
+To actually get options into an @command{awk} program,
+end the @command{awk} options with @option{--} and then supply
+the @command{awk} program's options, in the following manner:
+
+@example
+awk -f myprog.awk -- -v -q file1 file2 @dots{}
+@end example
+
+The following fragment processes @code{ARGV} in order to examine, and
+then remove, the previously mentioned command-line options:
+
+@example
+BEGIN @{
+ for (i = 1; i < ARGC; i++) @{
+ if (ARGV[i] == "-v")
+ verbose = 1
+ else if (ARGV[i] == "-q")
+ debug = 1
+ else if (ARGV[i] ~ /^-./) @{
+ e = sprintf("%s: unrecognized option -- %c",
+ ARGV[0], substr(ARGV[i], 2, 1))
+ print e > "/dev/stderr"
+ @} else
+ break
+ delete ARGV[i]
+ @}
+@}
+@end example
+
+@cindex differences in @command{awk} and @command{gawk}, @code{ARGC}/@code{ARGV} variables
+Ending the @command{awk} options with @option{--} isn't
+necessary in @command{gawk}. Unless @option{--posix} has
+been specified, @command{gawk} silently puts any unrecognized options
+into @code{ARGV} for the @command{awk} program to deal with. As soon
+as it sees an unknown option, @command{gawk} stops looking for other
+options that it might otherwise recognize. The previous command line with
+@command{gawk} would be:
+
+@example
+gawk -f myprog.awk -q -v file1 file2 @dots{}
+@end example
+
+@noindent
+Because @option{-q} is not a valid @command{gawk} option, it and the
+following @option{-v} are passed on to the @command{awk} program.
+(@DBXREF{Getopt Function} for an @command{awk} library function that
+parses command-line options.)
+
+When designing your program, you should choose options that don't
+conflict with @command{gawk}'s, because it will process any options
+that it accepts before passing the rest of the command line on to
+your program. Using @samp{#!} with the @option{-E} option may help
+(@DBXREF{Executable Scripts}
+and
+@ifnotdocbook
+@DBPXREF{Options}).
+@end ifnotdocbook
+@ifdocbook
+@DBREF{Options}).
+@end ifdocbook
+
+@node Pattern Action Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Pattern-action pairs make up the basic elements of an @command{awk}
+program. Patterns are either normal expressions, range expressions,
+regexp constants, one of the special keywords @code{BEGIN}, @code{END},
+@code{BEGINFILE}, @code{ENDFILE}, or empty. The action executes if
+the current record matches the pattern. Empty (missing) patterns match
+all records.
+
+@item
+I/O from @code{BEGIN} and @code{END} rules have certain constraints.
+This is also true, only more so, for @code{BEGINFILE} and @code{ENDFILE}
+rules. The latter two give you ``hooks'' into @command{gawk}'s file
+processing, allowing you to recover from a file that otherwise would
+cause a fatal error (such as a file that cannot be opened).
+
+@item
+Shell variables can be used in @command{awk} programs by careful
+use of shell quoting. It is easier to pass a shell variable into
+@command{awk} by using the @option{-v} option and an @command{awk}
+variable.
+
+@item
+Actions consist of statements enclosed in curly braces. Statements
+are built up from expressions, control statements, compound statements,
+input and output statements, and deletion statements.
+
+@item
+The control statements in @command{awk} are @code{if}-@code{else},
+@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
+adds the @code{switch} statement. There are two flavors of @code{for}
+statement: one for performing general looping, and the other for iterating
+through an array.
+
+@item
+@code{break} and @code{continue} let you exit early or start the next
+iteration of a loop (or get out of a @code{switch}).
+
+@item
+@code{next} and @code{nextfile} let you read the next record and start
+over at the top of your program, or skip to the next input file and
+start over, respectively.
+
+@item
+The @code{exit} statement terminates your program. When executed
+from an action (or function body) it transfers control to the
+@code{END} statements. From an @code{END} statement body, it exits
+immediately. You may pass an optional numeric value to be used
+as @command{awk}'s exit status.
+
+@item
+Some predefined variables provide control over @command{awk}, mainly for I/O.
+Other variables convey information from @command{awk} to your program.
+
+@item
+@code{ARGC} and @code{ARGV} make the command-line arguments available
+to your program. Manipulating them from a @code{BEGIN} rule lets you
+control how @command{awk} will process the provided @value{DF}s.
+
+@end itemize
+
+@node Arrays
+@chapter Arrays in @command{awk}
+@cindex arrays
+
+An @dfn{array} is a table of values called @dfn{elements}. The
+elements of an array are distinguished by their @dfn{indices}. Indices
+may be either numbers or strings.
+
+This @value{CHAPTER} describes how arrays work in @command{awk},
+how to use array elements, how to scan through every element in an array,
+and how to remove array elements.
+It also describes how @command{awk} simulates multidimensional
+arrays, as well as some of the less obvious points about array usage.
+The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
+for sorting arrays, and ends with a brief description of @command{gawk}'s
+ability to support true arrays of arrays.
+
+@menu
+* Array Basics:: The basics of arrays.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @command{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
+* Multidimensional:: Emulating multidimensional arrays in
+ @command{awk}.
+* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
+@end menu
+
+@node Array Basics
+@section The Basics of Arrays
+
+This @value{SECTION} presents the basics: working with elements
+in arrays one at a time, and traversing all of the elements in
+an array.
+
+@menu
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement. It
+ loops through the indices of an array's
+ existing elements.
+* Controlling Scanning:: Controlling the order in which arrays are
+ scanned.
+@end menu
+
+@node Array Intro
+@subsection Introduction to Arrays
+
+@cindex Wall, Larry
+@quotation
+@i{Doing linear scans over an associative array is like trying to club someone
+to death with a loaded Uzi.}
+@author Larry Wall
+@end quotation
+
+The @command{awk} language provides one-dimensional arrays
+for storing groups of related strings or numbers.
+Every @command{awk} array must have a name. Array names have the same
+syntax as variable names; any valid variable name would also be a valid
+array name. But one name cannot be used in both ways (as an array and
+as a variable) in the same @command{awk} program.
+
+Arrays in @command{awk} superficially resemble arrays in other programming
+languages, but there are fundamental differences. In @command{awk}, it
+isn't necessary to specify the size of an array before starting to use it.
+Additionally, any number or string, not just consecutive integers,
+may be used as an array index.
+
+In most other languages, arrays must be @dfn{declared} before use,
+including a specification of
+how many elements or components they contain. In such languages, the
+declaration causes a contiguous block of memory to be allocated for that
+many elements. Usually, an index in the array must be a positive integer.
+For example, the index zero specifies the first element in the array, which is
+actually stored at the beginning of the block of memory. Index one
+specifies the second element, which is stored in memory right after the
+first element, and so on. It is impossible to add more elements to the
+array, because it has room only for as many elements as given in
+the declaration.
+(Some languages allow arbitrary starting and ending
+indices---e.g., @samp{15 .. 27}---but the size of the array is still fixed when
+the array is declared.)
+
+A contiguous array of four elements might look like the following example,
+conceptually, if the element values are 8, @code{"foo"},
+@code{""}, and 30
+@ifnotdocbook
+as shown in @ref{figure-array-elements}:
+@end ifnotdocbook
+@ifdocbook
+as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}:
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-array-elements
+@caption{A contiguous array}
+@ifinfo
+@center @image{array-elements, , , Basic Program Stages, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{array-elements, , , Basic Program Stages}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-array-elements" float="0">
+<title>A contiguous array</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="array-elements.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+@noindent
+Only the values are stored; the indices are implicit from the order of
+the values. Here, 8 is the value at index zero, because 8 appears in the
+position with zero elements before it.
+
+@cindex arrays, indexing
+@cindex indexing arrays
+@cindex associative arrays
+@cindex arrays, associative
+Arrays in @command{awk} are different---they are @dfn{associative}. This means
+that each array is a collection of pairs---an index and its corresponding
+array element value:
+
+@ifnotdocbook
+@example
+@r{Index} 3 @r{Value} 30
+@r{Index} 1 @r{Value} "foo"
+@r{Index} 0 @r{Value} 8
+@r{Index} 2 @r{Value} ""
+@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
+
+@noindent
+The pairs are shown in jumbled order because their order is
+irrelevant.@footnote{The ordering will vary among @command{awk}
+implementations, which typically use hash tables to store array elements
+and values.}
+
+One advantage of associative arrays is that new pairs can be added
+at any time. For example, suppose a tenth element is added to the array
+whose value is @w{@code{"number ten"}}. The result is:
+
+@ifnotdocbook
+@example
+@r{Index} 10 @r{Value} "number ten"
+@r{Index} 3 @r{Value} 30
+@r{Index} 1 @r{Value} "foo"
+@r{Index} 0 @r{Value} 8
+@r{Index} 2 @r{Value} ""
+@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+
+<row>
+<entry><literal>10</literal></entry>
+<entry><literal>"number ten"</literal></entry>
+</row>
+
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
+
+@noindent
+@cindex sparse arrays
+@cindex arrays, sparse
+Now the array is @dfn{sparse}, which just means some indices are missing.
+It has elements 0--3 and 10, but doesn't have elements 4, 5, 6, 7, 8, or 9.
+
+Another consequence of associative arrays is that the indices don't
+have to be positive integers. Any number, or even a string, can be
+an index. For example, the following is an array that translates words from
+English to French:
+
+@ifnotdocbook
+@example
+@r{Index} "dog" @r{Value} "chien"
+@r{Index} "cat" @r{Value} "chat"
+@r{Index} "one" @r{Value} "un"
+@r{Index} 1 @r{Value} "un"
+@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+<row>
+<entry><literal>"dog"</literal></entry>
+<entry><literal>"chien"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"cat"</literal></entry>
+<entry><literal>"chat"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"one"</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
+
+@noindent
+Here we decided to translate the number one in both spelled-out and
+numeric form---thus illustrating that a single array can have both
+numbers and strings as indices.
+(In fact, array subscripts are always strings.
+There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in
+@ref{Numeric Array Subscripts}.)
+Here, the number @code{1} isn't double quoted, because @command{awk}
+automatically converts it to a string.
+
+@cindex @command{gawk}, @code{IGNORECASE} variable in
+@cindex case sensitivity, array indices and
+@cindex arrays, and @code{IGNORECASE} variable
+@cindex @code{IGNORECASE} variable, and array indices
+The value of @code{IGNORECASE} has no effect upon array subscripting.
+The identical string value used to store an array element must be used
+to retrieve it.
+When @command{awk} creates an array (e.g., with the @code{split()}
+built-in function),
+that array's indices are consecutive integers starting at one.
+(@xref{String Functions}.)
+
+@command{awk}'s arrays are efficient---the time to access an element
+is independent of the number of elements in the array.
+
+@node Reference to Elements
+@subsection Referring to an Array Element
+@cindex arrays, referencing elements
+@cindex array members
+@cindex elements of arrays
+
+The principal way to use an array is to refer to one of its elements.
+An array reference is an expression as follows:
+
+@example
+@var{array}[@var{index-expression}]
+@end example
+
+@noindent
+Here, @var{array} is the name of an array. The expression @var{index-expression} is
+the index of the desired element of the array.
+
+The value of the array reference is the current value of that array
+element. For example, @code{foo[4.3]} is an expression for the element
+of array @code{foo} at index @samp{4.3}.
+
+@cindex arrays, unassigned elements
+@cindex unassigned array elements
+@cindex empty array elements
+A reference to an array element that has no recorded value yields a value of
+@code{""}, the null string. This includes elements
+that have not been assigned any value as well as elements that have been
+deleted (@pxref{Delete}).
+
+@cindex non-existent array elements
+@cindex arrays, elements that don't exist
+@quotation NOTE
+A reference to an element that does not exist @emph{automatically} creates
+that array element, with the null string as its value. (In some cases,
+this is unfortunate, because it might waste memory inside @command{awk}.)
+
+Novice @command{awk} programmers often make the mistake of checking if
+an element exists by checking if the value is empty:
+
+@example
+# Check if "foo" exists in a: @ii{Incorrect!}
+if (a["foo"] != "") @dots{}
+@end example
+
+@noindent
+This is incorrect for two reasons. First, it @emph{creates} @code{a["foo"]}
+if it didn't exist before! Second, it is valid (if a bit unusual) to set
+an array element equal to the empty string.
+@end quotation
+
+@c @cindex arrays, @code{in} operator and
+@cindex @code{in} operator, testing if array element exists
+To determine whether an element exists in an array at a certain index, use
+the following expression:
+
+@example
+@var{indx} in @var{array}
+@end example
+
+@cindex side effects, array indexing
+@noindent
+This expression tests whether the particular index @var{indx} exists,
+without the side effect of creating that element if it is not present.
+The expression has the value one (true) if @code{@var{array}[@var{indx}]}
+exists and zero (false) if it does not exist.
+(We use @var{indx} here, because @samp{index} is the name of a built-in
+function.)
+For example, this statement tests whether the array @code{frequencies}
+contains the index @samp{2}:
+
+@example
+if (2 in frequencies)
+ print "Subscript 2 is present."
+@end example
+
+Note that this is @emph{not} a test of whether the array
+@code{frequencies} contains an element whose @emph{value} is two.
+There is no way to do that except to scan all the elements. Also, this
+@emph{does not} create @code{frequencies[2]}, while the following
+(incorrect) alternative does:
+
+@example
+if (frequencies[2] != "")
+ print "Subscript 2 is present."
+@end example
+
+@node Assigning Elements
+@subsection Assigning Array Elements
+@cindex arrays, elements, assigning values
+@cindex elements in arrays, assigning values
+
+Array elements can be assigned values just like
+@command{awk} variables:
+
+@example
+@var{array}[@var{index-expression}] = @var{value}
+@end example
+
+@noindent
+@var{array} is the name of an array. The expression
+@var{index-expression} is the index of the element of the array that is
+assigned a value. The expression @var{value} is the value to
+assign to that element of the array.
+
+@node Array Example
+@subsection Basic Array Example
+@cindex arrays, an example of using
+
+The following program takes a list of lines, each beginning with a line
+number, and prints them out in order of line number. The line numbers
+are not in order when they are first read---instead they
+are scrambled. This program sorts the lines by making an array using
+the line numbers as subscripts. The program then prints out the lines
+in sorted order of their numbers. It is a very simple program and gets
+confused upon encountering repeated numbers, gaps, or lines that don't
+begin with a number:
+
+@example
+@c file eg/misc/arraymax.awk
+@{
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+@}
+
+END @{
+ for (x = 1; x <= max; x++)
+ print arr[x]
+@}
+@c endfile
+@end example
+
+The first rule keeps track of the largest line number seen so far;
+it also stores each line into the array @code{arr}, at an index that
+is the line's number.
+The second rule runs after all the input has been read, to print out
+all the lines.
+When this program is run with the following input:
+
+@example
+@c file eg/misc/arraymax.data
+5 I am the Five man
+2 Who are you? The new number two!
+4 . . . And four on the floor
+1 Who is number one?
+3 I three you.
+@c endfile
+@end example
+
+@noindent
+Its output is:
+
+@example
+1 Who is number one?
+2 Who are you? The new number two!
+3 I three you.
+4 . . . And four on the floor
+5 I am the Five man
+@end example
+
+If a line number is repeated, the last line with a given number overrides
+the others.
+Gaps in the line numbers can be handled with an easy improvement to the
+program's @code{END} rule, as follows:
+
+@example
+END @{
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
+@}
+@end example
+
+@node Scanning an Array
+@subsection Scanning All Elements of an Array
+@cindex elements in arrays, scanning
+@cindex scanning arrays
+@cindex arrays, scanning
+@cindex loops, @code{for}, array scanning
+
+In programs that use arrays, it is often necessary to use a loop that
+executes once for each element of an array. In other languages, where
+arrays are contiguous and indices are limited to positive integers,
+this is easy: all the valid indices can be found by counting from
+the lowest index up to the highest. This technique won't do the job
+in @command{awk}, because any number or string can be an array index.
+So @command{awk} has a special kind of @code{for} statement for scanning
+an array:
+
+@example
+for (@var{var} in @var{array})
+ @var{body}
+@end example
+
+@noindent
+@cindex @code{in} operator, use in loops
+This loop executes @var{body} once for each index in @var{array} that the
+program has previously used, with the variable @var{var} set to that index.
+
+@cindex arrays, @code{for} statement and
+@cindex @code{for} statement, looping over arrays
+The following program uses this form of the @code{for} statement. The
+first rule scans the input records and notes which words appear (at
+least once) in the input, by storing a one into the array @code{used} with
+the word as index. The second rule scans the elements of @code{used} to
+find all the distinct words that appear in the input. It prints each
+word that is more than 10 characters long and also prints the number of
+such words.
+@DBXREF{String Functions}
+for more information on the built-in function @code{length()}.
+
+@example
+# Record a 1 for each word that is used at least once
+@{
+ for (i = 1; i <= NF; i++)
+ used[$i] = 1
+@}
+
+# Find number of distinct words more than 10 characters long
+END @{
+ for (x in used) @{
+ if (length(x) > 10) @{
+ ++num_long_words
+ print x
+ @}
+ @}
+ print num_long_words, "words longer than 10 characters"
+@}
+@end example
+
+@noindent
+@DBXREF{Word Sorting}
+for a more detailed example of this type.
+
+@cindex arrays, elements, order of access by @code{in} operator
+@cindex elements in arrays, order of access by @code{in} operator
+@cindex @code{in} operator, order of array access
+The order in which elements of the array are accessed by this statement
+is determined by the internal arrangement of the array elements within
+@command{awk} and in standard @command{awk} cannot be controlled
+or changed. This can lead to problems if new elements are added to
+@var{array} by statements in the loop body; it is not predictable whether
+the @code{for} loop will reach them. Similarly, changing @var{var} inside
+the loop may produce strange results. It is best to avoid such things.
+
+As a point of information, @command{gawk} sets up the list of elements
+to be iterated over before the loop starts, and does not change it.
+But not all @command{awk} versions do so. Consider this program, named
+@file{loopcheck.awk}:
+
+@example
+BEGIN @{
+ a["here"] = "here"
+ a["is"] = "is"
+ a["a"] = "a"
+ a["loop"] = "loop"
+ for (i in a) @{
+ j++
+ a[j] = j
+ print i
+ @}
+@}
+@end example
+
+Here is what happens when run with @command{gawk} (and @command{mawk}):
+
+@example
+$ @kbd{gawk -f loopcheck.awk}
+@print{} here
+@print{} loop
+@print{} a
+@print{} is
+@end example
+
+Contrast this to BWK @command{awk}:
+
+@example
+$ @kbd{nawk -f loopcheck.awk}
+@print{} loop
+@print{} here
+@print{} is
+@print{} a
+@print{} 1
+@end example
+
+@node Controlling Scanning
+@subsection Using Predefined Array Scanning Orders with @command{gawk}
+
+This @value{SUBSECTION} describes a feature that is specific to @command{gawk}.
+
+By default, when a @code{for} loop traverses an array, the order
+is undefined, meaning that the @command{awk} implementation
+determines the order in which the array is traversed.
+This order is usually based on the internal implementation of arrays
+and will vary from one version of @command{awk} to the next.
+
+@cindex array scanning order, controlling
+@cindex controlling array scanning order
+Often, though, you may wish to do something simple, such as
+``traverse the array by comparing the indices in ascending order,''
+or ``traverse the array by comparing the values in descending order.''
+@command{gawk} provides two mechanisms which give you this control.
+
+@itemize @value{BULLET}
+@item
+Set @code{PROCINFO["sorted_in"]} to one of a set of predefined values.
+We describe this now.
+
+@item
+Set @code{PROCINFO["sorted_in"]} to the name of a user-defined function
+to use for comparison of array elements. This advanced feature
+is described later in @ref{Array Sorting}.
+@end itemize
+
+@cindex @code{PROCINFO}, values of @code{sorted_in}
+The following special values for @code{PROCINFO["sorted_in"]} are available:
+
+@table @code
+@item "@@unsorted"
+Array elements are processed in arbitrary order, which is the default
+@command{awk} behavior.
+
+@item "@@ind_str_asc"
+Order by indices in ascending order compared as strings; this is the most basic sort.
+(Internally, array indices are always strings, so with @samp{a[2*5] = 1}
+the index is @code{"10"} rather than numeric 10.)
+
+@item "@@ind_num_asc"
+Order by indices in ascending order but force them to be treated as numbers in the process.
+Any index with a non-numeric value will end up positioned as if it were zero.
+
+@item "@@val_type_asc"
+Order by element values in ascending order (rather than by indices).
+Ordering is by the type assigned to the element
+(@pxref{Typing and Comparison}).
+All numeric values come before all string values,
+which in turn come before all subarrays.
+(Subarrays have not been described yet;
+@pxref{Arrays of Arrays}.)
+
+@item "@@val_str_asc"
+Order by element values in ascending order (rather than by indices). Scalar values are
+compared as strings. Subarrays, if present, come out last.
+
+@item "@@val_num_asc"
+Order by element values in ascending order (rather than by indices). Scalar values are
+compared as numbers. Subarrays, if present, come out last.
+When numeric values are equal, the string values are used to provide
+an ordering: this guarantees consistent results across different
+versions of the C @code{qsort()} function,@footnote{When two elements
+compare as equal, the C @code{qsort()} function does not guarantee
+that they will maintain their original relative order after sorting.
+Using the string value to provide a unique ordering when the numeric
+values are equal ensures that @command{gawk} behaves consistently
+across different environments.} which @command{gawk} uses internally
+to perform the sorting.
+
+@item "@@ind_str_desc"
+String indices ordered from high to low.
+
+@item "@@ind_num_desc"
+Numeric indices ordered from high to low.
+
+@item "@@val_type_desc"
+Element values, based on type, ordered from high to low.
+Subarrays, if present, come out first.
+
+@item "@@val_str_desc"
+Element values, treated as strings, ordered from high to low.
+Subarrays, if present, come out first.
+
+@item "@@val_num_desc"
+Element values, treated as numbers, ordered from high to low.
+Subarrays, if present, come out first.
+@end table
+
+The array traversal order is determined before the @code{for} loop
+starts to run. Changing @code{PROCINFO["sorted_in"]} in the loop body
+does not affect the loop.
+For example:
+
+@example
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{ a[4] = 4}
+> @kbd{ a[3] = 3}
+> @kbd{ for (i in a)}
+> @kbd{ print i, a[i]}
+> @kbd{@}'}
+@print{} 4 4
+@print{} 3 3
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"}
+> @kbd{ a[4] = 4}
+> @kbd{ a[3] = 3}
+> @kbd{ for (i in a)}
+> @kbd{ print i, a[i]}
+> @kbd{@}'}
+@print{} 3 3
+@print{} 4 4
+@end example
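+
+The value-based orderings work the same way. For instance, here is a
+sketch that traverses an array in descending order of its numeric
+element values (@code{"@@val_num_desc"}):
+
+@example
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{    PROCINFO["sorted_in"] = "@@val_num_desc"}
+> @kbd{    a["x"] = 10; a["y"] = 30; a["z"] = 20}
+> @kbd{    for (i in a)}
+> @kbd{        print i, a[i]}
+> @kbd{@}'}
+@print{} y 30
+@print{} z 20
+@print{} x 10
+@end example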
+
+When sorting an array by element values, if a value happens to be
+a subarray then it is considered to be greater than any string or
+numeric value, regardless of what the subarray itself contains,
+and all subarrays are treated as being equal to each other. Their
+order relative to each other is determined by their index strings.
+
+Here are some additional things to bear in mind about sorted
+array traversal:
+
+@itemize @value{BULLET}
+@item
+The value of @code{PROCINFO["sorted_in"]} is global. That is, it affects
+all array traversal @code{for} loops. If you need to change it within your
+own code, you should check whether it is already defined, and save
+and restore its value:
+
+@example
+@dots{}
+if ("sorted_in" in PROCINFO) @{
+ save_sorted = PROCINFO["sorted_in"]
+ PROCINFO["sorted_in"] = "@@val_str_desc" # or whatever
+@}
+@dots{}
+if (save_sorted)
+ PROCINFO["sorted_in"] = save_sorted
+@end example
+
+@item
+As already mentioned, the default array traversal order is represented by
+@code{"@@unsorted"}. You can also get the default behavior by assigning
+the null string to @code{PROCINFO["sorted_in"]} or by just deleting the
+@code{"sorted_in"} element from the @code{PROCINFO} array with
+the @code{delete} statement.
+(The @code{delete} statement hasn't been described yet; @pxref{Delete}.)
+@end itemize
+
+In addition, @command{gawk} provides built-in functions for
+sorting arrays; see @ref{Array Sorting Functions}.
+
+@node Numeric Array Subscripts
+@section Using Numbers to Subscript Arrays
+
+@cindex numbers, as array subscripts
+@cindex arrays, numeric subscripts
+@cindex subscripts in arrays, numbers as
+@cindex @code{CONVFMT} variable, and array subscripts
+An important aspect to remember about arrays is that @emph{array subscripts
+are always strings}. When a numeric value is used as a subscript,
+it is converted to a string value before being used for subscripting
+(@pxref{Conversion}).
+This means that the value of the predefined variable @code{CONVFMT} can
+affect how your program accesses elements of an array. For example:
+
+@example
+xyz = 12.153
+data[xyz] = 1
+CONVFMT = "%2.2f"
+if (xyz in data)
+ printf "%s is in data\n", xyz
+else
+ printf "%s is not in data\n", xyz
+@end example
+
+@noindent
+This prints @samp{12.15 is not in data}. The first statement gives
+@code{xyz} a numeric value. Assigning to
+@code{data[xyz]} subscripts @code{data} with the string value @code{"12.153"}
+(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}).
+Thus, the array element @code{data["12.153"]} is assigned the value one.
+The program then changes
+the value of @code{CONVFMT}. The test @samp{(xyz in data)} generates a new
+string value from @code{xyz}---this time @code{"12.15"}---because the value of
+@code{CONVFMT} only allows two significant digits. This test fails,
+because @code{"12.15"} is different from @code{"12.153"}.
+
+@cindex converting integer array subscripts
+@cindex integer array indices
+According to the rules for conversions
+(@pxref{Conversion}), integer
+values always convert to strings as integers, no matter what the
+value of @code{CONVFMT} may happen to be. So the usual case of
+the following works:
+
+@example
+for (i = 1; i <= maxsub; i++)
+ @ii{do something with} array[i]
+@end example
+
+The ``integer values always convert to strings as integers'' rule
+has an additional consequence for array indexing.
+Octal and hexadecimal constants
+@ifnotdocbook
+(@pxref{Nondecimal-numbers})
+@end ifnotdocbook
+@ifdocbook
+(covered in @ref{Nondecimal-numbers})
+@end ifdocbook
+are converted internally into numbers, and their original form
+is forgotten. This means, for example, that @code{array[17]},
+@code{array[021]}, and @code{array[0x11]} all refer to the same element!
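+
+For instance, here is a quick check with @command{gawk}, which accepts
+such constants in program source code:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a[0x11] = "seventeen"; print a[17] @}'}
+@print{} seventeen
+@end example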
+
+As with many things in @command{awk}, the majority of the time
+things work as you would expect them to. But it is useful to have a precise
+knowledge of the actual rules, as they can sometimes have a subtle
+effect on your programs.
+
+@node Uninitialized Subscripts
+@section Using Uninitialized Variables as Subscripts
+
+@cindex variables, uninitialized@comma{} as array subscripts
+@cindex uninitialized variables, as array subscripts
+@cindex subscripts in arrays, uninitialized variables as
+@cindex arrays, subscripts, uninitialized variables as
+Suppose it's necessary to write a program
+to print the input data in reverse order.
+A reasonable attempt to do so (with some test
+data) might look like this:
+
+@example
+$ @kbd{echo 'line 1}
+> @kbd{line 2}
+> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
+> @kbd{END @{}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
+> @kbd{print l[i]}
+> @kbd{@}'}
+@print{} line 3
+@print{} line 2
+@end example
+
+Unfortunately, the very first line of input data did not appear in the
+output!
+
+Upon first glance, we would think that this program should have worked.
+The variable @code{lines}
+is uninitialized, and uninitialized variables have the numeric value zero.
+So, @command{awk} should have printed the value of @code{l[0]}.
+
+The issue here is that subscripts for @command{awk} arrays are @emph{always}
+strings. Uninitialized variables, when used as strings, have the
+value @code{""}, not zero. Thus, @samp{line 1} ends up stored in
+@code{l[""]}.
+The following version of the program works correctly:
+
+@example
+@{ l[lines++] = $0 @}
+END @{
+ for (i = lines - 1; i >= 0; i--)
+ print l[i]
+@}
+@end example
+
+Here, the @samp{++} forces @code{lines} to be numeric, thus making
+the ``old value'' numeric zero. This is then converted to @code{"0"}
+as the array subscript.
+
+@cindex null strings, as array subscripts
+@cindex dark corner, array subscripts
+@cindex lint checking, array subscripts
+Even though it is somewhat unusual, the null string
+(@code{""}) is a valid array subscript.
+@value{DARKCORNER}
+@command{gawk} warns about the use of the null string as a subscript
+if @option{--lint} is provided
+on the command line (@pxref{Options}).
+
+@node Delete
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements in arrays
+@cindex arrays, elements, deleting
+@cindex elements in arrays, deleting
+
+To remove an individual element of an array, use the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index-expression}]
+@end example
+
+Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never
+been referred to or been given a value.
+The following is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+Once an element is deleted, a subsequent @code{for} statement to scan the array
+does not report that element and the @code{in} operator to check for
+the presence of that element returns zero (i.e., false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+@cindex null strings, and deleting array elements
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+For example:
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+@cindex lint checking, array elements
+It is not an error to delete an element that does not exist.
+However, if @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} issues a warning message when an element that
+is not in the array is deleted.
+
+@cindex common extensions, @code{delete} to delete entire arrays
+@cindex extensions, common@comma{} @code{delete} to delete entire arrays
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex @code{delete} @var{array}
+@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
+All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the @code{delete} statement,
+as follows:
+
+
+@example
+delete @var{array}
+@end example
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+This form of the @code{delete} statement is also supported
+by BWK @command{awk} and @command{mawk}, as well as
+by a number of other implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@quotation NOTE
+For many years, using @code{delete} without a subscript was a common
+extension. In September 2012, it was accepted for inclusion into the
+POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
+the Austin Group website}.
+@end quotation
+
+@cindex portability, deleting array elements
+@cindex Brennan, Michael
+The following statement provides a portable but nonobvious way to clear
+out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
+
+@example
+split("", array)
+@end example
+
+@cindex @code{split()} function, array elements@comma{} deleting
+The @code{split()} function
+(@pxref{String Functions})
+clears out the target array first. This call asks it to split
+apart the null string. Because there is no data to split out, the
+function simply clears the array and then returns.
+
+@quotation CAUTION
+Deleting all the elements from an array does not change its type; you cannot
+clear an array and then use the array's name as a scalar
+(i.e., a regular variable). For example, the following does not work:
+
+@example
+a[1] = 3
+delete a
+a = 3
+@end example
+@end quotation
+
+@node Multidimensional
+@section Multidimensional Arrays
+
+@menu
+* Multiscanning:: Scanning multidimensional arrays.
+@end menu
+
+@cindex subscripts in arrays, multidimensional
+@cindex arrays, multidimensional
+A @dfn{multidimensional array} is an array in which an element is identified
+by a sequence of indices instead of a single index. For example, a
+two-dimensional array requires two indices. The usual way (in many
+languages, including @command{awk}) to refer to an element of a
+two-dimensional array named @code{grid} is with
+@code{grid[@var{x},@var{y}]}.
+
+@cindex @code{SUBSEP} variable, and multidimensional arrays
+Multidimensional arrays are supported in @command{awk} through
+concatenation of indices into one string.
+@command{awk} converts the indices into strings
+(@pxref{Conversion}) and
+concatenates them together, with a separator between them. This creates
+a single string that describes the values of the separate indices. The
+combined string is used as a single index into an ordinary,
+one-dimensional array. The separator used is the value of the built-in
+variable @code{SUBSEP}.
+
+For example, suppose we evaluate the expression @samp{foo[5,12] = "value"}
+when the value of @code{SUBSEP} is @code{"@@"}. The numbers 5 and 12 are
+converted to strings and
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
+the array element @code{foo["5@@12"]} is set to @code{"value"}.
+
+Once the element's value is stored, @command{awk} has no record of whether
+it was stored with a single index or a sequence of indices. The two
+expressions @samp{foo[5,12]} and @w{@samp{foo[5 SUBSEP 12]}} are always
+equivalent.
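+
+For instance, the following sketch stores an element using the
+comma-separated form and then retrieves it with an explicit
+@code{SUBSEP} concatenation:
+
+@example
+$ @kbd{gawk 'BEGIN @{ foo[5,12] = "value"; print foo[5 SUBSEP 12] @}'}
+@print{} value
+@end example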
+
+The default value of @code{SUBSEP} is the string @code{"\034"},
+which contains a nonprinting character that is unlikely to appear in an
+@command{awk} program or in most input data.
+Choosing an unlikely character is useful because index values that
+contain a string matching @code{SUBSEP} can lead to
+combined strings that are ambiguous. Suppose that @code{SUBSEP} is
+@code{"@@"}; then @w{@samp{foo["a@@b", "c"]}} and @w{@samp{foo["a",
+"b@@c"]}} are indistinguishable because both are actually
+stored as @samp{foo["a@@b@@c"]}.
+
+@cindex @code{in} operator, index existence in multidimensional arrays
+To test whether a particular index sequence exists in a
+multidimensional array, use the same operator (@code{in}) that is
+used for single-dimensional arrays. Write the whole sequence of indices
+in parentheses, separated by commas, as the left operand:
+
+@example
+if ((@var{subscript1}, @var{subscript2}, @dots{}) in @var{array})
+ @dots{}
+@end example
+
+Here is an example that treats its input as a two-dimensional array of
+fields; it rotates this array 90 degrees clockwise and prints the
+result. It assumes that all lines have the same number of
+elements:
+
+@example
+@{
+ if (max_nf < NF)
+ max_nf = NF
+ max_nr = NR
+ for (x = 1; x <= NF; x++)
+ vector[x, NR] = $x
+@}
+
+END @{
+ for (x = 1; x <= max_nf; x++) @{
+ for (y = max_nr; y >= 1; --y)
+ printf("%s ", vector[x, y])
+ printf("\n")
+ @}
+@}
+@end example
+
+@noindent
+When given the input:
+
+@example
+1 2 3 4 5 6
+2 3 4 5 6 1
+3 4 5 6 1 2
+4 5 6 1 2 3
+@end example
+
+@noindent
+the program produces the following output:
+
+@example
+4 3 2 1
+5 4 3 2
+6 5 4 3
+1 6 5 4
+2 1 6 5
+3 2 1 6
+@end example
+
+@node Multiscanning
+@subsection Scanning Multidimensional Arrays
+
+There is no special @code{for} statement for scanning a
+``multidimensional'' array. There cannot be one, because, in truth,
+@command{awk} does not have
+multidimensional arrays or elements---there is only a
+multidimensional @emph{way of accessing} an array.
+
+@cindex subscripts in arrays, multidimensional, scanning
+@cindex arrays, multidimensional, scanning
+@cindex scanning multidimensional arrays
+However, if your program has an array that is always accessed as
+multidimensional, you can get the effect of scanning it by combining
+the scanning @code{for} statement
+(@pxref{Scanning an Array}) with the
+built-in @code{split()} function
+(@pxref{String Functions}).
+It works in the following manner:
+
+@example
+for (combined in array) @{
+ split(combined, separate, SUBSEP)
+ @dots{}
+@}
+@end example
+
+@noindent
+This sets the variable @code{combined} to
+each concatenated combined index in the array, and splits it
+into the individual indices by breaking it apart where the value of
+@code{SUBSEP} appears. The individual indices then become the elements of
+the array @code{separate}.
+
+Thus, if a value is previously stored in @code{array[1, "foo"]}, then
+an element with index @code{"1\034foo"} exists in @code{array}. (Recall
+that the default value of @code{SUBSEP} is the character with code 034.)
+Sooner or later, the @code{for} statement finds that index and does an
+iteration with the variable @code{combined} set to @code{"1\034foo"}.
+Then the @code{split()} function is called as follows:
+
+@example
+split("1\034foo", separate, "\034")
+@end example
+
+@noindent
+The result is to set @code{separate[1]} to @code{"1"} and
+@code{separate[2]} to @code{"foo"}. Presto! The original sequence of
+separate indices is recovered.
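+
+Putting the pieces together, and assuming that every index in
+@code{vector} has exactly two components, a minimal sketch that prints
+each index pair along with its value looks like this:
+
+@example
+for (combined in vector) @{
+    split(combined, separate, SUBSEP)
+    print separate[1], separate[2], vector[combined]
+@}
+@end example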
+
+
+@node Arrays of Arrays
+@section Arrays of Arrays
+@cindex arrays of arrays
+
+@command{gawk} goes beyond standard @command{awk}'s multidimensional
+array access and provides true arrays of
+arrays. Elements of a subarray are referred to by their own indices
+enclosed in square brackets, just like the elements of the main array.
+For example, the following creates a two-element subarray at index @code{1}
+of the main array @code{a}:
+
+@example
+a[1][1] = 1
+a[1][2] = 2
+@end example
+
+This simulates a true two-dimensional array. Each subarray element can
+contain another subarray as a value, which in turn can hold other arrays
+as well. In this way, you can create arrays of three or more dimensions.
+The indices can be any @command{awk} expression, including scalars
+separated by commas (i.e., a regular @command{awk} simulated
+multidimensional subscript). So the following is valid in
+@command{gawk}:
+
+@example
+a[1][3][1, "name"] = "barney"
+@end example
+
+Each subarray and the main array can be of different lengths. In fact, the
+elements of an array or its subarray do not all have to have the same
+type. This means that the main array and any of its subarrays can be
+non-rectangular, or jagged in structure. You can assign a scalar value to
+the index @code{4} of the main array @code{a}, even though @code{a[1]}
+is itself an array and not a scalar:
+
+@example
+a[4] = "An element in a jagged array"
+@end example
+
+The terms @dfn{dimension}, @dfn{row}, and @dfn{column} are
+meaningless when applied
+to such an array, but we will use ``dimension'' henceforth to imply the
+maximum number of indices needed to refer to an existing element. The
+type of any element that has already been assigned cannot be changed
+by assigning a value of a different type. You have to first delete the
+current element, which effectively makes @command{gawk} forget about
+the element at that index:
+
+@example
+delete a[4]
+a[4][5][6][7] = "An element in a four-dimensional array"
+@end example
+
+@noindent
+This removes the scalar value from index @code{4} and then inserts a
+three-level nested subarray containing a scalar. You can also
+delete an entire subarray or subarray of subarrays:
+
+@example
+delete a[4][5]
+a[4][5] = "An element in subarray a[4]"
+@end example
+
+But recall that you cannot delete the main array @code{a} and then use it
+as a scalar.
+
+The built-in functions which take array arguments can also be used
+with subarrays. For example, the following code fragment uses @code{length()}
+(@pxref{String Functions})
+to determine the number of elements in the main array @code{a} and
+its subarrays:
+
+@example
+print length(a), length(a[1]), length(a[1][3])
+@end example
+
+@noindent
+This results in the following output for our main array @code{a}:
+
+@example
+2, 3, 1
+@end example
+
+@noindent
+The @samp{@var{subscript} in @var{array}} expression
+(@pxref{Reference to Elements}) works similarly for both
+regular @command{awk}-style
+arrays and arrays of arrays. For example, the tests @samp{1 in a},
+@samp{3 in a[1]}, and @samp{(1, "name") in a[1][3]} all evaluate to
+one (true) for our array @code{a}.
+
+The @samp{for (item in array)} statement (@pxref{Scanning an Array})
+can be nested to scan all the
+elements of an array of arrays if it is rectangular in structure. In order
+to print the contents (scalar values) of a two-dimensional array of arrays
+(i.e., in which each first-level element is itself an
+array, not necessarily of the same length)
+you could use the following code:
+
+@example
+for (i in array)
+ for (j in array[i])
+ print array[i][j]
+@end example
+
+The @code{isarray()} function (@pxref{Type Functions})
+lets you test if an array element is itself an array:
+
+@example
+for (i in array) @{
+    if (isarray(array[i])) @{
+        for (j in array[i]) @{
+            print array[i][j]
+        @}
+    @}
+    else
+        print array[i]
+@}
+@end example
+
+If the structure of a jagged array of arrays is known in advance,
+you can often devise workarounds using control statements. For example,
+the following code prints the elements of our main array @code{a}:
+
+@example
+for (i in a) @{
+ for (j in a[i]) @{
+ if (j == 3) @{
+ for (k in a[i][j])
+ print a[i][j][k]
+ @} else
+ print a[i][j]
+ @}
+@}
+@end example
+
+@noindent
+@DBXREF{Walking Arrays} for a user-defined function that ``walks'' an
+arbitrarily dimensioned array of arrays.
+
+Recall that a reference to an uninitialized array element yields a value
+of @code{""}, the null string. This has one important implication when you
+intend to use a subarray as an argument to a function, as illustrated by
+the following example:
+
+@example
+$ @kbd{gawk 'BEGIN @{ split("a b c d", b[1]); print b[1][1] @}'}
+@error{} gawk: cmd. line:1: fatal: split: second argument is not an array
+@end example
+
+The way to work around this is to first force @code{b[1]} to be an array by
+creating an arbitrary index:
+
+@example
+$ @kbd{gawk 'BEGIN @{ b[1][1] = ""; split("a b c d", b[1]); print b[1][1] @}'}
+@print{} a
+@end example
+
+@node Arrays Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Standard @command{awk} provides one-dimensional associative arrays
+(arrays indexed by string values). All arrays are associative; numeric
+indices are converted automatically to strings.
+
+@item
+Array elements are referenced as @code{@var{array}[@var{indx}]}.
+Referencing an element creates it if it did not exist previously.
+
+@item
+The proper way to see if an array has an element with a given index
+is to use the @code{in} operator: @samp{@var{indx} in @var{array}}.
+
+@item
+Use @samp{for (@var{indx} in @var{array}) @dots{}} to scan through all the
+individual elements of an array. In the body of the loop, @var{indx} takes
+on the value of each element's index in turn.
+
+@item
+The order in which a @samp{for (@var{indx} in @var{array})} loop
+traverses an array is undefined in POSIX @command{awk} and varies among
+implementations. @command{gawk} lets you control the order by assigning
+special predefined values to @code{PROCINFO["sorted_in"]}.
+
+@item
+Use @samp{delete @var{array}[@var{indx}]} to delete an individual element.
+To delete all of the elements in an array,
+use @samp{delete @var{array}}.
+This latter feature has been a common extension for many
+years and is now standard, but may not be supported by all commercial
+versions of @command{awk}.
+
+@item
+Standard @command{awk} simulates multidimensional arrays by separating
+subscript values with a comma. The values are concatenated into a
+single string, separated by the value of @code{SUBSEP}. The fact
+that such a subscript was created in this way is not retained; thus
+changing @code{SUBSEP} may have unexpected consequences. You can use
+@samp{(@var{sub1}, @var{sub2}, @dots{}) in @var{array}} to see if such
+a multidimensional subscript exists in @var{array}.
+
+@item
+@command{gawk} provides true arrays of arrays. You use a separate
+set of square brackets for each dimension in such an array:
+@code{data[row][col]}, for example. Array elements may thus be either
+scalar values (number or string) or another array.
+
+@item
+Use the @code{isarray()} built-in function to determine if an array
+element is itself a subarray.
+
+@end itemize
+
+
+@node Functions
+@chapter Functions
+
+@cindex functions, built-in
+@cindex built-in functions
+This @value{CHAPTER} describes @command{awk}'s built-in functions,
+which fall into three categories: numeric, string, and I/O.
+@command{gawk} provides additional groups of functions
+to work with values that represent time, do
+bit manipulation, sort arrays,
+provide type information, and internationalize and localize programs.
+
+Besides the built-in functions, @command{awk} has provisions for
+writing new functions that the rest of a program can use.
+The second half of this @value{CHAPTER} describes these
+@dfn{user-defined} functions.
+
+@menu
+* Built-in:: Summarizes the built-in functions.
+* User-defined:: Describes User-defined functions in detail.
+* Indirect Calls:: Choosing the function to call at runtime.
+* Functions Summary:: Summary of functions.
+@end menu
+
+@node Built-in
+@section Built-In Functions
+
+@dfn{Built-in} functions are always available for
+your @command{awk} program to call. This @value{SECTION} defines all
+the built-in
+functions in @command{awk}; some of these are mentioned in other sections
+but are summarized here for your convenience.
+
+@menu
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers, including
+ @code{int()}, @code{sin()} and @code{rand()}.
+* String Functions:: Functions for string manipulation, such as
+ @code{split()}, @code{match()} and
+ @code{sprintf()}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with timestamps.
+* Bitwise Functions:: Functions for bitwise operations.
+* Type Functions:: Functions for type information.
+* I18N Functions:: Functions for string translation.
+@end menu
+
+@node Calling Built-in
+@subsection Calling Built-In Functions
+
+To call one of @command{awk}'s built-in functions, write the name of
+the function followed
+by arguments in parentheses. For example, @samp{atan2(y + z, 1)}
+is a call to the function @code{atan2()} and has two arguments.
+
+@cindex programming conventions, functions, calling
+@cindex whitespace, functions@comma{} calling
+Whitespace is ignored between the built-in function name and the
+opening parenthesis, but it is good practice to avoid using whitespace
+there. User-defined functions do not permit whitespace in this way, and
+it is easier to avoid mistakes by following a simple
+convention that always works---no whitespace after a function name.
+
+@cindex troubleshooting, @command{gawk}, fatal errors@comma{} function arguments
+@cindex @command{gawk}, function arguments and
+@cindex differences in @command{awk} and @command{gawk}, function arguments (@command{gawk})
+Each built-in function accepts a certain number of arguments.
+In some cases, arguments can be omitted. The defaults for omitted
+arguments vary from function to function and are described under the
+individual functions. In some @command{awk} implementations, extra
+arguments given to built-in functions are ignored. However, in @command{gawk},
+it is a fatal error to give extra arguments to a built-in function.
+
+When a function is called, expressions that create the function's actual
+parameters are evaluated completely before the call is performed.
+For example, in the following code fragment:
+
+@example
+i = 4
+j = sqrt(i++)
+@end example
+
+@cindex evaluation order, functions
+@cindex functions, built-in, evaluation order
+@cindex built-in functions, evaluation order
+@noindent
+the variable @code{i} is incremented to the value five before @code{sqrt()}
+is called with a value of four for its actual parameter.
+The order of evaluation of the expressions used for the function's
+parameters is undefined. Thus, avoid writing programs that
+assume that parameters are evaluated from left to right or from
+right to left. For example:
+
+@example
+i = 5
+j = atan2(++i, i *= 2)
+@end example
+
+If the order of evaluation is left to right, then @code{i} first becomes
+6, and then 12, and @code{atan2()} is called with the two arguments 6
+and 12. But if the order of evaluation is right to left, @code{i}
+first becomes 10, then 11, and @code{atan2()} is called with the
+two arguments 11 and 10.
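+
+One way to sidestep the problem is to evaluate the arguments into
+ordinary variables first, so that the order is explicit:
+
+@example
+i = 5
+y = ++i         # i is now 6
+x = (i *= 2)    # i is now 12
+j = atan2(y, x) # always computes atan2(6, 12)
+@end example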
+
+@node Numeric Functions
+@subsection Numeric Functions
+@cindex numeric functions
+
+The following list describes all of
+the built-in functions that work with numbers.
+Optional parameters are enclosed in square brackets@w{ ([ ]):}
+
+@c @asis for docbook
+@table @asis
+@item @code{atan2(@var{y}, @var{x})}
+@cindexawkfunc{atan2}
+@cindex arctangent
+Return the arctangent of @code{@var{y} / @var{x}} in radians.
+You can use @samp{pi = atan2(0, -1)} to retrieve the value of
+@value{PI}.
+
+@item @code{cos(@var{x})}
+@cindexawkfunc{cos}
+@cindex cosine
+Return the cosine of @var{x}, with @var{x} in radians.
+
+@item @code{div(@var{numerator}, @var{denominator}, @var{result})}
+@cindexawkfunc{div}
+@cindex div
+Perform integer division, similar to the standard C function of the
+same name. First, truncate @code{numerator} and @code{denominator}
+towards zero, creating integer values. Clear the @code{result}
+array, and then set @code{result["quotient"]} to the result of
+@samp{numerator / denominator}, truncated towards zero to an integer,
+and set @code{result["remainder"]} to the result of @samp{numerator %
+denominator}, truncated towards zero to an integer. This function is
+primarily intended for use with arbitrary length integers; it avoids
+creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary
+Precision Integers}).
+
+This function is a @command{gawk} extension. It is not available in
+compatibility mode (@pxref{Options}).
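+
+As an illustration, a call might look like the following sketch
+(assuming a version of @command{gawk} that provides @code{div()} as
+just described):
+
+@example
+$ @kbd{gawk 'BEGIN @{ div(7, 2, result)}
+> @kbd{            print result["quotient"], result["remainder"] @}'}
+@print{} 3 1
+@end example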
+
+@item @code{exp(@var{x})}
+@cindexawkfunc{exp}
+@cindex exponent
+Return the exponential of @var{x} (@code{e ^ @var{x}}) or report
+an error if @var{x} is out of range. The range of values @var{x} can have
+depends on your machine's floating-point representation.
+
+@item @code{int(@var{x})}
+@cindexawkfunc{int}
+@cindex round to nearest integer
+Return the nearest integer to @var{x}, located between @var{x} and zero and
+truncated toward zero.
+For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+
+@item @code{log(@var{x})}
+@cindexawkfunc{log}
+@cindex logarithm
+Return the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems.
+Additionally, @command{gawk} prints a warning message when @var{x}
+is negative.
+
+@item @code{rand()}
+@cindexawkfunc{rand}
+@cindex random numbers, @code{rand()}/@code{srand()} functions
+Return a random number. The values of @code{rand()} are
+uniformly distributed between zero and one.
+The value could be zero but is never one.@footnote{The C version of @code{rand()}
+on many Unix systems
+is known to produce fairly poor sequences of random numbers.
+However, nothing requires that an @command{awk} implementation use the C
+@code{rand()} to implement the @command{awk} version of @code{rand()}.
+In fact, @command{gawk} uses the BSD @code{random()} function, which is
+considerably better than @code{rand()}, to produce random numbers.}
+
+Often random integers are needed instead. Following is a user-defined function
+that can be used to obtain a random non-negative integer less than @var{n}:
+
+@example
+function randint(n)
+@{
+ return int(n * rand())
+@}
+@end example
+
+@noindent
+The multiplication produces a random number greater than or equal to
+zero and less than @code{n}. Using @code{int()}, this result is made into
+an integer between zero and @code{n} @minus{} 1, inclusive.
+
+The following example uses a similar function to produce random integers
+between one and @var{n}. This program prints a new random number for
+each input record:
+
+@example
+# Function to roll a simulated die.
+function roll(n) @{ return 1 + int(rand() * n) @}
+
+# Roll 3 six-sided dice and
+# print total number of points.
+@{
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
+@}
+@end example
+
+@cindex seeding random number generator
+@cindex random numbers, seed of
+@quotation CAUTION
+In most @command{awk} implementations, including @command{gawk},
+@code{rand()} starts generating numbers from the same
+starting number, or @dfn{seed}, each time you run @command{awk}.@footnote{@command{mawk}
+uses a different seed each time.} Thus,
+a program generates the same results each time you run it.
+The numbers are random within one @command{awk} run but predictable
+from run to run. This is convenient for debugging, but if you want
+a program to do different things each time it is used, you must change
+the seed to a value that is different in each run. To do this,
+use @code{srand()}.
+@end quotation
+
+@item @code{sin(@var{x})}
+@cindexawkfunc{sin}
+@cindex sine
+Return the sine of @var{x}, with @var{x} in radians.
+
+@item @code{sqrt(@var{x})}
+@cindexawkfunc{sqrt}
+@cindex square root
+Return the positive square root of @var{x}; thus, @code{sqrt(4)} is 2.
+@command{gawk} prints a warning message
+if @var{x} is negative.
+
+@item @code{srand(}[@var{x}]@code{)}
+@cindexawkfunc{srand}
+Set the starting point, or seed,
+for generating random numbers to the value @var{x}.
+
+Each seed value leads to a particular sequence of random
+numbers.@footnote{Computer-generated random numbers really are not truly
+random. They are technically known as ``pseudorandom.'' This means
+that although the numbers in a sequence appear to be random, you can in
+fact generate the same sequence of random numbers over and over again.}
+Thus, if the seed is set to the same value a second time,
+the same sequence of random numbers is produced again.
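+
+For instance, the following sketch prints the same three numbers every
+time it is run with a given version of @command{awk}, because the seed
+is fixed:
+
+@example
+BEGIN @{
+    srand(42)               # a fixed seed gives a repeatable sequence
+    for (i = 1; i <= 3; i++)
+        print rand()
+@}
+@end example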
+
+@quotation CAUTION
+Different @command{awk} implementations use different random-number
+generators internally. Don't expect the same @command{awk} program
+to produce the same series of random numbers when executed by
+different versions of @command{awk}.
+@end quotation
+
+If the argument @var{x} is omitted, as in @samp{srand()}, then the current
+date and time of day are used for a seed. This is the way to get random
+numbers that are truly unpredictable.
+
+The return value of @code{srand()} is the previous seed. This makes it
+easy to keep track of the seeds in case you need to consistently reproduce
+sequences of random numbers.
+
+POSIX does not specify the initial seed; it differs among @command{awk}
+implementations.
+@end table
+
+@node String Functions
+@subsection String-Manipulation Functions
+@cindex string-manipulation functions
+
+The functions in this @value{SECTION} look at or change the text of one
+or more strings.
+
+@command{gawk} understands locales (@pxref{Locales}), and does all
+string processing in terms of @emph{characters}, not @emph{bytes}.
+This distinction is particularly important to understand for locales
+where one character may be represented by multiple bytes. Thus, for
+example, @code{length()} returns the number of characters in a string,
+and not the number of bytes used to represent those characters. Similarly,
+@code{index()} works with character indices, and not byte indices.
+
+@quotation CAUTION
+A number of functions deal with indices into strings. For these
+functions, the first character of a string is at position (index) one.
+This is different from C and the languages descended from it, where the
+first character is at position zero. You need to remember this when
+doing index calculations, particularly if you are used to C.
+@end quotation
+
+In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
+Several functions perform string substitution; the full discussion is
+provided in the description of the @code{sub()} function, which comes
+toward the end, because the list is presented alphabetically.
+
+Those functions that are specific to @command{gawk} are marked with a
+pound sign (@samp{#}). They are not available in compatibility mode
+(@pxref{Options}):
+
+
+@menu
+* Gory Details:: More than you want to know about @samp{\} and
+ @samp{&} with @code{sub()}, @code{gsub()}, and
+ @code{gensub()}.
+@end menu
+
+@c @asis for docbook
+@table @asis
+@item @code{asort(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
+@itemx @code{asorti(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
+@cindexgawkfunc{asorti}
+@cindex sort array
+@cindex arrays, elements, retrieving number of
+@cindexgawkfunc{asort}
+@cindex sort array indices
+These two functions are similar in behavior, so they are described
+together.
+
+@quotation NOTE
+The following description ignores the third argument, @var{how}, as it
+requires understanding features that we have not discussed yet. Thus,
+the discussion here is a deliberate simplification. (We do provide all
+the details later on; see @DBREF{Array Sorting Functions} for the full story.)
+@end quotation
+
+Both functions return the number of elements in the array @var{source}.
+For @code{asort()}, @command{gawk} sorts the values of @var{source}
+and replaces the indices of the sorted values of @var{source} with
+sequential integers starting with one. If the optional array @var{dest}
+is specified, then @var{source} is duplicated into @var{dest}. @var{dest}
+is then sorted, leaving the indices of @var{source} unchanged.
+
+@cindex @command{gawk}, @code{IGNORECASE} variable in
+When comparing strings, @code{IGNORECASE} affects the sorting
+(@pxref{Array Sorting Functions}). If the
+@var{source} array contains subarrays as values (@pxref{Arrays of
+Arrays}), they will come last, after all scalar values.
+Subarrays are @emph{not} recursively sorted.
+
+For example, if the contents of @code{a} are as follows:
+
+@example
+a["last"] = "de"
+a["first"] = "sac"
+a["middle"] = "cul"
+@end example
+
+@noindent
+A call to @code{asort()}:
+
+@example
+asort(a)
+@end example
+
+@noindent
+results in the following contents of @code{a}:
+
+@example
+a[1] = "cul"
+a[2] = "de"
+a[3] = "sac"
+@end example
+
+The @code{asorti()} function works similarly to @code{asort()}; however,
+the @emph{indices} are sorted, instead of the values. Thus, in the
+previous example, starting with the same initial set of indices and
+values in @code{a}, calling @samp{asorti(a)} would yield:
+
+@example
+a[1] = "first"
+a[2] = "last"
+a[3] = "middle"
+@end example
+
+@item @code{gensub(@var{regexp}, @var{replacement}, @var{how}} [@code{, @var{target}}]@code{) #}
+@cindexgawkfunc{gensub}
+@cindex search and replace in strings
+@cindex substitute in string
+Search the target string @var{target} for matches of the regular
+expression @var{regexp}. If @var{how} is a string beginning with
+@samp{g} or @samp{G} (short for ``global''), then replace all matches of @var{regexp} with
+@var{replacement}. Otherwise, @var{how} is treated as a number indicating
+which match of @var{regexp} to replace. If no @var{target} is supplied,
+use @code{$0}. It returns the modified string as the result
+of the function; the original target string is @emph{not} changed.
+
+@code{gensub()} is a general substitution function. Its purpose is
+to provide more features than the standard @code{sub()} and @code{gsub()}
+functions.
+
+@code{gensub()} provides an additional feature that is not available
+in @code{sub()} or @code{gsub()}: the ability to specify components of a
+regexp in the replacement text. This is done by using parentheses in
+the regexp to mark the components and then specifying @samp{\@var{N}}
+in the replacement text, where @var{N} is a digit from 1 to 9.
+For example:
+
+@example
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{a = "abc def"}
+> @kbd{b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a)}
+> @kbd{print b}
+> @kbd{@}'}
+@print{} def abc
+@end example
+
+@noindent
+As with @code{sub()}, you must type two backslashes in order
+to get one into the string.
+In the replacement text, the sequence @samp{\0} represents the entire
+matched text, as does the character @samp{&}.
+
+The following example shows how you can use the third argument to control
+which match of the regexp should be changed:
+
+@example
+$ @kbd{echo a b c a b c |}
+> @kbd{gawk '@{ print gensub(/a/, "AA", 2) @}'}
+@print{} a b c AA b c
+@end example
+
+In this case, @code{$0} is the default target string.
+@code{gensub()} returns the new string as its result, which is
+passed directly to @code{print} for printing.
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+If the @var{how} argument is a string that does not begin with @samp{g} or
+@samp{G}, or if it is a number that is less than or equal to zero, only one
+substitution is performed. If @var{how} is zero, @command{gawk} issues
+a warning message.
+
+If @var{regexp} does not match @var{target}, @code{gensub()}'s return value
+is the original unchanged value of @var{target}.
+
+@item @code{gsub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
+@cindexawkfunc{gsub}
+Search @var{target} for
+@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
+substrings it can find and replace them with @var{replacement}.
+The @samp{g} in @code{gsub()} stands for
+``global,'' which means replace everywhere. For example:
+
+@example
+@{ gsub(/Britain/, "United Kingdom"); print @}
+@end example
+
+@noindent
+replaces all occurrences of the string @samp{Britain} with @samp{United
+Kingdom} for all input records.
+
+The @code{gsub()} function returns the number of substitutions made. If
+the variable to search and alter (@var{target}) is
+omitted, then the entire input record (@code{$0}) is used.
+As in @code{sub()}, the characters @samp{&} and @samp{\} are special,
+and the third argument must be assignable.
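+
+For instance, here is a small sketch that makes use of the return value:
+
+@example
+$ @kbd{echo a b c a | gawk '@{ n = gsub(/a/, "A"); print n, $0 @}'}
+@print{} 2 A b c A
+@end example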
+
+@item @code{index(@var{in}, @var{find})}
+@cindexawkfunc{index}
+@cindex search in string
+@cindex find substring in string
+Search the string @var{in} for the first occurrence of the string
+@var{find}, and return the position in characters where that occurrence
+begins in the string @var{in}. Consider the following example:
+
+@example
+$ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
+@print{} 3
+@end example
+
+@noindent
+If @var{find} is not found, @code{index()} returns zero.
+
+@cindex dark corner, regexp as second argument to @code{index()}
+With BWK @command{awk} and @command{gawk},
+it is a fatal error to use a regexp constant for @var{find}.
+Other implementations allow it, simply treating the regexp
+constant as an expression meaning @samp{$0 ~ /regexp/}. @value{DARKCORNER}.
+
+@item @code{length(}[@var{string}]@code{)}
+@cindexawkfunc{length}
+@cindex string length
+@cindex length of string
+Return the number of characters in @var{string}. If
+@var{string} is a number, the length of the digit string representing
+that number is returned. For example, @code{length("abcde")} is five. By
+contrast, @code{length(15 * 35)} works out to three. In this example,
+@iftex
+@math{15 @cdot 35 = 525},
+@end iftex
+@ifnottex
+@ifnotdocbook
+15 * 35 = 525,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+15 &sdot; 35 = 525, @c
+@end docbook
+and 525 is then converted to the string @code{"525"}, which has
+three characters.
+
+@cindex length of input record
+@cindex input record, length of
+If no argument is supplied, @code{length()} returns the length of @code{$0}.
+
+@c @cindex historical features
+@cindex portability, @code{length()} function
+@cindex POSIX @command{awk}, functions and, @code{length()}
+@quotation NOTE
+In older versions of @command{awk}, the @code{length()} function could
+be called
+without any parentheses. Doing so is considered poor practice,
+although the 2008 POSIX standard explicitly allows it, to
+support historical practice. For programs to be maximally portable,
+always supply the parentheses.
+@end quotation
+
+@cindex dark corner, @code{length()} function
+If @code{length()} is called with a variable that has not been used,
+@command{gawk} forces the variable to be a scalar. Other
+implementations of @command{awk} leave the variable without a type.
+@value{DARKCORNER}
+Consider:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print length(x) ; x[1] = 1 @}'}
+@print{} 0
+@error{} gawk: fatal: attempt to use scalar `x' as array
+
+$ @kbd{nawk 'BEGIN @{ print length(x) ; x[1] = 1 @}'}
+@print{} 0
+@end example
+
+@noindent
+If @option{--lint} has
+been specified on the command line, @command{gawk} issues a
+warning about this.
+
+@cindex common extensions, @code{length()} applied to an array
+@cindex extensions, common@comma{} @code{length()} applied to an array
+@cindex differences between @command{gawk} and @command{awk}
+@cindex number of array elements
+@cindex array, number of elements
+With @command{gawk} and several other @command{awk} implementations, when given an
+array argument, the @code{length()} function returns the number of elements
+in the array. @value{COMMONEXT}
+This is less useful than it might seem at first, as the
+array is not guaranteed to be indexed from one to the number of elements
+in it.
+If @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} warns that passing an array argument is not portable.
+If @option{--posix} is supplied, using an array argument is a fatal error
+(@pxref{Arrays}).
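+
+For instance, here is a quick sketch of counting elements with
+@command{gawk}:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a["x"] = 1; a["y"] = 2; print length(a) @}'}
+@print{} 2
+@end example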
+
+@item @code{match(@var{string}, @var{regexp}} [@code{, @var{array}}]@code{)}
+@cindexawkfunc{match}
+@cindex string, regular expression match
+@cindex match regexp in string
+Search @var{string} for the
+longest, leftmost substring matched by the regular expression,
+@var{regexp} and return the character position (index)
+at which that substring begins (one, if it starts at the beginning of
+@var{string}). If no match is found, return zero.
+
+The @var{regexp} argument may be either a regexp constant
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
+In the latter case, the string is treated as a regexp to be matched.
+@DBXREF{Computed Regexps} for a
+discussion of the difference between the two forms, and the
+implications for writing your program correctly.
+
+The order of the first two arguments is backwards from most other string
+functions that work with regular expressions, such as
+@code{sub()} and @code{gsub()}. It might help to remember that
+for @code{match()}, the order is the same as for the @samp{~} operator:
+@samp{@var{string} ~ @var{regexp}}.
+
+@cindex @code{RSTART} variable, @code{match()} function and
+@cindex @code{RLENGTH} variable, @code{match()} function and
+@cindex @code{match()} function, @code{RSTART}/@code{RLENGTH} variables
+The @code{match()} function sets the predefined variable @code{RSTART} to
+the index. It also sets the predefined variable @code{RLENGTH} to the
+length in characters of the matched substring. If no match is found,
+@code{RSTART} is set to zero, and @code{RLENGTH} to @minus{}1.
+
+For example:
+
+@example
+@c file eg/misc/findpat.awk
+@{
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
+ @}
+@}
+@c endfile
+@end example
+
+@noindent
+This program looks for lines that match the regular expression stored in
+the variable @code{regex}. This regular expression can be changed. If the
+first word on a line is @samp{FIND}, @code{regex} is changed to be the
+second word on that line. Therefore, if given:
+
+@example
+@c file eg/misc/findpat.data
+FIND ru+n
+My program runs
+but not very quickly
+FIND Melvin
+JF+KM
+This line is property of Reality Engineering Co.
+Melvin was here.
+@c endfile
+@end example
+
+@noindent
+@command{awk} prints:
+
+@example
+Match of ru+n found at 12 in My program runs
+Match of Melvin found at 1 in Melvin was here.
+@end example
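+
+The predefined variables can also be examined directly; here is a
+minimal sketch:
+
+@example
+$ @kbd{echo 'the quick brown fox' |}
+> @kbd{gawk '@{ if (match($0, /brown/)) print RSTART, RLENGTH @}'}
+@print{} 11 5
+@end example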
+
+@cindex differences in @command{awk} and @command{gawk}, @code{match()} function
+If @var{array} is present, it is cleared, and then the zeroth element
+of @var{array} is set to the entire portion of @var{string}
+matched by @var{regexp}. If @var{regexp} contains parentheses,
+the integer-indexed elements of @var{array} are set to contain the
+portion of @var{string} matching the corresponding parenthesized
+subexpression.
+For example:
+
+@example
+$ @kbd{echo foooobazbarrrrr |}
+> @kbd{gawk '@{ match($0, /(fo+).+(bar*)/, arr)}
+> @kbd{print arr[1], arr[2] @}'}
+@print{} foooo barrrrr
+@end example
+
+In addition,
+multidimensional subscripts are available providing
+the start index and length of each matched subexpression:
+
+@example
+$ @kbd{echo foooobazbarrrrr |}
+> @kbd{gawk '@{ match($0, /(fo+).+(bar*)/, arr)}
+> @kbd{print arr[1], arr[2]}
+> @kbd{print arr[1, "start"], arr[1, "length"]}
+> @kbd{print arr[2, "start"], arr[2, "length"]}
+> @kbd{@}'}
+@print{} foooo barrrrr
+@print{} 1 5
+@print{} 9 7
+@end example
+
+There may not be subscripts for the start and length of every parenthesized
+subexpression, because they may not all have matched text; thus, they
+should be tested for with the @code{in} operator
+(@pxref{Reference to Elements}).
+
+@cindex troubleshooting, @code{match()} function
+The @var{array} argument to @code{match()} is a
+@command{gawk} extension. In compatibility mode
+(@pxref{Options}),
+using a third argument is a fatal error.
+
+@item @code{patsplit(@var{string}, @var{array}} [@code{, @var{fieldpat}} [@code{, @var{seps}} ] ]@code{) #}
+@cindexgawkfunc{patsplit}
+@cindex split string into array
+Divide
+@var{string} into pieces defined by @var{fieldpat}
+and store the pieces in @var{array} and the separator strings in the
+@var{seps} array. The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth. The third argument, @var{fieldpat}, is
+a regexp describing the fields in @var{string} (just as @code{FPAT} is
+a regexp describing the fields in input records).
+It may be either a regexp constant or a string.
+If @var{fieldpat} is omitted, the value of @code{FPAT} is used.
+@code{patsplit()} returns the number of elements created.
+@code{@var{seps}[@var{i}]} is
+the separator string
+between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}.
+Any leading separator will be in @code{@var{seps}[0]}.
+
+The @code{patsplit()} function splits strings into pieces in a
+manner similar to the way input lines are split into fields using @code{FPAT}
+(@pxref{Splitting By Content}).
+
+Before splitting the string, @code{patsplit()} deletes any previously existing
+elements in the arrays @var{array} and @var{seps}.
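+
+For instance, here is a small sketch that pulls out runs of digits:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{    n = patsplit("12aa34bb56", f, /[0-9]+/)}
+> @kbd{    for (i = 1; i <= n; i++)}
+> @kbd{        print i, f[i]}
+> @kbd{@}'}
+@print{} 1 12
+@print{} 2 34
+@print{} 3 56
+@end example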
+
+@item @code{split(@var{string}, @var{array}} [@code{, @var{fieldsep}} [@code{, @var{seps}} ] ]@code{)}
+@cindexawkfunc{split}
+Divide @var{string} into pieces separated by @var{fieldsep}
+and store the pieces in @var{array} and the separator strings in the
+@var{seps} array. The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth. The string value of the third argument, @var{fieldsep}, is
+a regexp describing where to split @var{string} (much as @code{FS} can
+be a regexp describing where to split input records).
+If @var{fieldsep} is omitted, the value of @code{FS} is used.
+@code{split()} returns the number of elements created.
+@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]}
+being the separator string
+between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}.
+If @var{fieldsep} is a single
+space then any leading whitespace goes into @code{@var{seps}[0]} and
+any trailing
+whitespace goes into @code{@var{seps}[@var{n}]} where @var{n} is the
+return value of
+@code{split()} (i.e., the number of elements in @var{array}).
+
+The @code{split()} function splits strings into pieces in a
+manner similar to the way input lines are split into fields. For example:
+
+@example
+split("cul-de-sac", a, "-", seps)
+@end example
+
+@noindent
+@cindex strings splitting, example
+splits the string @samp{cul-de-sac} into three fields using @samp{-} as the
+separator. It sets the contents of the array @code{a} as follows:
+
+@example
+a[1] = "cul"
+a[2] = "de"
+a[3] = "sac"
+@end example
+
+and sets the contents of the array @code{seps} as follows:
+
+@example
+seps[1] = "-"
+seps[2] = "-"
+@end example
+
+@noindent
+The value returned by this call to @code{split()} is three.
+
+@cindex differences in @command{awk} and @command{gawk}, @code{split()} function
+As with input field-splitting, when the value of @var{fieldsep} is
+@w{@code{" "}}, leading and trailing whitespace is ignored in values assigned to
+the elements of
+@var{array} but not in @var{seps}, and the elements
+are separated by runs of whitespace.
+Also, as with input field-splitting, if @var{fieldsep} is the null string, each
+individual character in the string is split into its own array element.
+@value{COMMONEXT}
+
+Note, however, that @code{RS} has no effect on the way @code{split()}
+works. Even though @samp{RS = ""} causes newline to also be an input
+field separator, this does not affect how @code{split()} splits strings.
+
+@cindex dark corner, @code{split()} function
+Modern implementations of @command{awk}, including @command{gawk}, allow
+the third argument to be a regexp constant (@code{/abc/}) as well as a
+string.
+@value{DARKCORNER}
+The POSIX standard allows this as well.
+@DBXREF{Computed Regexps} for a
+discussion of the difference between using a string constant or a regexp constant,
+and the implications for writing your program correctly.
+
+Before splitting the string, @code{split()} deletes any previously existing
+elements in the arrays @var{array} and @var{seps}.
+
+If @var{string} is null, the array has no elements. (So this is a portable
+way to delete an entire array with one statement.
+@xref{Delete}.)
+
+If @var{string} does not match @var{fieldsep} at all (but is not null),
+@var{array} has one element only. The value of that element is the original
+@var{string}.
+
+In POSIX mode (@pxref{Options}), the fourth argument is not allowed.
+
+@item @code{sprintf(@var{format}, @var{expression1}, @dots{})}
+@cindexawkfunc{sprintf}
+@cindex formatting strings
+Return (without printing) the string that @code{printf} would
+have printed out with the same arguments
+(@pxref{Printf}).
+For example:
+
+@example
+pival = sprintf("pi = %.2f (approx.)", 22/7)
+@end example
+
+@noindent
+assigns the string @w{@samp{pi = 3.14 (approx.)}} to the variable @code{pival}.
+
+@cindexgawkfunc{strtonum}
+@cindex convert string to number
+@item @code{strtonum(@var{str}) #}
+Examine @var{str} and return its numeric value. If @var{str}
+begins with a leading @samp{0}, @code{strtonum()} assumes that @var{str}
+is an octal number. If @var{str} begins with a leading @samp{0x} or
+@samp{0X}, @code{strtonum()} assumes that @var{str} is a hexadecimal number.
+For example:
+
+@example
+$ @kbd{echo 0x11 |}
+> @kbd{gawk '@{ printf "%d\n", strtonum($1) @}'}
+@print{} 17
+@end example
+
+Using the @code{strtonum()} function is @emph{not} the same as adding zero
+to a string value; the automatic coercion of strings to numbers
+works only for decimal data, not for octal or hexadecimal.@footnote{Unless
+you use the @option{--non-decimal-data} option, which isn't recommended.
+@DBXREF{Nondecimal Data} for more information.}
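+
+To see the difference, compare @code{strtonum()} with simply adding
+zero (a quick sketch; the input value is arbitrary):
+
+@example
+$ @kbd{echo 0x11 | gawk '@{ print strtonum($1), $1 + 0 @}'}
+@print{} 17 0
+@end example
+
+@noindent
+The automatic conversion sees only decimal digits, so @samp{$1 + 0}
+yields zero, while @code{strtonum()} recognizes the hexadecimal form.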
+
+Note also that @code{strtonum()} uses the current locale's decimal point
+for recognizing numbers (@pxref{Locales}).
+
+@item @code{sub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
+@cindexawkfunc{sub}
+@cindex replace in string
+Search @var{target}, which is treated as a string, for the
+leftmost, longest substring matched by the regular expression @var{regexp}.
+Modify the entire string
+by replacing the matched text with @var{replacement}.
+The modified string becomes the new value of @var{target}.
+Return the number of substitutions made (zero or one).
+
+The @var{regexp} argument may be either a regexp constant
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
+In the latter case, the string is treated as a regexp to be matched.
+@DBXREF{Computed Regexps} for a
+discussion of the difference between the two forms, and the
+implications for writing your program correctly.
+
+This function is peculiar because @var{target} is not simply
+used to compute a value, and not just any expression will do---it
+must be a variable, field, or array element so that @code{sub()} can
+store a modified value there. If this argument is omitted, then the
+default is to use and alter @code{$0}.@footnote{Note that this means
+that the record will first be regenerated using the value of @code{OFS} if
+any fields have been changed, and that the fields will be updated
+after the substitution, even if the operation is a ``no-op'' such
+as @samp{sub(/^/, "")}.}
+For example:
+
+@example
+str = "water, water, everywhere"
+sub(/at/, "ith", str)
+@end example
+
+@noindent
+sets @code{str} to @w{@samp{wither, water, everywhere}}, by replacing the
+leftmost longest occurrence of @samp{at} with @samp{ith}.
+
+If the special character @samp{&} appears in @var{replacement}, it
+stands for the precise substring that was matched by @var{regexp}. (If
+the regexp can match more than one string, then this precise substring
+may vary.) For example:
+
+@example
+@{ sub(/candidate/, "& and his wife"); print @}
+@end example
+
+@noindent
+changes the first occurrence of @samp{candidate} to @samp{candidate
+and his wife} on each input line.
+Here is another example:
+
+@example
+$ @kbd{awk 'BEGIN @{}
+> @kbd{str = "daabaaa"}
+> @kbd{sub(/a+/, "C&C", str)}
+> @kbd{print str}
+> @kbd{@}'}
+@print{} dCaaCbaaa
+@end example
+
+@noindent
+This shows how @samp{&} can represent a nonconstant string and also
+illustrates the ``leftmost, longest'' rule in regexp matching
+(@pxref{Leftmost Longest}).
+
+The effect of this special character (@samp{&}) can be turned off by putting a
+backslash before it in the string. As usual, to insert one backslash in
+the string, you must write two backslashes. Therefore, write @samp{\\&}
+in a string constant to include a literal @samp{&} in the replacement.
+For example, the following shows how to replace the first @samp{|} on each line with
+an @samp{&}:
+
+@example
+@{ sub(/\|/, "\\&"); print @}
+@end example
+
+@cindex @code{sub()} function, arguments of
+@cindex @code{gsub()} function, arguments of
+As mentioned, the third argument to @code{sub()} must
+be a variable, field, or array element.
+Some versions of @command{awk} allow the third argument to
+be an expression that is not an lvalue. In such a case, @code{sub()}
+still searches for the pattern and returns zero or one, but the result of
+the substitution (if any) is thrown away because there is no place
+to put it. Such versions of @command{awk} accept expressions
+like the following:
+
+@example
+sub(/USA/, "United States", "the USA and Canada")
+@end example
+
+@noindent
+@cindex troubleshooting, @code{gsub()}/@code{sub()} functions
+For historical compatibility, @command{gawk} accepts such erroneous code.
+However, using any other nonchangeable
+object as the third parameter causes a fatal error and your program
+will not run.
+
+Finally, if the @var{regexp} is not a regexp constant, it is converted into a
+string, and then the value of that string is treated as the regexp to match.
+
+@item @code{substr(@var{string}, @var{start}} [@code{, @var{length}} ]@code{)}
+@cindexawkfunc{substr}
+@cindex substring
+Return a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}. The first character of a
+string is character number one.@footnote{This is different from
+C and C++, in which the first character is number zero.}
+For example, @code{substr("washington", 5, 3)} returns @code{"ing"}.
+
+If @var{length} is not present, @code{substr()} returns the whole suffix of
+@var{string} that begins at character number @var{start}. For example,
+@code{substr("washington", 5)} returns @code{"ington"}. The whole
+suffix is also returned
+if @var{length} is greater than the number of characters remaining
+in the string, counting from character @var{start}.
+
+@cindex Brian Kernighan's @command{awk}
+If @var{start} is less than one, @code{substr()} treats it as
+if it were one. (POSIX doesn't specify what to do in this case:
+BWK @command{awk} acts this way, and therefore @command{gawk}
+does too.)
+If @var{start} is greater than the number of characters
+in the string, @code{substr()} returns the null string.
+Similarly, if @var{length} is present but less than or equal to zero,
+the null string is returned.
+
+@cindex troubleshooting, @code{substr()} function
+The string returned by @code{substr()} @emph{cannot} be
+assigned. Thus, it is a mistake to attempt to change a portion of
+a string, as shown in the following example:
+
+@example
+string = "abcdef"
+# try to get "abCDEf", won't work
+substr(string, 3, 3) = "CDE"
+@end example
+
+@noindent
+It is also a mistake to use @code{substr()} as the third argument
+of @code{sub()} or @code{gsub()}:
+
+@example
+gsub(/xyz/, "pdq", substr($0, 5, 20)) # WRONG
+@end example
+
+@cindex portability, @code{substr()} function
+(Some commercial versions of @command{awk} treat
+@code{substr()} as assignable, but doing so is not portable.)
+
+If you need to replace bits and pieces of a string, combine @code{substr()}
+with string concatenation, in the following manner:
+
+@example
+string = "abcdef"
+@dots{}
+string = substr(string, 1, 2) "CDE" substr(string, 6)
+@end example
+
+@cindex case sensitivity, converting case
+@cindex strings, converting letter case
+@item @code{tolower(@var{string})}
+@cindexawkfunc{tolower}
+@cindex convert string to lower case
+Return a copy of @var{string}, with each uppercase character
+in the string replaced with its corresponding lowercase character.
+Nonalphabetic characters are left unchanged. For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item @code{toupper(@var{string})}
+@cindexawkfunc{toupper}
+@cindex convert string to upper case
+Return a copy of @var{string}, with each lowercase character
+in the string replaced with its corresponding uppercase character.
+Nonalphabetic characters are left unchanged. For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@sidebar Matching the Null String
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+@end sidebar
+
+
+@node Gory Details
+@subsubsection More about @samp{\} and @samp{&} with @code{sub()}, @code{gsub()}, and @code{gensub()}
+
+@cindex escape processing, @code{gsub()}/@code{gensub()}/@code{sub()} functions
+@cindex @code{sub()} function, escape processing
+@cindex @code{gsub()} function, escape processing
+@cindex @code{gensub()} function (@command{gawk}), escape processing
+@cindex @code{\} (backslash), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+@cindex backslash (@code{\}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+@cindex @code{&} (ampersand), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+
+@quotation CAUTION
+This subsubsection has been reported to cause headaches.
+You might want to skip it upon first reading.
+@end quotation
+
+When using @code{sub()}, @code{gsub()}, or @code{gensub()}, and trying to get literal
+backslashes and ampersands into the replacement text, you need to remember
+that there are several levels of @dfn{escape processing} going on.
+
+First, there is the @dfn{lexical} level, which is when @command{awk} reads
+your program
+and builds an internal copy of it to execute.
+Then there is the runtime level, which is when @command{awk} actually scans the
+replacement string to determine what to generate.
+
+@cindex Brian Kernighan's @command{awk}
+At both levels, @command{awk} looks for a defined set of characters that
+can come after a backslash. At the lexical level, it looks for the
+escape sequences listed in @ref{Escape Sequences}.
+Thus, for every @samp{\} that @command{awk} processes at the runtime
+level, you must type two backslashes at the lexical level.
+When a character that is not valid for an escape sequence follows the
+@samp{\}, BWK @command{awk} and @command{gawk} both simply remove the initial
+@samp{\} and put the next character into the string. Thus, for
+example, @code{"a\qb"} is treated as @code{"aqb"}.
+
+At the runtime level, the various functions handle sequences of
+@samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex.
+Historically, the @code{sub()} and @code{gsub()} functions treated the two
+character sequence @samp{\&} specially; this sequence was replaced in
+the generated text with a single @samp{&}. Any other @samp{\} within
+the @var{replacement} string that did not precede an @samp{&} was passed
+through unchanged. This is illustrated in @ref{table-sub-escapes}.
+
+@c Thanks to Karl Berry for help with the TeX stuff.
+@float Table,table-sub-escapes
+@caption{Historical escape sequence processing for @code{sub()} and @code{gsub()}}
+@tex
+\vbox{\bigskip
+% We need more characters for escape and tab ...
+\catcode`_ = 0
+\catcode`! = 4
+% ... since this table has lots of &'s and \'s, so we unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
+ You type!@code{sub()} sees!@code{sub()} generates_cr
+_hrulefill!_hrulefill!_hrulefill_cr
+ @code{\&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+ @code{\\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
+}
+_bigskip}
+@end tex
+@ifdocbook
+@multitable @columnfractions .20 .20 .60
+@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
+@item @code{\&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
+@end multitable
+@end ifdocbook
+@ifnottex
+@ifnotdocbook
+@display
+ You type @code{sub()} sees @code{sub()} generates
+ -------- ---------- ---------------
+ @code{\&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\&} @code{\&} A literal @samp{&}
+ @code{\\\\&} @code{\\&} A literal @samp{\&}
+ @code{\\\\\&} @code{\\&} A literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\\&}
+ @code{\\q} @code{\q} A literal @samp{\q}
+@end display
+@end ifnotdocbook
+@end ifnottex
+@end float
+
+@noindent
+This table shows both the lexical-level processing, where
+an odd number of backslashes becomes an even number at the runtime level,
+as well as the runtime processing done by @code{sub()}.
+(For the sake of simplicity, the rest of the following tables only show the
+case of even numbers of backslashes entered at the lexical level.)
+
+The problem with the historical approach is that there is no way to get
+a literal @samp{\} followed by the matched text.
+
+Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are no longer relevant.
+
+At one point, the @command{gawk} maintainer submitted
+proposed text for a revised standard that
+reverts to rules that correspond more closely to the original existing
+practice. The proposed rules have special cases that make it possible
+to produce a @samp{\} preceding the matched text.
+This is shown in
+@ref{table-sub-proposed}.
+
+@float Table,table-sub-proposed
+@caption{GNU @command{awk} rules for @code{sub()} and backslash}
+@tex
+\vbox{\bigskip
+% We need more characters for escape and tab ...
+\catcode`_ = 0
+\catcode`! = 4
+% ... since this table has lots of &'s and \'s, so we unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
+ You type!@code{sub()} sees!@code{sub()} generates_cr
+_hrulefill!_hrulefill!_hrulefill_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
+ @code{\\\\}! @code{\\}!@code{\\}_cr
+}
+_bigskip}
+@end tex
+@ifdocbook
+@multitable @columnfractions .20 .20 .60
+@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
+@item @code{\\\\} @tab @code{\\} @tab @code{\\}
+@end multitable
+@end ifdocbook
+@ifnottex
+@ifnotdocbook
+@display
+ You type @code{sub()} sees @code{sub()} generates
+ -------- ---------- ---------------
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
+ @code{\\\\} @code{\\} @code{\\}
+@end display
+@end ifnotdocbook
+@end ifnottex
+@end float
+
+In a nutshell, at the runtime level, there are now three special sequences
+of characters (@samp{\\\&}, @samp{\\&} and @samp{\&}) whereas historically
+there was only one. However, as in the historical case, any @samp{\} that
+is not part of one of these three sequences is not special and appears
+in the output literally.
+
+@command{gawk} 3.0 and 3.1 follow these rules for @code{sub()} and
+@code{gsub()}. The POSIX standard took much longer to be revised than
+was expected. In addition, the @command{gawk} maintainer's proposal was
+lost during the standardization process. The final rules are
+somewhat simpler. The results are similar except for one case.
+
+@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
+The POSIX rules state that @samp{\&} in the replacement string produces
+a literal @samp{&}, @samp{\\} produces a literal @samp{\}, and @samp{\} followed
+by anything else is not special; the @samp{\} is placed straight into the output.
+These rules are presented in @ref{table-posix-sub}.
+
+@float Table,table-posix-sub
+@caption{POSIX rules for @code{sub()} and @code{gsub()}}
+@tex
+\vbox{\bigskip
+% We need more characters for escape and tab ...
+\catcode`_ = 0
+\catcode`! = 4
+% ... since this table has lots of &'s and \'s, so we unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
+ You type!@code{sub()} sees!@code{sub()} generates_cr
+_hrulefill!_hrulefill!_hrulefill_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
+ @code{\\\\}! @code{\\}!@code{\}_cr
+}
+_bigskip}
+@end tex
+@ifdocbook
+@multitable @columnfractions .20 .20 .60
+@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
+@item @code{\\\\} @tab @code{\\} @tab @code{\}
+@end multitable
+@end ifdocbook
+@ifnottex
+@ifnotdocbook
+@display
+ You type @code{sub()} sees @code{sub()} generates
+ -------- ---------- ---------------
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
+ @code{\\\\} @code{\\} @code{\}
+@end display
+@end ifnotdocbook
+@end ifnottex
+@end float
+
+The only case where the difference is noticeable is the last one: @samp{\\\\}
+is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}.
+
+Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules
+when @option{--posix} is specified (@pxref{Options}). Otherwise,
+it continued to follow the proposed rules, as
+that had been its behavior for many years.
+
+When @value{PVERSION} 4.0.0 was released, the @command{gawk} maintainer
+made the POSIX rules the default, breaking well over a decade's worth
+of backward compatibility.@footnote{This was rather naive of him, despite
+there being a note in this section indicating that the next major version
+would move to the POSIX rules.} Needless to say, this was a bad idea,
+and as of @value{PVERSION} 4.0.1, @command{gawk} resumed its historical
+behavior, and only follows the POSIX rules when @option{--posix} is given.
+
+The rules for @code{gensub()} are considerably simpler. At the runtime
+level, whenever @command{gawk} sees a @samp{\}, if the following character
+is a digit, then the text that matched the corresponding parenthesized
+subexpression is placed in the generated output. Otherwise,
+no matter what character follows the @samp{\}, it
+appears in the generated text and the @samp{\} does not,
+as shown in @ref{table-gensub-escapes}.
+
+@float Table,table-gensub-escapes
+@caption{Escape sequence processing for @code{gensub()}}
+@tex
+\vbox{\bigskip
+% We need more characters for escape and tab ...
+\catcode`_ = 0
+\catcode`! = 4
+% ... since this table has lots of &'s and \'s, so we unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
+ You type!@code{gensub()} sees!@code{gensub()} generates_cr
+_hrulefill!_hrulefill!_hrulefill_cr
+ @code{&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\}! @code{\\}!A literal @samp{\}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\}, then the matched text_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{q}_cr
+}
+_bigskip}
+@end tex
+@ifdocbook
+@multitable @columnfractions .20 .20 .60
+@headitem You type @tab @code{gensub()} sees @tab @code{gensub()} generates
+@item @code{&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\} @tab @code{\\} @tab A literal @samp{\}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, then the matched text
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{q}
+@end multitable
+@end ifdocbook
+@ifnottex
+@ifnotdocbook
+@display
+ You type @code{gensub()} sees @code{gensub()} generates
+ -------- ------------- ------------------
+ @code{&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\\} @code{\\} A literal @samp{\}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\q} @code{\q} A literal @samp{q}
+@end display
+@end ifnotdocbook
+@end ifnottex
+@end float
+
+Because of the complexity of the lexical and runtime level processing
+and the special cases for @code{sub()} and @code{gsub()},
+we recommend the use of @command{gawk} and @code{gensub()} when you have
+to do substitutions.
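+
+As a brief illustration of the @code{gensub()} rules (the input string
+here is contrived), parenthesized subexpressions can be rearranged in
+the replacement text:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{print gensub(/(a+)(b+)/, "<\\2\\1>", "g", "aabb xab")}
+> @kbd{@}'}
+@print{} <bbaa> x<ba>
+@end example
+
+@noindent
+Here @samp{\\1} and @samp{\\2} (which @code{gensub()} sees as
+@samp{\1} and @samp{\2}) refer to the text matched by the first and
+second parenthesized subexpressions, respectively.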
+
+@node I/O Functions
+@subsection Input/Output Functions
+@cindex input/output functions
+
+The following functions relate to input/output (I/O).
+Optional parameters are enclosed in square brackets ([ ]):
+
+@table @asis
+@item @code{close(}@var{filename} [@code{,} @var{how}]@code{)}
+@cindexawkfunc{close}
+@cindex files, closing
+@cindex close file or coprocess
+Close the file @var{filename} for input or output. Alternatively, the
+argument may be a shell command that was used for creating a coprocess, or
+for redirecting to or from a pipe; then the coprocess or pipe is closed.
+@DBXREF{Close Files And Pipes}
+for more information.
+
+When closing a coprocess, it is occasionally useful to first close
+one end of the two-way pipe and then to close the other. This is done
+by providing a second argument to @code{close()}. This second argument
+should be one of the two string values @code{"to"} or @code{"from"},
+indicating which end of the pipe to close. Case in the string does
+not matter.
+@xref{Two-way I/O},
+which discusses this feature in more detail and gives an example.
+
+Note that the second argument to @code{close()} is a @command{gawk}
+extension; it is not available in compatibility mode (@pxref{Options}).
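+
+For illustration only, a coprocess might be used along these lines
+(@command{sort} is just a convenient example of a command that needs
+end-of-input before it can produce output):
+
+@example
+cmd = "sort"
+print "heron" |& cmd
+print "egret" |& cmd
+close(cmd, "to")        # no more input; let sort produce output
+while ((cmd |& getline bird) > 0)
+    print "got", bird
+close(cmd)              # now close the "from" end as well
+@end example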
+
+@item @code{fflush(}[@var{filename}]@code{)}
+@cindexawkfunc{fflush}
+@cindex flush buffered output
+Flush any buffered output associated with @var{filename}, which is either a
+file opened for writing or a shell command for redirecting output to
+a pipe or coprocess.
+
+@cindex buffers, flushing
+@cindex output, buffering
+Many utility programs @dfn{buffer} their output (i.e., they save information
+to write to a disk file or the screen in memory until there is enough
+for it to be worthwhile to send the data to the output device).
+This is often more efficient than writing
+every little bit of information as soon as it is ready. However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers (i.e.,
+write the information to its destination, even if a buffer is not full).
+This is the purpose of the @code{fflush()} function---@command{gawk} also
+buffers its output and the @code{fflush()} function forces
+@command{gawk} to flush its buffers.
+
+@cindex extensions, common@comma{} @code{fflush()} function
+@cindex Brian Kernighan's @command{awk}
+Brian Kernighan added @code{fflush()} to his @command{awk} in April
+1992. For two decades, it was a common extension. In December
+2012, it was accepted for inclusion into the POSIX standard.
+See @uref{http://austingroupbugs.net/view.php?id=634, the Austin Group website}.
+
+POSIX standardizes @code{fflush()} as follows: if there
+is no argument, or if the argument is the null string (@w{@code{""}}),
+then @command{awk} flushes the buffers for @emph{all} open output files
+and pipes.
+
+@quotation NOTE
+Prior to @value{PVERSION} 4.0.2, @command{gawk}
+would flush only the standard output if there was no argument,
+and flush all output files and pipes if the argument was the null
+string. This was changed in order to be compatible with Brian
+Kernighan's @command{awk}, in the hope that standardizing this
+feature in POSIX would then be easier (which indeed helped).
+
+With @command{gawk},
+you can use @samp{fflush("/dev/stdout")} if you wish to flush
+only the standard output.
+@end quotation
+
+@c @cindex automatic warnings
+@c @cindex warnings, automatic
+@cindex troubleshooting, @code{fflush()} function
+@code{fflush()} returns zero if the buffer is successfully flushed;
+otherwise, it returns nonzero. (@command{gawk} returns @minus{}1.)
+In the case where all buffers are flushed, the return value is zero
+only if all buffers were flushed successfully. Otherwise, it is
+@minus{}1, and @command{gawk} warns about the problem @var{filename}.
+
+@command{gawk} also issues a warning message if you attempt to flush
+a file or pipe that was opened for reading (such as with @code{getline}),
+or if @var{filename} is not an open file, pipe, or coprocess.
+In such a case, @code{fflush()} returns @minus{}1, as well.
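+
+A typical use is forcing a prompt out before reading a reply. A minimal
+sketch, assuming the reply is read from @file{/dev/stdin}, might look
+like this:
+
+@example
+printf "Enter a name: "
+fflush()                       # make sure the prompt appears now
+getline name < "/dev/stdin"
+print "Hello,", name
+@end example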
+
+@sidebar Interactive Versus Noninteractive Buffering
+@cindex buffering, interactive vs.@: noninteractive
+
+As a side point, buffering issues can be even more confusing, depending
+upon whether your program is @dfn{interactive} (i.e., communicating
+with a user sitting at a keyboard).@footnote{A program is interactive
+if the standard output is connected to a terminal device. On modern
+systems, this means your keyboard and screen.}
+
+@c Thanks to Walter.Mecky@dresdnerbank.de for this example, and for
+@c motivating me to write this section.
+Interactive programs generally @dfn{line buffer} their output (i.e., they
+write out every line). Noninteractive programs wait until they have
+a full buffer, which may be many lines of output.
+Here is an example of the difference:
+
+@example
+$ @kbd{awk '@{ print $1 + $2 @}'}
+@kbd{1 1}
+@print{} 2
+@kbd{2 3}
+@print{} 5
+@kbd{Ctrl-d}
+@end example
+
+@noindent
+Each line of output is printed immediately. Compare that behavior
+with this example:
+
+@example
+$ @kbd{awk '@{ print $1 + $2 @}' | cat}
+@kbd{1 1}
+@kbd{2 3}
+@kbd{Ctrl-d}
+@print{} 2
+@print{} 5
+@end example
+
+@noindent
+Here, no output is printed until after the @kbd{Ctrl-d} is typed, because
+it is all buffered and sent down the pipe to @command{cat} in one shot.
+@end sidebar
+
+@item @code{system(@var{command})}
+@cindexawkfunc{system}
+@cindex invoke shell command
+@cindex interacting with other programs
+Execute the operating-system
+command @var{command} and then return to the @command{awk} program.
+Return @var{command}'s exit status.
+
+For example, if the following fragment of code is put in your @command{awk}
+program:
+
+@example
+END @{
+ system("date | mail -s 'awk run done' root")
+@}
+@end example
+
+@noindent
+the system administrator is sent mail when the @command{awk} program
+finishes processing input and begins its end-of-input processing.
+
+Note that redirecting @code{print} or @code{printf} into a pipe is often
+enough to accomplish your task. If you need to run many commands, it
+is more efficient to simply print them down a pipeline to the shell:
+
+@example
+while (@var{more stuff to do})
+ print @var{command} | "/bin/sh"
+close("/bin/sh")
+@end example
+
+@noindent
+@cindex troubleshooting, @code{system()} function
+@cindex @option{--sandbox} option, disabling @code{system()} function
+However, if your @command{awk}
+program is interactive, @code{system()} is useful for running large
+self-contained programs, such as a shell or an editor.
+Some operating systems cannot implement the @code{system()} function.
+@code{system()} causes a fatal error if it is not supported.
+
+@quotation NOTE
+When @option{--sandbox} is specified, the @code{system()} function is disabled
+(@pxref{Options}).
+@end quotation
+
+@end table
+
+@sidebar Controlling Output Buffering with @code{system()}
+@cindex buffers, flushing
+@cindex buffering, input/output
+@cindex output, buffering
+
+The @code{fflush()} function provides explicit control over output buffering for
+individual files and pipes. However, its use is not portable to many older
+@command{awk} implementations. An alternative method to flush output
+buffers is to call @code{system()} with a null string as its argument:
+
+@example
+system("") # flush output
+@end example
+
+@noindent
+@command{gawk} treats this use of the @code{system()} function as a special
+case and is smart enough not to run a shell (or other command
+interpreter) with the empty command. Therefore, with @command{gawk}, this
+idiom is not only useful, it is also efficient. Although this method should work
+with other @command{awk} implementations, it does not necessarily avoid
+starting an unnecessary shell. (Other implementations may only
+flush the buffer associated with the standard output and not necessarily
+all buffered output.)
+
+If you think about what a programmer expects, it makes sense that
+@code{system()} should flush any pending output. The following program:
+
+@example
+BEGIN @{
+ print "first print"
+ system("echo system echo")
+ print "second print"
+@}
+@end example
+
+@noindent
+must print:
+
+@example
+first print
+system echo
+second print
+@end example
+
+@noindent
+and not:
+
+@example
+system echo
+first print
+second print
+@end example
+
+If @command{awk} did not flush its buffers before calling @code{system()},
+you would see the latter (undesirable) output.
+@end sidebar
+
+@node Time Functions
+@subsection Time Functions
+@cindex time functions
+
+@cindex timestamps
+@cindex log files, timestamps in
+@cindex files, log@comma{} timestamps in
+@cindex @command{gawk}, timestamps
+@cindex POSIX @command{awk}, timestamps and
+@command{awk} programs are commonly used to process log files
+containing timestamp information, indicating when a
+particular log record was written. Many programs log their timestamp
+in the form returned by the @code{time()} system call, which is the
+number of seconds since a particular epoch. On POSIX-compliant systems,
+it is the number of seconds since
+1970-01-01 00:00:00 UTC, not counting leap
+@ifclear FOR_PRINT
+seconds.@footnote{@xref{Glossary}, especially the entries ``Epoch'' and ``UTC.''}
+@end ifclear
+@ifset FOR_PRINT
+seconds.
+@end ifset
+All known POSIX-compliant systems support timestamps from 0 through
+@iftex
+@math{2^{31} - 1},
+@end iftex
+@ifnottex
+@ifnotdocbook
+2^31 - 1,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+2<superscript>31</superscript> &minus; 1, @c
+@end docbook
+which is sufficient to represent times through
+2038-01-19 03:14:07 UTC. Many systems support a wider range of timestamps,
+including negative timestamps that represent times before the
+epoch.
+
+@cindex @command{date} utility, GNU
+@cindex time, retrieving
+In order to make it easier to process such log files and to produce
+useful reports, @command{gawk} provides the following functions for
+working with timestamps. They are @command{gawk} extensions; they are
+not specified in the POSIX standard.@footnote{The GNU @command{date} utility can
+also do many of the things described here. Its use may be preferable
+for simple time-related operations in shell scripts.}
+However, recent versions
+of @command{mawk} (@pxref{Other Versions}) also support these functions.
+Optional parameters are enclosed in square brackets ([ ]):
+
+@c @asis for docbook
+@table @asis
+@item @code{mktime(@var{datespec})}
+@cindexgawkfunc{mktime}
+@cindex generate time values
+Turn @var{datespec} into a timestamp in the same form
+as is returned by @code{systime()}. It is similar to the function of the
+same name in ISO C. The argument, @var{datespec}, is a string of the form
+@w{@code{"@var{YYYY} @var{MM} @var{DD} @var{HH} @var{MM} @var{SS} [@var{DST}]"}}.
+The string consists of six or seven numbers representing, respectively,
+the full year including century, the month from 1 to 12, the day of the month
+from 1 to 31, the hour of the day from 0 to 23, the minute from 0 to
+59, the second from 0 to 60,@footnote{Occasionally there are
+minutes in a year with a leap second, which is why the
+seconds can go up to 60.}
+and an optional daylight-savings flag.
+
+The values of these numbers need not be within the ranges specified;
+for example, an hour of @minus{}1 means 1 hour before midnight.
+The origin-zero Gregorian calendar is assumed, with year 0 preceding
+year 1 and year @minus{}1 preceding year 0.
+The time is assumed to be in the local time zone.
+If the daylight-savings flag is positive, the time is assumed to be
+daylight savings time; if zero, the time is assumed to be standard
+time; and if negative (the default), @code{mktime()} attempts to determine
+whether daylight savings time is in effect for the specified time.
+
+If @var{datespec} does not contain enough elements or if the resulting time
+is out of range, @code{mktime()} returns @minus{}1.
+
+@cindex @command{gawk}, @code{PROCINFO} array in
+@cindex @code{PROCINFO} array
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
+@cindexgawkfunc{strftime}
+@cindex format time string
+Format the time specified by @var{timestamp}
+based on the contents of the @var{format} string and return the result.
+It is similar to the function of the same name in ISO C.
+If @var{utc-flag} is present and is either nonzero or non-null, the value
+is formatted as UTC (Coordinated Universal Time, formerly GMT or Greenwich
+Mean Time). Otherwise, the value is formatted for the local time zone.
+The @var{timestamp} is in the same format as the value returned by the
+@code{systime()} function. If no @var{timestamp} argument is supplied,
+@command{gawk} uses the current time of day as the timestamp.
+Without a @var{format} argument, @code{strftime()} uses
+the value of @code{PROCINFO["strftime"]} as the format string
+(@pxref{Built-in Variables}).
+The default string value is
+@code{@w{"%a %b %e %H:%M:%S %Z %Y"}}. This format string produces
+output that is equivalent to that of the @command{date} utility.
+You can assign a new value to @code{PROCINFO["strftime"]} to
+change the default format; see the following list for the various format directives.
+
+@item @code{systime()}
+@cindexgawkfunc{systime}
+@cindex timestamps
+@cindex current system time
+Return the current time as the number of seconds since
+the system epoch. On POSIX systems, this is the number of seconds
+since 1970-01-01 00:00:00 UTC, not counting leap seconds.
+It may be a different number on other systems.
+@end table
+
+The @code{systime()} function allows you to compare a timestamp from a
+log file with the current time of day. In particular, it is easy to
+determine how long ago a particular record was logged. It also allows
+you to produce log records using the ``seconds since the epoch'' format.
+
+@cindex converting, dates to timestamps
+@cindex dates, converting to timestamps
+@cindex timestamps, converting dates to
+The @code{mktime()} function allows you to convert a textual representation
+of a date and time into a timestamp. This makes it easy to do before/after
+comparisons of dates and times, particularly when dealing with date and
+time data coming from an external source, such as a log file.
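+
+For example, a sketch of such a before/after comparison (the date here
+is arbitrary) might look like this:
+
+@example
+then = mktime("2014 09 01 12 00 00")  # noon, September 1, 2014, local time
+now = systime()
+print "seconds elapsed since then:", now - then
+@end example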
+
+The @code{strftime()} function allows you to easily turn a timestamp
+into human-readable information. It is similar in nature to the @code{sprintf()}
+function
+(@pxref{String Functions}),
+in that it copies nonformat specification characters verbatim to the
+returned string, while substituting date and time values for format
+specifications in the @var{format} string.
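+
+For instance, using only format specifications from the list that
+follows, a call such as this one should produce a human-readable
+timestamp for the current time of day:
+
+@example
+print strftime("It is now %I:%M %p on %A, %B %d, %Y.")
+@end example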
+
+@cindex format specifiers, @code{strftime()} function (@command{gawk})
+@code{strftime()} is guaranteed by the 1999 ISO C
+standard@footnote{Unfortunately,
+not every system's @code{strftime()} necessarily
+supports all of the conversions listed here.}
+to support the following date format specifications:
+
+@table @code
+@item %a
+The locale's abbreviated weekday name.
+
+@item %A
+The locale's full weekday name.
+
+@item %b
+The locale's abbreviated month name.
+
+@item %B
+The locale's full month name.
+
+@item %c
+The locale's ``appropriate'' date and time representation.
+(This is @samp{%A %B %d %T %Y} in the @code{"C"} locale.)
+
+@item %C
+The century part of the current year.
+This is the year divided by 100 and truncated to the next
+lower integer.
+
+@item %d
+The day of the month as a decimal number (01--31).
+
+@item %D
+Equivalent to specifying @samp{%m/%d/%y}.
+
+@item %e
+The day of the month, padded with a space if it is only one digit.
+
+@item %F
+Equivalent to specifying @samp{%Y-%m-%d}.
+This is the ISO 8601 date format.
+
+@item %g
+The year modulo 100 of the ISO 8601 week number, as a decimal number (00--99).
+For example, January 1, 2012, is in week 52 of 2011. Thus, the year
+of its ISO 8601 week number is 2011, even though its year is 2012.
+Similarly, December 31, 2012, is in week 1 of 2013. Thus, the year
+of its ISO week number is 2013, even though its year is 2012.
+
+@item %G
+The full year of the ISO week number, as a decimal number.
+
+@item %h
+Equivalent to @samp{%b}.
+
+@item %H
+The hour (24-hour clock) as a decimal number (00--23).
+
+@item %I
+The hour (12-hour clock) as a decimal number (01--12).
+
+@item %j
+The day of the year as a decimal number (001--366).
+
+@item %m
+The month as a decimal number (01--12).
+
+@item %M
+The minute as a decimal number (00--59).
+
+@item %n
+A newline character (ASCII LF).
+
+@item %p
+The locale's equivalent of the AM/PM designations associated
+with a 12-hour clock.
+
+@item %r
+The locale's 12-hour clock time.
+(This is @samp{%I:%M:%S %p} in the @code{"C"} locale.)
+
+@item %R
+Equivalent to specifying @samp{%H:%M}.
+
+@item %S
+The second as a decimal number (00--60).
+
+@item %t
+A TAB character.
+
+@item %T
+Equivalent to specifying @samp{%H:%M:%S}.
+
+@item %u
+The weekday as a decimal number (1--7). Monday is day one.
+
+@item %U
+The week number of the year (the first Sunday as the first day of week one)
+as a decimal number (00--53).
+
+@c @cindex ISO 8601
+@item %V
+The week number of the year (the first Monday as the first
+day of week one) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601.
+(To wit: if the week containing January 1 has four or more days in the
+new year, then it is week one; otherwise that week is the last week of
+the previous year, either week 52 or 53, and the next week is week one.)
+
+@item %w
+The weekday as a decimal number (0--6). Sunday is day zero.
+
+@item %W
+The week number of the year (the first Monday as the first day of week one)
+as a decimal number (00--53).
+
+@item %x
+The locale's ``appropriate'' date representation.
+(This is @samp{%A %B %d %Y} in the @code{"C"} locale.)
+
+@item %X
+The locale's ``appropriate'' time representation.
+(This is @samp{%T} in the @code{"C"} locale.)
+
+@item %y
+The year modulo 100 as a decimal number (00--99).
+
+@item %Y
+The full year as a decimal number (e.g., 2015).
+
+@c @cindex RFC 822
+@c @cindex RFC 1036
+@item %z
+The time zone offset in a +HHMM format (e.g., the format necessary to
+produce RFC 822/RFC 1036 date headers).
+
+@item %Z
+The time zone name or abbreviation; no characters if
+no time zone is determinable.
+
+@item %Ec %EC %Ex %EX %Ey %EY %Od %Oe %OH
+@itemx %OI %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
+``Alternative representations'' for the specifications
+that use only the second letter (@samp{%c}, @samp{%C},
+and so on).@footnote{If you don't understand any of this, don't worry about
+it; these facilities are meant to make it easier to ``internationalize''
+programs.
+Other internationalization features are described in
+@ref{Internationalization}.}
+(These facilitate compliance with the POSIX @command{date} utility.)
+
+@item %%
+A literal @samp{%}.
+@end table
+
+If a conversion specifier is not one of those just listed, the behavior is
+undefined.@footnote{This is because ISO C leaves the
+behavior of the C version of @code{strftime()} undefined and @command{gawk}
+uses the system's version of @code{strftime()} if it's there.
+Typically, the conversion specifier either does not appear in the
+returned string or appears literally.}
+
+For systems that are not yet fully standards-compliant,
+@command{gawk} supplies a copy of
+@code{strftime()} from the GNU C Library.
+It supports all of the just-listed format specifications.
+If that version is
+used to compile @command{gawk} (@pxref{Installation}),
+then the following additional format specifications are available:
+
+@table @code
+@item %k
+The hour (24-hour clock) as a decimal number (0--23).
+Single-digit numbers are padded with a space.
+
+@item %l
+The hour (12-hour clock) as a decimal number (1--12).
+Single-digit numbers are padded with a space.
+
+@ignore
+@item %N
+The ``Emperor/Era'' name.
+Equivalent to @samp{%C}.
+
+@item %o
+The ``Emperor/Era'' year.
+Equivalent to @samp{%y}.
+@end ignore
+
+@item %s
+The time as a decimal timestamp in seconds since the epoch.
+
+@ignore
+@item %v
+The date in VMS format (e.g., @samp{20-JUN-1991}).
+@end ignore
+@end table
+
+Additionally, the alternative representations are recognized but their
+normal representations are used.
+
+@cindex @code{date} utility, POSIX
+@cindex POSIX @command{awk}, @code{date} utility and
+The following example is an @command{awk} implementation of the POSIX
+@command{date} utility. Normally, the @command{date} utility prints the
+current date and time of day in a well-known format. However, if you
+provide an argument to it that begins with a @samp{+}, @command{date}
+copies nonformat specifier characters to the standard output and
+interprets the current time according to the format specifiers in
+the string. For example:
+
+@example
+$ @kbd{date '+Today is %A, %B %d, %Y.'}
+@print{} Today is Monday, September 22, 2014.
+@end example
+
+Here is the @command{gawk} version of the @command{date} utility.
+It has a shell ``wrapper'' to handle the @option{-u} option,
+which requires that @command{date} run as if the time zone
+is set to UTC:
+
+@example
+#! /bin/sh
+#
+# date --- approximate the POSIX 'date' command
+
+case $1 in
+-u) TZ=UTC0 # use UTC
+ export TZ
+ shift ;;
+esac
+
+gawk 'BEGIN @{
+ format = PROCINFO["strftime"]
+ exitval = 0
+
+ if (ARGC > 2)
+ exitval = 1
+ else if (ARGC == 2) @{
+ format = ARGV[1]
+ if (format ~ /^\+/)
+ format = substr(format, 2) # remove leading +
+ @}
+ print strftime(format)
+ exit exitval
+@}' "$@@"
+@end example
+
+@node Bitwise Functions
+@subsection Bit-Manipulation Functions
+@cindex bit-manipulation functions
+@cindex bitwise, operations
+@cindex AND bitwise operation
+@cindex OR bitwise operation
+@cindex XOR bitwise operation
+@cindex operations, bitwise
+@quotation
+@i{I can explain it for you, but I can't understand it for you.}
+@author Anonymous
+@end quotation
+
+Many languages provide the ability to perform @dfn{bitwise} operations
+on two integer numbers. In other words, the operation is performed on
+each successive pair of bits in the operands.
+Three common operations are bitwise AND, OR, and XOR.
+The operations are described in @ref{table-bitwise-ops}.
+
+@c 11/2014: Postprocessing turns the docbook informaltable
+@c into a table. Hurray for scripting!
+@float Table,table-bitwise-ops
+@caption{Bitwise operations}
+@ifnottex
+@ifnotdocbook
+@display
+ Bit Operator
+ | AND | OR | XOR
+ |---+---+---+---+---+---
+Operands | 0 | 1 | 0 | 1 | 0 | 1
+----------+---+---+---+---+---+---
+ 0 | 0 0 | 0 1 | 0 1
+ 1 | 0 1 | 1 1 | 1 0
+@end display
+@end ifnotdocbook
+@end ifnottex
+@tex
+\centerline{
+\vbox{\bigskip % space above the table (about 1 linespace)
+% Because we have vertical rules, we can't let TeX insert interline space
+% in its usual way.
+\offinterlineskip
+\halign{\strut\hfil#\quad\hfil % operands
+ &\vrule#&\quad#\quad % rule, 0 (of and)
+ &\vrule#&\quad#\quad % rule, 1 (of and)
+ &\vrule# % rule between and and or
+ &\quad#\quad % 0 (of or)
+ &\vrule#&\quad#\quad % rule, 1 (of of)
+ &\vrule# % rule between or and xor
+ &\quad#\quad % 0 of xor
+ &\vrule#&\quad#\quad % rule, 1 of xor
+ \cr
+&\omit&\multispan{11}\hfil\bf Bit operator\hfil\cr
+\noalign{\smallskip}
+& &\multispan3\hfil AND\hfil&&\multispan3\hfil OR\hfil
+ &&\multispan3\hfil XOR\hfil\cr
+\bf Operands&&0&&1&&0&&1&&0&&1\cr
+\noalign{\hrule}
+\omit&height 2pt&&\omit&&&&\omit&&&&\omit\cr
+\noalign{\hrule height0pt}% without this the rule does not extend; why?
+0&&0&\omit&0&&0&\omit&1&&0&\omit&1\cr
+1&&0&\omit&1&&1&\omit&1&&1&\omit&0\cr
+}}}
+@end tex
+
+@docbook
+<informaltable>
+
+<tgroup cols="7" colsep="1">
+<colspec colname="c1"/>
+<colspec colname="c2"/>
+<colspec colname="c3"/>
+<colspec colname="c4"/>
+<colspec colname="c5"/>
+<colspec colname="c6"/>
+<colspec colname="c7"/>
+<spanspec spanname="optitle" namest="c2" nameend="c7" align="center"/>
+<spanspec spanname="andspan" namest="c2" nameend="c3" align="center"/>
+<spanspec spanname="orspan" namest="c4" nameend="c5" align="center"/>
+<spanspec spanname="xorspan" namest="c6" nameend="c7" align="center"/>
+
+<tbody>
+<row>
+<entry colsep="0"></entry>
+<entry spanname="optitle"><emphasis role="bold">Bit Operator</emphasis></entry>
+</row>
+
+<row rowsep="1">
+<entry rowsep="0"></entry>
+<entry spanname="andspan">AND</entry>
+<entry spanname="orspan">OR</entry>
+<entry spanname="xorspan">XOR</entry>
+</row>
+
+<row rowsep="1">
+<entry ><emphasis role="bold">Operands</emphasis></entry>
+<entry colsep="0">0</entry>
+<entry colsep="1">1</entry>
+<entry colsep="0">0</entry>
+<entry colsep="1">1</entry>
+<entry colsep="0">0</entry>
+<entry colsep="1">1</entry>
+</row>
+
+<row>
+<entry align="center">0</entry>
+<entry colsep="0">0</entry>
+<entry>0</entry>
+<entry colsep="0">0</entry>
+<entry>1</entry>
+<entry colsep="0">0</entry>
+<entry>1</entry>
+</row>
+
+<row>
+<entry align="center">1</entry>
+<entry colsep="0">0</entry>
+<entry>1</entry>
+<entry colsep="0">1</entry>
+<entry>1</entry>
+<entry colsep="0">1</entry>
+<entry>0</entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+@end docbook
+@end float
+
+@cindex bitwise, complement
+@cindex complement, bitwise
+As you can see, the result of an AND operation is 1 only when @emph{both}
+bits are 1.
+The result of an OR operation is 1 if @emph{either} bit is 1.
+The result of an XOR operation is 1 if either bit is 1,
+but not both.
+The next operation is the @dfn{complement}; the complement of 1 is 0 and
+the complement of 0 is 1. Thus, this operation ``flips'' all the bits
+of a given value.
+
+@cindex bitwise, shift
+@cindex left shift, bitwise
+@cindex right shift, bitwise
+@cindex shift, bitwise
+Finally, two other common operations are to shift the bits left or right.
+For example, if you have a bit string @samp{10111001} and you shift it
+right by three bits, you end up with @samp{00010111}.@footnote{This example
+shows that 0's come in on the left side. For @command{gawk}, this is
+always true, but in some languages, it's possible to have the left side
+fill with 1's.}
+@c Purposely decided to use 0's and 1's here. 2/2001.
+If you start over again with @samp{10111001} and shift it left by three
+bits, you end up with @samp{11001000}. The following list describes
+@command{gawk}'s built-in functions that implement the bitwise operations.
+Optional parameters are enclosed in square brackets ([ ]):
+
+@cindex @command{gawk}, bitwise operations in
+@table @code
+@cindexgawkfunc{and}
+@cindex bitwise AND
+@item @code{and(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
+Return the bitwise AND of the arguments. There must be at least two.
+
+@cindexgawkfunc{compl}
+@cindex bitwise complement
+@item @code{compl(@var{val})}
+Return the bitwise complement of @var{val}.
+
+@cindexgawkfunc{lshift}
+@cindex left shift
+@item @code{lshift(@var{val}, @var{count})}
+Return the value of @var{val}, shifted left by @var{count} bits.
+
+@cindexgawkfunc{or}
+@cindex bitwise OR
+@item @code{or(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
+Return the bitwise OR of the arguments. There must be at least two.
+
+@cindexgawkfunc{rshift}
+@cindex right shift
+@item @code{rshift(@var{val}, @var{count})}
+Return the value of @var{val}, shifted right by @var{count} bits.
+
+@cindexgawkfunc{xor}
+@cindex bitwise XOR
+@item @code{xor(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
+Return the bitwise XOR of the arguments. There must be at least two.
+@end table
+
+For all of these functions, first the double-precision floating-point value is
+converted to the widest C unsigned integer type, then the bitwise operation is
+performed. If the result cannot be represented exactly as a C @code{double},
+leading nonzero bits are removed one by one until it can be represented
+exactly. The result is then converted back into a C @code{double}. (If
+you don't understand this paragraph, don't worry about it.)
+
+Here is a user-defined function (@pxref{User-defined})
+that illustrates the use of these functions:
+
+@cindex @code{bits2str()} user-defined function
+@cindex @code{testbits.awk} program
+@example
+@group
+@c file eg/lib/bits2str.awk
+# bits2str --- turn a byte into readable 1's and 0's
+
+function bits2str(bits, data, mask)
+@{
+ if (bits == 0)
+ return "0"
+
+ mask = 1
+ for (; bits != 0; bits = rshift(bits, 1))
+ data = (and(bits, mask) ? "1" : "0") data
+
+ while ((length(data) % 8) != 0)
+ data = "0" data
+
+ return data
+@}
+@c endfile
+@end group
+
+@c this is a hack to make testbits.awk self-contained
+@ignore
+@c file eg/prog/testbits.awk
+# bits2str --- turn a byte into readable 1's and 0's
+
+function bits2str(bits, data, mask)
+@{
+ if (bits == 0)
+ return "0"
+
+ mask = 1
+ for (; bits != 0; bits = rshift(bits, 1))
+ data = (and(bits, mask) ? "1" : "0") data
+
+ while ((length(data) % 8) != 0)
+ data = "0" data
+
+ return data
+@}
+@c endfile
+@end ignore
+@c file eg/prog/testbits.awk
+BEGIN @{
+ printf "123 = %s\n", bits2str(123)
+ printf "0123 = %s\n", bits2str(0123)
+ printf "0x99 = %s\n", bits2str(0x99)
+ comp = compl(0x99)
+ printf "compl(0x99) = %#x = %s\n", comp, bits2str(comp)
+ shift = lshift(0x99, 2)
+ printf "lshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift)
+ shift = rshift(0x99, 2)
+ printf "rshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift)
+@}
+@c endfile
+@end example
+
+@noindent
+This program produces the following output when run:
+
+@example
+$ @kbd{gawk -f testbits.awk}
+@print{} 123 = 01111011
+@print{} 0123 = 01010011
+@print{} 0x99 = 10011001
+@print{} compl(0x99) = 0xffffff66 = 11111111111111111111111101100110
+@print{} lshift(0x99, 2) = 0x264 = 0000001001100100
+@print{} rshift(0x99, 2) = 0x26 = 00100110
+@end example
+
+@cindex converting, strings to numbers
+@cindex strings, converting
+@cindex numbers, converting
+@cindex converting, numbers to strings
+@cindex number as string of bits
+The @code{bits2str()} function turns a binary number into a string.
+The number @code{1} represents a binary value where the rightmost bit
+is set to 1. Using this mask,
+the function repeatedly checks the rightmost bit.
+ANDing the mask with the value indicates whether the
+rightmost bit is 1 or not. If so, a @code{"1"} is concatenated onto the front
+of the string.
+Otherwise, a @code{"0"} is added.
+The value is then shifted right by one bit and the loop continues
+until there are no more 1 bits.
+
+If the initial value is zero, the function simply returns @code{"0"}.
+Otherwise, at the end, it pads the result with zeros so that its length
+is a multiple of eight, matching the eight-bit bytes into which data is
+organized on modern computers.
+
+The main code in the @code{BEGIN} rule shows the difference between the
+decimal and octal values for the same numbers
+(@pxref{Nondecimal-numbers}),
+and then demonstrates the
+results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
+
+@node Type Functions
+@subsection Getting Type Information
+
+@command{gawk} provides a single function that lets you distinguish
+an array from a scalar variable. This is necessary for writing code
+that traverses every element of an array of arrays
+(@pxref{Arrays of Arrays}).
+
+@table @code
+@cindexgawkfunc{isarray}
+@cindex scalar or array
+@item isarray(@var{x})
+Return a true value if @var{x} is an array. Otherwise return false.
+@end table
+
+@code{isarray()} is meant for use in two circumstances. The first is when
+traversing a multidimensional array: you can test if an element is itself
+an array or not. The second is inside the body of a user-defined function
+(not discussed yet; @pxref{User-defined}), to test if a parameter is an
+array or not.
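+
+For instance, a recursive traversal function might use it along these
+lines (a sketch only; @code{walk()} is not part of @command{gawk}):
+
+@example
+function walk(arr, name,    i)
+@{
+    for (i in arr) @{
+        if (isarray(arr[i]))
+            walk(arr[i], name "[" i "]")
+        else
+            printf "%s[%s] = %s\n", name, i, arr[i]
+    @}
+@}
+@end example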
+
+@quotation NOTE
+Using @code{isarray()} at the global level to test
+variables makes no sense. Because you are the one writing the program, you
+are supposed to know if your variables are arrays or not. And in fact,
+due to the way @command{gawk} works, if you pass the name of a variable
+that has not been previously used to @code{isarray()}, @command{gawk}
+ends up turning it into a scalar.
+@end quotation
+
+@node I18N Functions
+@subsection String-Translation Functions
+@cindex @command{gawk}, string-translation functions
+@cindex functions, string-translation
+@cindex string-translation functions
+@cindex internationalization
+@cindex @command{awk} programs, internationalizing
+
+@command{gawk} provides facilities for internationalizing @command{awk} programs.
+These include the functions described in the following list.
+The descriptions here are purposely brief.
+@xref{Internationalization},
+for the full story.
+Optional parameters are enclosed in square brackets ([ ]):
+
+@table @asis
+@cindexgawkfunc{bindtextdomain}
+@cindex set directory of message catalogs
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain}]@code{)}
+Set the directory in which
+@command{gawk} will look for message translation files, in case they
+will not or cannot be placed in the ``standard'' locations
+(e.g., during testing).
+It returns the directory in which @var{domain} is ``bound.''
+
+The default @var{domain} is the value of @code{TEXTDOMAIN}.
+If @var{directory} is the null string (@code{""}), then
+@code{bindtextdomain()} returns the current binding for the
+given @var{domain}.
+
+@cindexgawkfunc{dcgettext}
+@cindex translate string
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
+Return the translation of @var{string} in
+text domain @var{domain} for locale category @var{category}.
+The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
+The default value for @var{category} is @code{"LC_MESSAGES"}.
+
+@cindexgawkfunc{dcngettext}
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
+Return the plural form used for @var{number} of the
+translation of @var{string1} and @var{string2} in text domain
+@var{domain} for locale category @var{category}. @var{string1} is the
+English singular variant of a message, and @var{string2} the English plural
+variant of the same message.
+The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
+The default value for @var{category} is @code{"LC_MESSAGES"}.
+@end table
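+
+As a minimal sketch (the directory, text domain, and messages here are
+purely hypothetical), these functions might be used together like this:
+
+@example
+BEGIN @{
+    TEXTDOMAIN = "myprog"
+    bindtextdomain("/usr/local/share/myprog/msgs")   # hypothetical directory
+    n_files = 3
+    print dcgettext("Please enter a number")
+    print dcngettext("one file", "many files", n_files)
+@}
+@end example
+
+@noindent
+If no message catalog is found, the functions simply return their
+English arguments, so the program still runs in the default locale.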
+
+@node User-defined
+@section User-Defined Functions
+
+@cindex user-defined functions
+@cindex functions, user-defined
+Complicated @command{awk} programs can often be simplified by defining
+your own functions. User-defined functions can be called just like
+built-in ones (@pxref{Function Calls}), but it is up to you to define
+them (i.e., to tell @command{awk} what they should do).
+
+@menu
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and what it
+ does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+* Dynamic Typing:: How variable types can change at runtime.
+@end menu
+
+@node Definition Syntax
+@subsection Function Definition Syntax
+
+@quotation
+@i{It's entirely fair to say that the @command{awk} syntax for local
+variable definitions is appallingly awful.}
+@author Brian Kernighan
+@end quotation
+
+@cindex functions, defining
+Definitions of functions can appear anywhere between the rules of an
+@command{awk} program. Thus, the general form of an @command{awk} program is
+extended to include sequences of rules @emph{and} user-defined function
+definitions.
+There is no need to put the definition of a function
+before all uses of the function. This is because @command{awk} reads the
+entire program before starting to execute any of it.
+
+The definition of a function named @var{name} looks like this:
+
+@display
+@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)}
+@code{@{}
+ @var{body-of-function}
+@code{@}}
+@end display
+
+@cindex names, functions
+@cindex functions, names of
+@cindex namespace issues, functions
+@noindent
+Here, @var{name} is the name of the function to define. A valid function
+name is like a valid variable name: a sequence of letters, digits, and
+underscores that doesn't start with a digit.
+Here too, only the 52 upper- and lowercase English letters may
+be used in a function name.
+Within a single @command{awk} program, any particular name can only be
+used as a variable, array, or function.
+
+@var{parameter-list} is an optional list of the function's arguments and local
+variable names, separated by commas. When the function is called,
+the argument names are used to hold the argument values given in
+the call.
+
+A function cannot have two parameters with the same name, nor may it
+have a parameter with the same name as the function itself.
+In addition, according to the POSIX standard, function parameters
+cannot have the same name as one of the special predefined variables
+(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce
+this restriction.
+
+Local variables act like the empty string if referenced where a string
+value is required, and like zero if referenced where a numeric value
+is required. This is the same as regular variables that have never been
+assigned a value. (There is more to understand about local variables;
+@pxref{Dynamic Typing}.)
+
+The @var{body-of-function} consists of @command{awk} statements. It is the
+most important part of the definition, because it says what the function
+should actually @emph{do}. The argument names exist to give the body a
+way to talk about the arguments; local variables exist to give the body
+places to keep temporary values.
+
+Argument names are not distinguished syntactically from local variable
+names. Instead, the number of arguments supplied when the function is
+called determines how many argument variables there are. Thus, if three
+argument values are given, the first three names in @var{parameter-list}
+are arguments and the rest are local variables.
+
+It follows that if the number of arguments is not the same in all calls
+to the function, some of the names in @var{parameter-list} may be
+arguments on some occasions and local variables on others. Another
+way to think of this is that omitted arguments default to the
+null string.
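+
+For example, here is a small sketch of a function (hypothetical, not part
+of any library) in which the second ``argument'' is really an optional value:
+
+@example
+function greet(name, greeting)   # greeting is optional
+@{
+    if (greeting == "")
+        greeting = "Hello"
+    print greeting ", " name
+@}
+
+BEGIN @{
+    greet("Arnold")              # greeting defaults to ""
+    greet("Arnold", "Howdy")
+@}
+@end example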
+
+@cindex programming conventions, functions, writing
+Usually when you write a function, you know how many names you intend to
+use for arguments and how many you intend to use as local variables. It is
+conventional to place some extra space between the arguments and
+the local variables, in order to document how your function is supposed to be used.
+
+@cindex variables, shadowing
+@cindex shadowing of variable values
+During execution of the function body, the arguments and local variable
+values hide, or @dfn{shadow}, any variables of the same names used in the
+rest of the program. The shadowed variables are not accessible in the
+function definition, because there is no way to name them while their
+names have been taken away for the local variables. All other variables
+used in the @command{awk} program can be referenced or set normally in the
+function's body.
+
+The arguments and local variables last only as long as the function body
+is executing. Once the body finishes, you can once again access the
+variables that were shadowed while the function was running.
+
+@cindex recursive functions
+@cindex functions, recursive
+The function body can contain expressions that call functions. They
+can even call this function, either directly or by way of another
+function. When this happens, we say the function is @dfn{recursive}.
+The act of a function calling itself is called @dfn{recursion}.
+
+All the built-in functions return a value to their caller.
+User-defined functions can do so also, using the @code{return} statement,
+which is described in detail in @ref{Return Statement}.
+Many of the subsequent examples in this @value{SECTION} use
+the @code{return} statement.
+
+@cindex common extensions, @code{func} keyword
+@cindex extensions, common@comma{} @code{func} keyword
+@c @cindex @command{awk} language, POSIX version
+@c @cindex POSIX @command{awk}
+@cindex POSIX @command{awk}, @code{function} keyword in
+In many @command{awk} implementations, including @command{gawk},
+the keyword @code{function} may be
+abbreviated @code{func}. @value{COMMONEXT}
+However, POSIX only specifies the use of
+the keyword @code{function}. This actually has some practical implications.
+If @command{gawk} is in POSIX-compatibility mode
+(@pxref{Options}), then the following
+statement does @emph{not} define a function:
+
+@example
+func foo() @{ a = sqrt($1) ; print a @}
+@end example
+
+@noindent
+Instead, it defines a rule that, for each record, concatenates the value
+of the variable @samp{func} with the return value of the function @samp{foo}.
+If the resulting string is non-null, the action is executed.
+This is probably not what is desired. (@command{awk} accepts this input as
+syntactically valid, because functions may be used before they are defined
+in @command{awk} programs.@footnote{This program won't actually run,
+because @code{foo()} is undefined.})
+
+@cindex portability, functions@comma{} defining
+To ensure that your @command{awk} programs are portable, always use the
+keyword @code{function} when defining a function.
+
+@node Function Example
+@subsection Function Definition Examples
+@cindex function definition example
+
+Here is an example of a user-defined function, called @code{myprint()}, that
+takes a number and prints it in a specific format:
+
+@example
+function myprint(num)
+@{
+ printf "%6.3g\n", num
+@}
+@end example
+
+@noindent
+To illustrate, here is an @command{awk} rule that uses our @code{myprint}
+function:
+
+@example
+$3 > 0 @{ myprint($3) @}
+@end example
+
+@noindent
+This program prints, in our special format, all the third fields that
+contain a positive number in our input. Therefore, when given the following input:
+
+@example
+ 1.2 3.4 5.6 7.8
+ 9.10 11.12 -13.14 15.16
+17.18 19.20 21.22 23.24
+@end example
+
+@noindent
+this program, using our function to format the results, prints:
+
+@example
+ 5.6
+ 21.2
+@end example
+
+This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
+
+@example
+function delarray(a, i)
+@{
+ for (i in a)
+ delete a[i]
+@}
+@end example
+
+When working with arrays, it is often necessary to delete all the elements
+in an array and start over with a new list of elements
+(@pxref{Delete}).
+Instead of having
+to repeat this loop everywhere that you need to clear out
+an array, your program can just call @code{delarray}.
+(This guarantees portability. The use of @samp{delete @var{array}} to delete
+the contents of an entire array is a relatively recent@footnote{Late in 2012.}
+addition to the POSIX standard.)
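+
+For example, the following sketch (the array contents are made up)
+shows that the array really is empty after the call:
+
+@example
+BEGIN @{
+    a["one"] = 1; a["two"] = 2
+    delarray(a)
+    n = 0
+    for (k in a)
+        n++
+    print n          # prints 0; the array is now empty
+@}
+@end example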
+
+The following is an example of a recursive function. It takes a string
+as an input parameter and returns the string in backwards order.
+Recursive functions must always have a test that stops the recursion.
+In this case, the recursion terminates when the input string is
+already empty:
+
+@c 8/2014: Thanks to Mike Brennan for the improved formulation
+@cindex @code{rev()} user-defined function
+@example
+function rev(str)
+@{
+ if (str == "")
+ return ""
+
+ return (rev(substr(str, 2)) substr(str, 1, 1))
+@}
+@end example
+
+If this function is in a file named @file{rev.awk}, it can be tested
+this way:
+
+@example
+$ @kbd{echo "Don't Panic!" |}
+> @kbd{gawk -e '@{ print rev($0) @}' -f rev.awk}
+@print{} !cinaP t'noD
+@end example
+
+The C @code{ctime()} function takes a timestamp and returns it as a string,
+formatted in a well-known fashion.
+The following example uses the built-in @code{strftime()} function
+(@pxref{Time Functions})
+to create an @command{awk} version of @code{ctime()}:
+
+@cindex @code{ctime()} user-defined function
+@example
+@c file eg/lib/ctime.awk
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts, format)
+@{
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+@}
+@c endfile
+@end example
+
+You might think that @code{ctime()} could use @code{PROCINFO["strftime"]}
+for its format string. That would be a mistake, because @code{ctime()} is
+supposed to return the time formatted in a standard fashion, and user-level
+code could have changed @code{PROCINFO["strftime"]}.
+
+@node Function Caveats
+@subsection Calling User-Defined Functions
+
+@cindex functions, user-defined, calling
+@dfn{Calling a function} means causing the function to run and do its job.
+A function call is an expression and its value is the value returned by
+the function.
+
+@menu
+* Calling A Function:: Don't use spaces.
+* Variable Scope:: Controlling variable scope.
+* Pass By Value/Reference:: Passing parameters.
+@end menu
+
+@node Calling A Function
+@subsubsection Writing a Function Call
+
+A function call consists of the function name followed by the arguments
+in parentheses. @command{awk} expressions are what you write in the
+call for the arguments. Each time the call is executed, these
+expressions are evaluated, and the values become the actual arguments. For
+example, here is a call to @code{foo()} with three arguments (the first
+being a string concatenation):
+
+@example
+foo(x y, "lose", 4 * z)
+@end example
+
+@quotation CAUTION
+Whitespace characters (spaces and TABs) are not allowed
+between the function name and the opening parenthesis of the argument list.
+If you write whitespace by mistake, @command{awk} might think that you mean
+to concatenate a variable with an expression in parentheses. However, it
+notices that you used a function name and not a variable name, and reports
+an error.
+@end quotation
+
+@node Variable Scope
+@subsubsection Controlling Variable Scope
+
+@cindex local variables, in a function
+@cindex variables, local to a function
+Unlike many languages,
+there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block in
+@command{awk}, but you can make a variable local to a function. It is
+good practice to do so whenever a variable is needed only in that
+function.
+
+To make a variable local to a function, simply declare the variable as
+an argument after the actual function arguments
+(@pxref{Definition Syntax}).
+Look at the following example where variable
+@code{i} is a global variable used by both functions @code{foo()} and
+@code{bar()}:
+
+@example
+function bar()
+@{
+ for (i = 0; i < 3; i++)
+ print "bar's i=" i
+@}
+
+function foo(j)
+@{
+ i = j + 1
+ print "foo's i=" i
+ bar()
+ print "foo's i=" i
+@}
+
+BEGIN @{
+ i = 10
+ print "top's i=" i
+ foo(0)
+ print "top's i=" i
+@}
+@end example
+
+Running this script produces the following, because the @code{i} in
+functions @code{foo()} and @code{bar()} and at the top level refer to the same
+variable instance:
+
+@example
+top's i=10
+foo's i=1
+bar's i=0
+bar's i=1
+bar's i=2
+foo's i=3
+top's i=3
+@end example
+
+If you want @code{i} to be local to both @code{foo()} and @code{bar()}, do as
+follows (the extra space before @code{i} is a coding convention to
+indicate that @code{i} is a local variable, not an argument):
+
+@example
+function bar( i)
+@{
+ for (i = 0; i < 3; i++)
+ print "bar's i=" i
+@}
+
+function foo(j, i)
+@{
+ i = j + 1
+ print "foo's i=" i
+ bar()
+ print "foo's i=" i
+@}
+
+BEGIN @{
+ i = 10
+ print "top's i=" i
+ foo(0)
+ print "top's i=" i
+@}
+@end example
+
+Running the corrected script produces the following:
+
+@example
+top's i=10
+foo's i=1
+bar's i=0
+bar's i=1
+bar's i=2
+foo's i=1
+top's i=10
+@end example
+
+Besides scalar values (strings and numbers), you may also have
+local arrays. If you use a parameter name as an array, @command{awk}
+treats it as an array, and that array is local to the function.
+In addition, recursive calls create new arrays.
+Consider this example:
+
+@example
+function some_func(p1, a)
+@{
+ if (p1++ > 3)
+ return
+
+ a[p1] = p1
+
+ some_func(p1)
+
+ printf("At level %d, index %d %s found in a\n",
+ p1, (p1 - 1), (p1 - 1) in a ? "is" : "is not")
+ printf("At level %d, index %d %s found in a\n",
+ p1, p1, p1 in a ? "is" : "is not")
+ print ""
+@}
+
+BEGIN @{
+ some_func(1)
+@}
+@end example
+
+When run, this program produces the following output:
+
+@example
+At level 4, index 3 is not found in a
+At level 4, index 4 is found in a
+
+At level 3, index 2 is not found in a
+At level 3, index 3 is found in a
+
+At level 2, index 1 is not found in a
+At level 2, index 2 is found in a
+@end example
+
+@node Pass By Value/Reference
+@subsubsection Passing Function Arguments by Value Or by Reference
+
+In @command{awk}, when you declare a function, there is no way to
+declare explicitly whether the arguments are passed @dfn{by value} or
+@dfn{by reference}.
+
+Instead, the passing convention is determined at runtime when
+the function is called according to the following rule:
+if the argument is an array variable, then it is passed by reference.
+Otherwise, the argument is passed by value.
+
+@cindex call by value
+Passing an argument by value means that when a function is called, it
+is given a @emph{copy} of the value of this argument.
+The caller may use a variable as the expression for the argument, but
+the called function does not know this---it only knows what value the
+argument had. For example, if you write the following code:
+
+@example
+foo = "bar"
+z = myfunc(foo)
+@end example
+
+@noindent
+then you should not think of the argument to @code{myfunc()} as being
+``the variable @code{foo}.'' Instead, think of the argument as the
+string value @code{"bar"}.
+If the function @code{myfunc()} alters the values of its local variables,
+this has no effect on any other variables. Thus, if @code{myfunc()}
+does this:
+
+@example
+function myfunc(str)
+@{
+ print str
+ str = "zzz"
+ print str
+@}
+@end example
+
+@noindent
+to change its first argument variable @code{str}, it does @emph{not}
+change the value of @code{foo} in the caller. The role of @code{foo} in
+calling @code{myfunc()} ended when its value (@code{"bar"}) was computed.
+If @code{str} also exists outside of @code{myfunc()}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc()} and cannot be seen or changed from there.
+
+@cindex call by reference
+@cindex arrays, as parameters to functions
+@cindex functions, arrays as parameters to
+However, when arrays are the parameters to functions, they are @emph{not}
+copied. Instead, the array itself is made available for direct manipulation
+by the function. This is usually termed @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.
+
+@quotation NOTE
+Changing an array parameter inside a function
+can be very dangerous if you do not watch what you are doing.
+For example:
+
+@example
+function changeit(array, ind, nvalue)
+@{
+ array[ind] = nvalue
+@}
+
+BEGIN @{
+ a[1] = 1; a[2] = 2; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n",
+ a[1], a[2], a[3]
+@}
+@end example
+
+@noindent
+prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
+@code{changeit()} stores @code{"two"} in the second element of @code{a}.
+@end quotation
+
+@cindex undefined functions
+@cindex functions, undefined
+Some @command{awk} implementations allow you to call a function that
+has not been defined. They only report a problem at runtime when the
+program actually tries to call the function. For example:
+
+@example
+BEGIN @{
+ if (0)
+ foo()
+ else
+ bar()
+@}
+function bar() @{ @dots{} @}
+# note that `foo' is not defined
+@end example
+
+@noindent
+Because the @samp{if} statement will never be true, it is not really a
+problem that @code{foo()} has not been defined. Usually, though, it is a
+problem if a program calls an undefined function.
+
+@cindex lint checking, undefined functions
+If @option{--lint} is specified
+(@pxref{Options}),
+@command{gawk} reports calls to undefined functions.
+
+@cindex portability, @code{next} statement in user-defined functions
+Some @command{awk} implementations generate a runtime
+error if you use either the @code{next} statement
+or the @code{nextfile} statement
+(@pxref{Next Statement}, and
+@ifdocbook
+@ref{Nextfile Statement})
+@end ifdocbook
+@ifnotdocbook
+@pxref{Nextfile Statement})
+@end ifnotdocbook
+inside a user-defined function.
+@command{gawk} does not have this limitation.
+
+@node Return Statement
+@subsection The @code{return} Statement
+@cindex @code{return} statement@comma{} user-defined functions
+
+As seen in several earlier examples,
+the body of a user-defined function can contain a @code{return} statement.
+This statement returns control to the calling part of the @command{awk} program. It
+can also be used to return a value for use in the rest of the @command{awk}
+program. It looks like this:
+
+@display
+@code{return} [@var{expression}]
+@end display
+
+The @var{expression} part is optional.
+Due most likely to an oversight, POSIX does not define what the return
+value is if you omit the @var{expression}. Technically speaking, this
+makes the returned value undefined, and therefore, unpredictable.
+In practice, though, all versions of @command{awk} simply return the
+null string, which acts like zero if used in a numeric context.
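+
+The following small sketch (not taken from any real program) shows this
+behavior:
+
+@example
+function noval()
+@{
+    return            # no expression
+@}
+
+BEGIN @{
+    print noval() + 0         # prints 0
+    print "<" noval() ">"     # prints <>
+@}
+@end example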
+
+A @code{return} statement with no value expression is assumed at the end of
+every function definition. So if control reaches the end of the function
+body, then technically, the function returns an unpredictable value.
+In practice, it returns the empty string. @command{awk}
+does @emph{not} warn you if you use the return value of such a function.
+
+Sometimes, you want to write a function for what it does, not for
+what it returns. Such a function corresponds to a @code{void} function
+in C, C++, or Java, or to a @code{procedure} in Ada. Thus, it may be appropriate not to
+return any value; simply bear in mind that you should not be using the
+return value of such a function.
+
+The following is an example of a user-defined function that returns a value
+for the largest number among the elements of an array:
+
+@example
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end example
+
+@cindex programming conventions, function parameters
+@noindent
+You call @code{maxelt()} with one argument, which is an array name. The local
+variables @code{i} and @code{ret} are not intended to be arguments;
+there is nothing to stop you from passing more than one argument
+to @code{maxelt()} but the results would be strange. The extra space before
+@code{i} in the function parameter list indicates that @code{i} and
+@code{ret} are local variables.
+You should follow this convention when defining functions.
+
+The following program uses the @code{maxelt()} function. It loads an
+array, calls @code{maxelt()}, and then reports the maximum number in that
+array:
+
+@example
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+
+# Load all fields of each record into nums.
+@{
+ for(i = 1; i <= NF; i++)
+ nums[NR, i] = $i
+@}
+
+END @{
+ print maxelt(nums)
+@}
+@end example
+
+Given the following input:
+
+@example
+ 1 5 23 8 16
+44 3 5 2 8 26
+256 291 1396 2962 100
+-6 467 998 1101
+99385 11 0 225
+@end example
+
+@noindent
+the program reports (predictably) that 99,385 is the largest value
+in the array.
+
+@node Dynamic Typing
+@subsection Functions and Their Effects on Variable Typing
+
+@command{awk} is a very fluid language.
+It is possible that @command{awk} can't tell if an identifier
+represents a scalar variable or an array until runtime.
+Here is an annotated sample program:
+
+@example
+function foo(a)
+@{
+ a[1] = 1 # parameter is an array
+@}
+
+BEGIN @{
+ b = 1
+ foo(b) # invalid: fatal type mismatch
+
+ foo(x) # x uninitialized, becomes an array dynamically
+ x = 1 # now not allowed, runtime error
+@}
+@end example
+
+In this example, the first call to @code{foo()} generates
+a fatal error, so @command{awk} will not report the second
+error. If you comment out that call, though, then @command{awk}
+does report the second error.
+
+Usually, such things aren't a big issue, but it's worth
+being aware of them.
+
+@node Indirect Calls
+@section Indirect Function Calls
+
+@cindex indirect function calls
+@cindex function calls, indirect
+@cindex function pointers
+@cindex pointers to functions
+@cindex differences in @command{awk} and @command{gawk}, indirect function calls
+
+This section describes an advanced, @command{gawk}-specific extension.
+
+Often, you may wish to defer the choice of function to call until runtime.
+For example, you may have different kinds of records, each of which
+should be processed differently.
+
+Normally, you would have to use a series of @code{if}-@code{else}
+statements to decide which function to call. By using @dfn{indirect}
+function calls, you can specify the name of the function to call as a
+string variable, and then call the function. Let's look at an example.
+
+Suppose you have a file with your test scores for the classes you
+are taking. The first field is the class name. The following fields
+are the functions to call to process the data, up to a ``marker''
+field @samp{data:}. Following the marker, to the end of the record,
+are the various numeric test scores.
+
+Here is the initial file; you wish to get the sum and the average of
+your test scores:
+
+@example
+@c file eg/data/class_data1
+Biology_101 sum average data: 87.0 92.4 78.5 94.9
+Chemistry_305 sum average data: 75.2 98.3 94.7 88.2
+English_401 sum average data: 100.0 95.6 87.1 93.4
+@c endfile
+@end example
+
+To process the data, you might write initially:
+
+@example
+@{
+ class = $1
+ for (i = 2; $i != "data:"; i++) @{
+ if ($i == "sum")
+ sum() # processes the whole record
+ else if ($i == "average")
+ average()
+ @dots{} # and so on
+ @}
+@}
+@end example
+
+@noindent
+This style of programming works, but can be awkward. With @dfn{indirect}
+function calls, you tell @command{gawk} to use the @emph{value} of a
+variable as the @emph{name} of the function to call.
+
+@cindex @code{@@}-notation for indirect function calls
+@cindex indirect function calls, @code{@@}-notation
+@cindex function calls, indirect, @code{@@}-notation for
+The syntax is similar to that of a regular function call: an identifier
+immediately followed by an opening parenthesis, any arguments, and then
+a closing parenthesis, with the addition of a leading @samp{@@}
+character:
+
+@example
+the_func = "sum"
+result = @@the_func() # calls the sum() function
+@end example
+
+Here is a full program that processes the previously shown data,
+using indirect function calls:
+
+@example
+@c file eg/prog/indirectcall.awk
+# indirectcall.awk --- Demonstrate indirect function calls
+@c endfile
+@ignore
+@c file eg/prog/indirectcall.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# January 2009
+@c endfile
+@end ignore
+
+@c file eg/prog/indirectcall.awk
+# average --- return the average of the values in fields $first - $last
+
+function average(first, last, sum, i)
+@{
+ sum = 0;
+ for (i = first; i <= last; i++)
+ sum += $i
+
+ return sum / (last - first + 1)
+@}
+
+# sum --- return the sum of the values in fields $first - $last
+
+function sum(first, last, ret, i)
+@{
+ ret = 0;
+ for (i = first; i <= last; i++)
+ ret += $i
+
+ return ret
+@}
+@c endfile
+@end example
+
+These two functions expect to work on fields; thus the parameters
+@code{first} and @code{last} indicate where in the fields to start and end.
+Otherwise they perform the expected computations and are not unusual:
+
+@example
+@c file eg/prog/indirectcall.awk
+# For each record, print the class name and the requested statistics
+@{
+ class_name = $1
+ gsub(/_/, " ", class_name) # Replace _ with spaces
+
+ # find start
+ for (i = 1; i <= NF; i++) @{
+ if ($i == "data:") @{
+ start = i + 1
+ break
+ @}
+ @}
+
+ printf("%s:\n", class_name)
+ for (i = 2; $i != "data:"; i++) @{
+ the_function = $i
+ printf("\t%s: <%s>\n", $i, @@the_function(start, NF) "")
+ @}
+ print ""
+@}
+@c endfile
+@end example
+
+This is the main processing for each record. It prints the class name (with
+underscores replaced with spaces). It then finds the start of the actual data,
+saving it in @code{start}.
+The last part of the code loops through each function name (from @code{$2} up to
+the marker, @samp{data:}), calling the function named by the field. The indirect
+function call itself occurs as a parameter in the call to @code{printf}.
+(The @code{printf} format string uses @samp{%s} as the format specifier so that we
+can use functions that return strings, as well as numbers. Note that the result
+from the indirect call is concatenated with the empty string, in order to force
+it to be a string value.)
+
+Here is the result of running the program:
+
+@example
+$ @kbd{gawk -f indirectcall.awk class_data1}
+@print{} Biology 101:
+@print{} sum: <352.8>
+@print{} average: <88.2>
+@print{}
+@print{} Chemistry 305:
+@print{} sum: <356.4>
+@print{} average: <89.1>
+@print{}
+@print{} English 401:
+@print{} sum: <376.1>
+@print{} average: <94.025>
+@end example
+
+The ability to use indirect function calls is more powerful than you may
+think at first. The C and C++ languages provide ``function pointers,'' which
+are a mechanism for calling a function chosen at runtime. One of the most
+well-known uses of this ability is the C @code{qsort()} function, which sorts
+an array using the famous ``quick sort'' algorithm
+(see @uref{http://en.wikipedia.org/wiki/Quick_sort, the Wikipedia article}
+for more information). To use this function, you supply a pointer to a comparison
+function. This mechanism allows you to sort arbitrary data in an arbitrary
+fashion.
+
+We can do something similar using @command{gawk}, like this:
+
+@example
+@c file eg/lib/quicksort.awk
+# quicksort.awk --- Quicksort algorithm, with user-supplied
+# comparison function
+@c endfile
+@ignore
+@c file eg/lib/quicksort.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# January 2009
+
+@c endfile
+
+@end ignore
+@c file eg/lib/quicksort.awk
+# quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia
+# or almost any algorithms or computer science text
+@c endfile
+@ignore
+@c file eg/lib/quicksort.awk
+#
+# Adapted from K&R-II, page 110
+@c endfile
+@end ignore
+@c file eg/lib/quicksort.awk
+
+function quicksort(data, left, right, less_than, i, last)
+@{
+ if (left >= right) # do nothing if array contains fewer
+ return # than two elements
+
+ quicksort_swap(data, left, int((left + right) / 2))
+ last = left
+ for (i = left + 1; i <= right; i++)
+ if (@@less_than(data[i], data[left]))
+ quicksort_swap(data, ++last, i)
+ quicksort_swap(data, left, last)
+ quicksort(data, left, last - 1, less_than)
+ quicksort(data, last + 1, right, less_than)
+@}
+
+# quicksort_swap --- helper function for quicksort, should really be inline
+
+function quicksort_swap(data, i, j, temp)
+@{
+ temp = data[i]
+ data[i] = data[j]
+ data[j] = temp
+@}
+@c endfile
+@end example
+
+The @code{quicksort()} function receives the @code{data} array, the starting and ending
+indices to sort (@code{left} and @code{right}), and the name of a function that
+performs a ``less than'' comparison. It then implements the quick sort algorithm.
+
+To make use of the sorting function, we return to our previous example. The
+first thing to do is write some comparison functions:
+
+@example
+@c file eg/prog/indirectcall.awk
+# num_lt --- do a numeric less than comparison
+
+function num_lt(left, right)
+@{
+ return ((left + 0) < (right + 0))
+@}
+
+# num_ge --- do a numeric greater than or equal to comparison
+
+function num_ge(left, right)
+@{
+ return ((left + 0) >= (right + 0))
+@}
+@c endfile
+@end example
+
+The @code{num_ge()} function is needed to perform a descending sort; when used
+to perform a ``less than'' test, it actually does the opposite (greater than
+or equal to), which yields data sorted in descending order.
+
+Next comes a sorting function. It is parameterized with the starting and
+ending field numbers and the comparison function. It builds an array with
+the data and calls @code{quicksort()} appropriately, and then formats the
+results as a single string:
+
+@example
+@c file eg/prog/indirectcall.awk
+# do_sort --- sort the data according to `compare'
+# and return it as a string
+
+function do_sort(first, last, compare, data, i, retval)
+@{
+ delete data
+ for (i = 1; first <= last; first++) @{
+ data[i] = $first
+ i++
+ @}
+
+ quicksort(data, 1, i-1, compare)
+
+ retval = data[1]
+ for (i = 2; i in data; i++)
+ retval = retval " " data[i]
+
+ return retval
+@}
+@c endfile
+@end example
+
+Finally, the two sorting functions call @code{do_sort()}, passing in the
+names of the two comparison functions:
+
+@example
+@c file eg/prog/indirectcall.awk
+# sort --- sort the data in ascending order and return it as a string
+
+function sort(first, last)
+@{
+ return do_sort(first, last, "num_lt")
+@}
+
+# rsort --- sort the data in descending order and return it as a string
+
+function rsort(first, last)
+@{
+ return do_sort(first, last, "num_ge")
+@}
+@c endfile
+@end example
+
+Here is an extended version of the @value{DF}:
+
+@example
+@c file eg/data/class_data2
+Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9
+Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2
+English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4
+@c endfile
+@end example
+
+Finally, here are the results when the enhanced program is run:
+
+@example
+$ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2}
+@print{} Biology 101:
+@print{} sum: <352.8>
+@print{} average: <88.2>
+@print{} sort: <78.5 87.0 92.4 94.9>
+@print{} rsort: <94.9 92.4 87.0 78.5>
+@print{}
+@print{} Chemistry 305:
+@print{} sum: <356.4>
+@print{} average: <89.1>
+@print{} sort: <75.2 88.2 94.7 98.3>
+@print{} rsort: <98.3 94.7 88.2 75.2>
+@print{}
+@print{} English 401:
+@print{} sum: <376.1>
+@print{} average: <94.025>
+@print{} sort: <87.1 93.4 95.6 100.0>
+@print{} rsort: <100.0 95.6 93.4 87.1>
+@end example
+
+Another example where indirect function calls are useful can be found in
+processing arrays. @DBREF{Walking Arrays} presented a simple function
+for ``walking'' an array of arrays. That function simply printed the
+name and value of each scalar array element. However, it is easy to
+generalize that function, by passing in the name of a function to call
+when walking an array. The modified function looks like this:
+
+@example
+@c file eg/lib/processarray.awk
+function process_array(arr, name, process, do_arrays, i, new_name)
+@{
+ for (i in arr) @{
+ new_name = (name "[" i "]")
+ if (isarray(arr[i])) @{
+ if (do_arrays)
+ @@process(new_name, arr[i])
+ process_array(arr[i], new_name, process, do_arrays)
+ @} else
+ @@process(new_name, arr[i])
+ @}
+@}
+@c endfile
+@end example
+
+The arguments are as follows:
+
+@table @code
+@item arr
+The array.
+
+@item name
+The name of the array (a string).
+
+@item process
+The name of the function to call.
+
+@item do_arrays
+If this is true, the function can handle elements that are subarrays.
+@end table
+
+If subarrays are to be processed, that is done before walking them further.
+
+When run with the following scaffolding, the function produces the same
+results as does the earlier @code{walk_array()} function:
+
+@example
+BEGIN @{
+ a[1] = 1
+ a[2][1] = 21
+ a[2][2] = 22
+ a[3] = 3
+ a[4][1][1] = 411
+ a[4][2] = 42
+
+ process_array(a, "a", "do_print", 0)
+@}
+
+function do_print(name, element)
+@{
+ printf "%s = %s\n", name, element
+@}
+@end example
+
+Remember that you must supply a leading @samp{@@} in front of an indirect function call.
+
+Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function
+calls may also be used with built-in functions and with extension functions
+(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular
+expression constant to a built-in function through an indirect function
+call.@footnote{This may change in a future version; recheck the documentation that
+comes with your version of @command{gawk} to see if it has.}
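+
+For example, with @command{gawk} 4.1.2 or later, the following sketch
+calls the built-in @code{toupper()} function indirectly:
+
+@example
+BEGIN @{
+    the_func = "toupper"
+    print @@the_func("hello, world")   # prints HELLO, WORLD
+@}
+@end example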
+
+@command{gawk} does its best to make indirect function calls efficient.
+For example, in the following case:
+
+@example
+for (i = 1; i <= n; i++)
+ @@the_func()
+@end example
+
+@noindent
+@command{gawk} looks up the actual function to call only once.
+
+@node Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+@command{awk} provides built-in functions and lets you define your own
+functions.
+
+@item
+POSIX @command{awk} provides three kinds of built-in functions: numeric,
+string, and I/O. @command{gawk} provides functions that sort arrays, work
+with values representing time, do bit manipulation, determine variable
+type (array versus scalar), and internationalize and localize programs.
+@command{gawk} also provides several extensions to some of the standard
+functions, typically in the form of additional arguments.
+
+@item
+Functions accept zero or more arguments and return a value. The
+expressions that provide the argument values are completely evaluated
+before the function is called. Order of evaluation is not defined.
+The return value can be ignored.
+
+@item
+The handling of backslash in @code{sub()} and @code{gsub()} is not simple.
+It is more straightforward in @command{gawk}'s @code{gensub()} function,
+but that function still requires care in its use.
+
+@item
+User-defined functions provide important capabilities but come with
+some syntactic inelegancies. In a function call, there cannot be any
+space between the function name and the opening left parenthesis of the
+argument list. Also, there is no provision for local variables, so the
+convention is to add extra parameters, and to separate them visually
+from the real parameters by extra whitespace.
+
+@item
+User-defined functions may call other user-defined (and built-in)
+functions and may call themselves recursively. Function parameters
+``hide'' any global variables of the same names.
+You cannot use the name of a reserved variable (such as @code{ARGC})
+as the name of a parameter in user-defined functions.
+
+@item
+Scalar values are passed to user-defined functions by value. Array
+parameters are passed by reference; any changes made by the function to
+array parameters are thus visible after the function has returned.
+
+@item
+Use the @code{return} statement to return from a user-defined function.
+An optional expression becomes the function's return value. Only scalar
+values may be returned by a function.
+
+@item
+If a variable that has never been used is passed to a user-defined
+function, how that function treats the variable can set its nature:
+either scalar or array.
+
+@item
+@command{gawk} provides indirect function calls using a special syntax.
+By setting a variable to the name of a function, you can
+determine at runtime what function will be called at that point in the
+program. This is equivalent to function pointers in C and C++.
+
+@end itemize
+
+
+@ifnotinfo
+@part @value{PART2}Problem Solving with @command{awk}
+@end ifnotinfo
+
+@ifdocbook
+Part II shows how to use @command{awk} and @command{gawk} for problem solving.
+There is lots of code here for you to read and learn from.
+It contains the following chapters:
+
+@itemize @value{BULLET}
+@item
+@ref{Library Functions}
+
+@item
+@ref{Sample Programs}
+@end itemize
+@end ifdocbook
+
+@node Library Functions
+@chapter A Library of @command{awk} Functions
+@cindex libraries of @command{awk} functions
+@cindex functions, library
+@cindex functions, user-defined, library of
+
+@DBREF{User-defined} describes how to write
+your own @command{awk} functions. Writing functions is important, because
+it allows you to encapsulate algorithms and program tasks in a single
+place. It simplifies programming, making program development more
+manageable, and making programs more readable.
+
+@cindex Kernighan, Brian
+@cindex Plauger, P.J.@:
+In their seminal 1976 book, @cite{Software Tools},@footnote{Sadly, over 35
+years later, many of the lessons taught by this book have yet to be
+learned by a vast number of practicing programmers.} Brian Kernighan
+and P.J.@: Plauger wrote:
+
+@quotation
+Good Programming is not learned from generalities, but by seeing how
+significant programs can be made clean, easy to read, easy to maintain and
+modify, human-engineered, efficient and reliable, by the application of
+common sense and good programming practices. Careful study and imitation
+of good programs leads to better writing.
+@end quotation
+
+In fact, they felt this idea was so important that they placed this
+statement on the cover of their book. Because we believe strongly
+that their statement is correct, this @value{CHAPTER} and @ref{Sample
+Programs}, provide a good-sized body of code for you to read and, we hope,
+to learn from.
+
+This @value{CHAPTER} presents a library of useful @command{awk} functions.
+Many of the sample programs presented later in this @value{DOCUMENT}
+use these functions.
+The functions are presented here in a progression from simple to complex.
+
+@cindex Texinfo
+@DBREF{Extract Program}
+presents a program that you can use to extract the source code for
+these example library functions and programs from the Texinfo source
+for this @value{DOCUMENT}.
+(This has already been done as part of the @command{gawk} distribution.)
+
+@ifclear FOR_PRINT
+If you have written one or more useful, general-purpose @command{awk} functions
+and would like to contribute them to the @command{awk} user community, see
+@ref{How To Contribute}, for more information.
+@end ifclear
+
+@cindex portability, example programs
+The programs in this @value{CHAPTER} and in
+@ref{Sample Programs},
+freely use @command{gawk}-specific features.
+Rewriting these programs for different implementations of @command{awk}
+is pretty straightforward:
+
+@itemize @value{BULLET}
+@item
+Diagnostic error messages are sent to @file{/dev/stderr}.
+Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"} if your system
+does not have a @file{/dev/stderr}, or if you cannot use @command{gawk}.
+
+@item
+A number of programs use @code{nextfile}
+(@pxref{Nextfile Statement})
+to skip any remaining input in the input file.
+
+@item
+@c 12/2000: Thanks to Nelson Beebe for pointing out the output issue.
+@cindex case sensitivity, example programs
+@cindex @code{IGNORECASE} variable, in example programs
+Finally, some of the programs choose to ignore upper- and lowercase
+distinctions in their input. They do so by assigning one to @code{IGNORECASE}.
+You can achieve almost the same effect@footnote{The effects are
+not identical. Output of the transformed
+record will be in all lowercase, while @code{IGNORECASE} preserves the original
+contents of the input record.} by adding the following rule to the
+beginning of the program:
+
+@example
+# ignore case
+@{ $0 = tolower($0) @}
+@end example
+
+@noindent
+Also, verify that all regexp and string constants used in
+comparisons use only lowercase letters.
+@end itemize
+
+@menu
+* Library Names:: How to best name private global variables in
+ library functions.
+* General Functions:: Functions that are of general use.
+* Data File Management:: Functions for managing command-line data
+ files.
+* Getopt Function:: A function for processing command-line
+ arguments.
+* Passwd Functions:: Functions for getting user information.
+* Group Functions:: Functions for getting group information.
+* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
+@end menu
+
+@node Library Names
+@section Naming Library Function Global Variables
+
+@cindex names, arrays/variables
+@cindex names, functions
+@cindex namespace issues
+@cindex @command{awk} programs, documenting
+@cindex documentation, of @command{awk} programs
+Due to the way the @command{awk} language evolved, variables are either
+@dfn{global} (usable by the entire program) or @dfn{local} (usable just by
+a specific function). There is no intermediate state analogous to
+@code{static} variables in C.
+
+@cindex variables, global, for library functions
+@cindex private variables
+@cindex variables, private
+Library functions often need to have global variables that they can use to
+preserve state information between calls to the function---for example,
+@code{getopt()}'s variable @code{_opti}
+(@pxref{Getopt Function}).
+Such variables are called @dfn{private}, as the only functions that need to
+use them are the ones in the library.
+
+When writing a library function, you should try to choose names for your
+private variables that will not conflict with any variables used by
+either another library function or a user's main program. For example, a
+name like @code{i} or @code{j} is not a good choice, because user programs
+often use variable names like these for their own purposes.
+
+@cindex programming conventions, private variable names
+The example programs shown in this @value{CHAPTER} all start the names of their
+private variables with an underscore (@samp{_}). Users generally don't use
+leading underscores in their variable names, so this convention immediately
+decreases the chances that the variable name will be accidentally shared
+with the user's program.
+
+@cindex @code{_} (underscore), in names of private variables
+@cindex underscore (@code{_}), in names of private variables
+In addition, several of the library functions use a prefix that helps
+indicate what function or set of functions use the variables---for example,
+@code{_pw_byname()} in the user database routines
+(@pxref{Passwd Functions}).
+This convention is recommended, as it even further decreases the
+chance of inadvertent conflict among variable names. Note that this
+convention is used equally well for variable names and for private
+function names.@footnote{Although all the library routines could have
+been rewritten to use this convention, this was not done, in order to
+show how our own @command{awk} programming style has evolved and to
+provide some basis for this discussion.}
+
+As a final note on variable naming, if a function makes global variables
+available for use by a main program, it is a good convention to start that
+variable's name with a capital letter---for
+example, @code{getopt()}'s @code{Opterr} and @code{Optind} variables
+(@pxref{Getopt Function}).
+The leading capital letter indicates that it is global, while the fact that
+the variable name is not all capital letters indicates that the variable is
+not one of @command{awk}'s predefined variables, such as @code{FS}.
+
+@cindex @option{--dump-variables} option, using for library functions
+It is also important that @emph{all} variables in library
+functions that do not need to save state are, in fact, declared
+local.@footnote{@command{gawk}'s @option{--dump-variables} command-line
+option is useful for verifying this.} If this is not done, the variable
+could accidentally be used in the user's program, leading to bugs that
+are very difficult to track down:
+
+@example
+function lib_func(x, y, l1, l2)
+@{
+ @dots{}
+ # some_var should be local but by oversight is not
+ @var{use variable} some_var
+ @dots{}
+@}
+@end example
+
+@cindex arrays, associative, library functions and
+@cindex libraries of @command{awk} functions, associative arrays and
+@cindex functions, library, associative arrays and
+@cindex Tcl
+A different convention, common in the Tcl community, is to use a single
+associative array to hold the values needed by the library function(s), or
+``package.'' This significantly decreases the number of actual global names
+in use. For example, the functions described in
+@DBREF{Passwd Functions}
+might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
+@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of
+@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
+and @code{@w{_pw_count}}.
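+
+A minimal sketch of this style (the package and element names are
+invented for illustration) might look like this:
+
+@example
+function my_pkg_init()
+@{
+    if (My_pkg_data["inited"])
+        return
+    My_pkg_data["inited"] = 1
+    My_pkg_data["count"] = 0
+@}
+@end example
+
+@noindent
+Only the single global name @code{My_pkg_data} is then visible to the
+rest of the program.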
+
+The conventions presented in this @value{SECTION} are exactly
+that: conventions. You are not required to write your programs this
+way---we merely recommend that you do so.
+
+@node General Functions
+@section General Programming
+
+This @value{SECTION} presents a number of functions that are of general
+programming use.
+
+@menu
+* Strtonum Function:: A replacement for the built-in
+ @code{strtonum()} function.
+* Assert Function:: A function for assertions in @command{awk}
+ programs.
+* Round Function:: A function for rounding if @code{sprintf()}
+ does not do it correctly.
+* Cliff Random Function:: The Cliff Random Number Generator.
+* Ordinal Functions:: Functions for using characters as numbers and
+ vice versa.
+* Join Function:: A function to join an array into a string.
+* Getlocaltime Function:: A function to get formatted times.
+* Readfile Function:: A function to read an entire file at once.
+* Shell Quoting:: A function to quote strings for the shell.
+@end menu
+
+@node Strtonum Function
+@subsection Converting Strings to Numbers
+
+The @code{strtonum()} function (@pxref{String Functions})
+is a @command{gawk} extension. The following function
+provides an implementation for other versions of @command{awk}:
+
+@example
+@c file eg/lib/strtonum.awk
+# mystrtonum --- convert string to number
+
+@c endfile
+@ignore
+@c file eg/lib/strtonum.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# February, 2004
+# Revised June, 2014
+
+@c endfile
+@end ignore
+@c file eg/lib/strtonum.awk
+function mystrtonum(str, ret, n, i, k, c)
+@{
+ if (str ~ /^0[0-7]*$/) @{
+ # octal
+ n = length(str)
+ ret = 0
+ for (i = 1; i <= n; i++) @{
+ c = substr(str, i, 1)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
+
+ ret = ret * 8 + k
+ @}
+ @} else if (str ~ /^0[xX][[:xdigit:]]+$/) @{
+ # hexadecimal
+ str = substr(str, 3) # lop off leading 0x
+ n = length(str)
+ ret = 0
+ for (i = 1; i <= n; i++) @{
+ c = substr(str, i, 1)
+ c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("123456789abcdef", c)
+
+ ret = ret * 16 + k
+ @}
+ @} else if (str ~ \
+ /^[-+]?([0-9]+([.][0-9]*([Ee][0-9]+)?)?|([.][0-9]+([Ee][-+]?[0-9]+)?))$/) @{
+ # decimal number, possibly floating point
+ ret = str + 0
+ @} else
+ ret = "NOT-A-NUMBER"
+
+ return ret
+@}
+
+# BEGIN @{ # gawk test harness
+# a[1] = "25"
+# a[2] = ".31"
+# a[3] = "0123"
+# a[4] = "0xdeadBEEF"
+# a[5] = "123.45"
+# a[6] = "1.e3"
+# a[7] = "1.32"
+# a[8] = "1.32E2"
+#
+# for (i = 1; i in a; i++)
+# print a[i], strtonum(a[i]), mystrtonum(a[i])
+# @}
+@c endfile
+@end example
+
+The function first looks for C-style octal numbers (base 8).
+If the input string matches a regular expression describing octal
+numbers, then @code{mystrtonum()} loops through each character in the
+string. It sets @code{k} to the index in @code{"1234567"} of the current
+octal digit.
+The value returned by @code{index()} is either the same number as the digit, or zero
+if the character is not there, which will be true for a @samp{0}.
+This is safe, because the regexp test in the @code{if} ensures that
+only octal values are converted.
+
+Similar logic applies to the code that checks for and converts a
+hexadecimal value, which starts with @samp{0x} or @samp{0X}.
+The use of @code{tolower()} simplifies the computation for finding
+the correct numeric value for each hexadecimal digit.
+
+Finally, if the string matches the (rather complicated) regexp for a
+regular decimal integer or floating-point number, the computation
+@samp{ret = str + 0} lets @command{awk} convert the value to a
+number.
+
+A commented-out test program is included, so that the function can
+be tested with @command{gawk} and the results compared to the built-in
+@code{strtonum()} function.
+
+@node Assert Function
+@subsection Assertions
+
+@cindex assertions
+@cindex @code{assert()} function (C library)
+@cindex libraries of @command{awk} functions, assertions
+@cindex functions, library, assertions
+@cindex @command{awk} programs, lengthy, assertions
+When writing large programs, it is often useful to know
+that a condition or set of conditions is true. Before proceeding with a
+particular computation, you make a statement about what you believe to be
+the case. Such a statement is known as an
+@dfn{assertion}. The C language provides an @code{<assert.h>} header file
+and corresponding @code{assert()} macro that a programmer can use to make
+assertions. If an assertion fails, the @code{assert()} macro arranges to
+print a diagnostic message describing the condition that should have
+been true but was not, and then it kills the program. In C, using
+@code{assert()} looks like this:
+
+@example
+#include <assert.h>
+
+int myfunc(int a, double b)
+@{
+ assert(a <= 5 && b >= 17.1);
+ @dots{}
+@}
+@end example
+
+If the assertion fails, the program prints a message similar to this:
+
+@example
+prog.c:5: assertion failed: a <= 5 && b >= 17.1
+@end example
+
+@cindex @code{assert()} user-defined function
+The C language makes it possible to turn the condition into a string for use
+in printing the diagnostic message. This is not possible in @command{awk}, so
+this @code{assert()} function also requires a string version of the condition
+that is being tested.
+Following is the function:
+
+@example
+@c file eg/lib/assert.awk
+# assert --- assert that a condition is true. Otherwise exit.
+
+@c endfile
+@ignore
+@c file eg/lib/assert.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May, 1993
+
+@c endfile
+@end ignore
+@c file eg/lib/assert.awk
+function assert(condition, string)
+@{
+ if (! condition) @{
+ printf("%s:%d: assertion failed: %s\n",
+ FILENAME, FNR, string) > "/dev/stderr"
+ _assert_exit = 1
+ exit 1
+ @}
+@}
+
+@group
+END @{
+ if (_assert_exit)
+ exit 1
+@}
+@end group
+@c endfile
+@end example
+
+The @code{assert()} function tests the @code{condition} parameter. If it
+is false, it prints a message to standard error, using the @code{string}
+parameter to describe the failed condition. It then sets the variable
+@code{_assert_exit} to one and executes the @code{exit} statement.
+The @code{exit} statement jumps to the @code{END} rule. If the @code{END}
+rule finds @code{_assert_exit} to be true, it exits immediately.
+
+The purpose of the test in the @code{END} rule is to
+keep any other @code{END} rules from running. When an assertion fails, the
+program should exit immediately.
+If no assertions fail, then @code{_assert_exit} is still
+false when the @code{END} rule is run normally, and the rest of the
+program's @code{END} rules execute.
+For all of this to work correctly, @file{assert.awk} must be the
+first source file read by @command{awk}.
+The function can be used in a program in the following way:
+
+@example
+function myfunc(a, b)
+@{
+ assert(a <= 5 && b >= 17.1, "a <= 5 && b >= 17.1")
+ @dots{}
+@}
+@end example
+
+@noindent
+If the assertion fails, you see a message similar to the following:
+
+@example
+mydata:1357: assertion failed: a <= 5 && b >= 17.1
+@end example
+
+@cindex @code{END} pattern, @code{assert()} user-defined function and
+There is a small problem with this version of @code{assert()}.
+An @code{END} rule is automatically added
+to the program calling @code{assert()}. Normally, if a program consists
+of just a @code{BEGIN} rule, the input files and/or standard input are
+not read. However, now that the program has an @code{END} rule, @command{awk}
+attempts to read the input @value{DF}s or standard input
+(@pxref{Using BEGIN/END}),
+most likely causing the program to hang as it waits for input.
+
+@cindex @code{BEGIN} pattern, @code{assert()} user-defined function and
+There is a simple workaround to this:
+make sure that such a @code{BEGIN} rule always ends
+with an @code{exit} statement.
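+
+For example, a @code{BEGIN}-only program that uses @code{assert()} might
+look like this (a sketch; the condition tested is arbitrary):
+
+@example
+BEGIN @{
+    assert(ARGC >= 1, "ARGC >= 1")
+    @dots{}
+    exit    # input is not read; the assert END rule still runs
+@}
+@end example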
+
+@node Round Function
+@subsection Rounding Numbers
+
+@cindex rounding numbers
+@cindex numbers, rounding
+@cindex libraries of @command{awk} functions, rounding numbers
+@cindex functions, library, rounding numbers
+@cindex @code{print} statement, @code{sprintf()} function and
+@cindex @code{printf} statement, @code{sprintf()} function and
+@cindex @code{sprintf()} function, @code{print}/@code{printf} statements and
+The way @code{printf} and @code{sprintf()}
+(@pxref{Printf})
+perform rounding often depends upon the system's C @code{sprintf()}
+subroutine. On many machines, @code{sprintf()} rounding is @dfn{unbiased},
+which means it doesn't always round a trailing .5 up, contrary
+to naive expectations. In unbiased rounding, .5 rounds to even,
+rather than always up, so 1.5 rounds to 2 but 4.5 rounds to 4. This means
+that if you are using a format that does rounding (e.g., @code{"%.0f"}),
+you should check what your system does. The following function does
+traditional rounding; it might be useful if your @command{awk}'s @code{printf}
+does unbiased rounding:
+
+@cindex @code{round()} user-defined function
+@example
+@c file eg/lib/round.awk
+# round.awk --- do normal rounding
+@c endfile
+@ignore
+@c file eg/lib/round.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# August, 1996
+@c endfile
+@end ignore
+@c file eg/lib/round.awk
+
+function round(x, ival, aval, fraction)
+@{
+ ival = int(x) # integer part, int() truncates
+
+ # see if fractional part
+ if (ival == x) # no fraction
+ return ival # ensure no decimals
+
+ if (x < 0) @{
+ aval = -x # absolute value
+ ival = int(aval)
+ fraction = aval - ival
+ if (fraction >= .5)
+ return int(x) - 1 # -2.5 --> -3
+ else
+ return int(x) # -2.3 --> -2
+ @} else @{
+ fraction = x - ival
+ if (fraction >= .5)
+ return ival + 1
+ else
+ return ival
+ @}
+@}
+@c endfile
+@c don't include test harness in the file that gets installed
+
+# test harness
+# @{ print $0, round($0) @}
+@end example
+
+@node Cliff Random Function
+@subsection The Cliff Random Number Generator
+@cindex random numbers, Cliff
+@cindex Cliff random numbers
+@cindex numbers, Cliff random
+@cindex functions, library, Cliff random numbers
+
+The
+@uref{http://mathworld.wolfram.com/CliffRandomNumberGenerator.html, Cliff random number generator}
+is a very simple random number generator that ``passes the noise sphere test
+for randomness by showing no structure.''
+It is easily programmed, in less than 10 lines of @command{awk} code:
+
+@cindex @code{cliff_rand()} user-defined function
+@example
+@c file eg/lib/cliff_rand.awk
+# cliff_rand.awk --- generate Cliff random numbers
+@c endfile
+@ignore
+@c file eg/lib/cliff_rand.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# December 2000
+@c endfile
+@end ignore
+@c file eg/lib/cliff_rand.awk
+
+BEGIN @{ _cliff_seed = 0.1 @}
+
+function cliff_rand()
+@{
+ _cliff_seed = (100 * log(_cliff_seed)) % 1
+ if (_cliff_seed < 0)
+ _cliff_seed = - _cliff_seed
+ return _cliff_seed
+@}
+@c endfile
+@end example
+
+This algorithm requires an initial ``seed'' of 0.1. Each new value
+uses the current seed as input for the calculation.
+If the built-in @code{rand()} function
+(@pxref{Numeric Functions})
+isn't random enough, you might try using this function instead.
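+
+As a minimal sketch (not part of the library file), the following
+@code{BEGIN} rule prints ten values from @code{cliff_rand()}, assuming
+@file{cliff_rand.awk} has been loaded with @option{-f}:
+
+@example
+BEGIN @{
+    for (i = 1; i <= 10; i++)
+        print cliff_rand()
+@}
+@end example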
+
+@node Ordinal Functions
+@subsection Translating Between Characters and Numbers
+
+@cindex libraries of @command{awk} functions, character values as numbers
+@cindex functions, library, character values as numbers
+@cindex characters, values of as numbers
+@cindex numbers, as values of characters
+One commercial implementation of @command{awk} supplies a built-in function,
+@code{ord()}, which takes a character and returns the numeric value for that
+character in the machine's character set. If the string passed to
+@code{ord()} has more than one character, only the first one is used.
+
+The inverse of this function is @code{chr()} (from the function of the same
+name in Pascal), which takes a number and returns the corresponding character.
+Both functions are written very nicely in @command{awk}; there is no real
+reason to build them into the @command{awk} interpreter:
+
+@cindex @code{ord()} user-defined function
+@cindex @code{chr()} user-defined function
+@cindex @code{_ord_init()} user-defined function
+@example
+@c file eg/lib/ord.awk
+# ord.awk --- do ord and chr
+
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+@c endfile
+@ignore
+@c file eg/lib/ord.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+@c endfile
+@end ignore
+@c file eg/lib/ord.awk
+
+BEGIN @{ _ord_init() @}
+
+function _ord_init( low, high, i, t)
+@{
+ low = sprintf("%c", 7) # BEL is ascii 7
+ if (low == "\a") @{ # regular ascii
+ low = 0
+ high = 127
+ @} else if (sprintf("%c", 128 + 7) == "\a") @{
+ # ascii, mark parity
+ low = 128
+ high = 255
+ @} else @{ # ebcdic(!)
+ low = 0
+ high = 255
+ @}
+
+ for (i = low; i <= high; i++) @{
+ t = sprintf("%c", i)
+ _ord_[t] = i
+ @}
+@}
+@c endfile
+@end example
+
+@cindex character sets (machine character encodings)
+@cindex ASCII
+@cindex EBCDIC
+@cindex Unicode
+@cindex mark parity
+Some explanation of the numbers used by @code{_ord_init()} is worthwhile.
+The most prominent character set in use today is ASCII.@footnote{This
+is changing; many systems use Unicode, a very large character set
+that includes ASCII as a subset. On systems with full Unicode support,
+a character can occupy up to 32 bits, making simple tests such as
+the one used here prohibitively expensive.}
+Although an
+8-bit byte can hold 256 distinct values (from 0 to 255), ASCII only
+defines characters that use the values from 0 to 127.@footnote{ASCII
+has been extended in many countries to use the values from 128 to 255
+for country-specific characters. If your system uses these extensions,
+you can simplify @code{_ord_init()} to loop from 0 to 255.}
+In the now distant past,
+at least one minicomputer manufacturer
+@c Pr1me, blech
+used ASCII, but with mark parity, meaning that the leftmost bit in the byte
+is always 1. This means that on those systems, characters
+have numeric values from 128 to 255.
+Finally, large mainframe systems use the EBCDIC character set, which
+uses all 256 values.
+There are other character sets in use on some older systems, but
+they are not really worth worrying about:
+
+@example
+@c file eg/lib/ord.awk
+function ord(str, c)
+@{
+ # only first character is of interest
+ c = substr(str, 1, 1)
+ return _ord_[c]
+@}
+
+function chr(c)
+@{
+ # force c to be numeric by adding 0
+ return sprintf("%c", c + 0)
+@}
+@c endfile
+
+#### test code ####
+# BEGIN @{
+# for (;;) @{
+# printf("enter a character: ")
+# if (getline var <= 0)
+# break
+# printf("ord(%s) = %d\n", var, ord(var))
+# @}
+# @}
+@c endfile
+@end example
+
+An obvious improvement to these functions is to move the code for the
+@code{@w{_ord_init}} function into the body of the @code{BEGIN} rule. It was
+written this way initially for ease of development.
+There is a ``test program'' in a @code{BEGIN} rule, to test the
+function. It is commented out for production use.
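+
+On an ASCII system, a quick check of the two functions might look like
+the following sketch (again, not part of the library file):
+
+@example
+BEGIN @{
+    print ord("A")    # prints 65 on ASCII systems
+    print chr(65)     # prints A
+@}
+@end example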
+
+@node Join Function
+@subsection Merging an Array into a String
+
+@cindex libraries of @command{awk} functions, merging arrays into strings
+@cindex functions, library, merging arrays into strings
+@cindex strings, merging arrays into
+@cindex arrays, merging into strings
+When doing string processing, it is often useful to be able to join
+all the strings in an array into one long string. The following function,
+@code{join()}, accomplishes this task. It is used later in several of
+the application programs
+(@pxref{Sample Programs}).
+
+Good function design is important; this function needs to be general but it
+should also have a reasonable default behavior. It is called with an array
+as well as the beginning and ending indices of the elements in the array to be
+merged. This assumes that the array indices are numeric---a reasonable
+assumption, as the array was likely created with @code{split()}
+(@pxref{String Functions}):
+
+@cindex @code{join()} user-defined function
+@example
+@c file eg/lib/join.awk
+# join.awk --- join an array into a string
+@c endfile
+@ignore
+@c file eg/lib/join.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+@c endfile
+@end ignore
+@c file eg/lib/join.awk
+
+function join(array, start, end, sep, result, i)
+@{
+ if (sep == "")
+ sep = " "
+ else if (sep == SUBSEP) # magic value
+ sep = ""
+ result = array[start]
+ for (i = start + 1; i <= end; i++)
+ result = result sep array[i]
+ return result
+@}
+@c endfile
+@end example
+
+An optional additional argument is the separator to use when joining the
+strings back together. If the caller supplies a nonempty value,
+@code{join()} uses it; if it is not supplied, it has a null
+value. In this case, @code{join()} uses a single space as a default
+separator for the strings. If the value is equal to @code{SUBSEP},
+then @code{join()} joins the strings with no separator between them.
+@code{SUBSEP} serves as a ``magic'' value to indicate that there should
+be no separation between the component strings.@footnote{It would
+be nice if @command{awk} had an assignment operator for concatenation.
+The lack of an explicit operator for concatenation makes string operations
+more difficult than they really need to be.}
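+
+Here is a small sketch (not part of the library file) showing the three
+separator behaviors after a call to @code{split()}:
+
+@example
+BEGIN @{
+    n = split("one:two:three", parts, ":")
+    print join(parts, 1, n)            # one two three
+    print join(parts, 1, n, "-")       # one-two-three
+    print join(parts, 1, n, SUBSEP)    # onetwothree
+@}
+@end example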
+
+@node Getlocaltime Function
+@subsection Managing the Time of Day
+
+@cindex libraries of @command{awk} functions, managing, time
+@cindex functions, library, managing time
+@cindex timestamps, formatted
+@cindex time, managing
+The @code{systime()} and @code{strftime()} functions described in
+@DBREF{Time Functions}
+provide the minimum functionality necessary for dealing with the time of day
+in human-readable form. Although @code{strftime()} is extensive, the control
+formats are not necessarily easy to remember or intuitively obvious when
+reading a program.
+
+The following function, @code{getlocaltime()}, populates a user-supplied array
+with preformatted time information. It returns a string with the current
+time formatted in the same way as the @command{date} utility:
+
+@cindex @code{getlocaltime()} user-defined function
+@example
+@c file eg/lib/gettime.awk
+# getlocaltime.awk --- get the time of day in a usable format
+@c endfile
+@ignore
+@c file eg/lib/gettime.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain, May 1993
+#
+@c endfile
+@end ignore
+@c file eg/lib/gettime.awk
+
+# Returns a string in the format of output of date(1)
+# Populates the array argument time with individual values:
+# time["second"] -- seconds (0 - 59)
+# time["minute"] -- minutes (0 - 59)
+# time["hour"] -- hours (0 - 23)
+# time["althour"] -- hours (0 - 12)
+# time["monthday"] -- day of month (1 - 31)
+# time["month"] -- month of year (1 - 12)
+# time["monthname"] -- name of the month
+# time["shortmonth"] -- short name of the month
+# time["year"] -- year modulo 100 (0 - 99)
+# time["fullyear"] -- full year
+# time["weekday"] -- day of week (Sunday = 0)
+# time["altweekday"] -- day of week (Monday = 0)
+# time["dayname"] -- name of weekday
+# time["shortdayname"] -- short name of weekday
+# time["yearday"] -- day of year (0 - 365)
+# time["timezone"] -- abbreviation of timezone name
+# time["ampm"] -- AM or PM designation
+# time["weeknum"] -- week number, Sunday first day
+# time["altweeknum"] -- week number, Monday first day
+
+function getlocaltime(time, ret, now, i)
+@{
+ # get time once, avoids unnecessary system calls
+ now = systime()
+
+ # return date(1)-style output
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+
+ # clear out target array
+ delete time
+
+ # fill in values, force numeric values to be
+ # numeric by adding 0
+ time["second"] = strftime("%S", now) + 0
+ time["minute"] = strftime("%M", now) + 0
+ time["hour"] = strftime("%H", now) + 0
+ time["althour"] = strftime("%I", now) + 0
+ time["monthday"] = strftime("%d", now) + 0
+ time["month"] = strftime("%m", now) + 0
+ time["monthname"] = strftime("%B", now)
+ time["shortmonth"] = strftime("%b", now)
+ time["year"] = strftime("%y", now) + 0
+ time["fullyear"] = strftime("%Y", now) + 0
+ time["weekday"] = strftime("%w", now) + 0
+ time["altweekday"] = strftime("%u", now) + 0
+ time["dayname"] = strftime("%A", now)
+ time["shortdayname"] = strftime("%a", now)
+ time["yearday"] = strftime("%j", now) + 0
+ time["timezone"] = strftime("%Z", now)
+ time["ampm"] = strftime("%p", now)
+ time["weeknum"] = strftime("%U", now) + 0
+ time["altweeknum"] = strftime("%W", now) + 0
+
+ return ret
+@}
+@c endfile
+@end example
+
+The string indices are easier to use and read than the various formats
+required by @code{strftime()}. The @code{alarm} program presented in
+@DBREF{Alarm Program}
+uses this function.
+A more general design for the @code{getlocaltime()} function would have
+allowed the user to supply an optional timestamp value to use instead
+of the current time.
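+
+In its present form, a usage sketch (not part of the library file)
+might look like this:
+
+@example
+BEGIN @{
+    datestring = getlocaltime(now)
+    print datestring
+    printf("%02d:%02d:%02d on day %d of %s\n",
+           now["hour"], now["minute"], now["second"],
+           now["monthday"], now["monthname"])
+@}
+@end example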
+
+@node Readfile Function
+@subsection Reading a Whole File At Once
+
+Often, it is convenient to have the entire contents of a file available
+in memory as a single string. A straightforward but naive way to
+do that might be as follows:
+
+@example
+function readfile(file, tmp, contents)
+@{
+ if ((getline tmp < file) < 0)
+ return
+
+ contents = tmp
+    while ((getline tmp < file) > 0)
+ contents = contents RT tmp
+
+ close(file)
+ return contents
+@}
+@end example
+
+This function reads from @code{file} one record at a time, building
+up the full contents of the file in the local variable @code{contents}.
+It works, but is not necessarily efficient.
+
+The following function, based on a suggestion by Denis Shirokov,
+reads the entire contents of the named file in one shot:
+
+@cindex @code{readfile()} user-defined function
+@example
+@c file eg/lib/readfile.awk
+# readfile.awk --- read an entire file at once
+@c endfile
+@ignore
+@c file eg/lib/readfile.awk
+#
+# Original idea by Denis Shirokov, cosmogen@@gmail.com, April 2013
+#
+@c endfile
+@end ignore
+@c file eg/lib/readfile.awk
+
+function readfile(file, tmp, save_rs)
+@{
+ save_rs = RS
+ RS = "^$"
+ getline tmp < file
+ close(file)
+ RS = save_rs
+
+ return tmp
+@}
+@c endfile
+@end example
+
+It works by setting @code{RS} to @samp{^$}, a regular expression that
+will never match if the file has contents. @command{gawk} reads data from
+the file into @code{tmp} attempting to match @code{RS}. The match fails
+after each read, but fails quickly, such that @command{gawk} fills
+@code{tmp} with the entire contents of the file.
+(@DBXREF{Records} for information on @code{RT} and @code{RS}.)
+
+In the case that @code{file} is empty, the return value is the null
+string. Thus calling code may use something like:
+
+@example
+contents = readfile("/some/path")
+if (length(contents) == 0)
+ # file was empty @dots{}
+@end example
+
+This tests the result to see if it is empty or not. An equivalent
+test would be @samp{contents == ""}.
+
+@xref{Extension Sample Readfile}, for an extension function that
+also reads an entire file into memory.
+
+@node Shell Quoting
+@subsection Quoting Strings to Pass to the Shell
+
+@c included by permission
+@ignore
+Date: Sun, 27 Jul 2014 17:16:16 -0700
+Message-ID: <CAKuGj+iCF_obaCLDUX60aSAgbfocFVtguG39GyeoNxTFby5sqQ@mail.gmail.com>
+Subject: Useful awk function
+From: Mike Brennan <mike@madronabluff.com>
+To: Arnold Robbins <arnold@skeeve.com>
+@end ignore
+
+Michael Brennan offers the following programming pattern,
+which he uses frequently:
+
+@example
+#! /bin/sh
+
+awkp='
+ @dots{}
+ '
+
+@var{input_program} | awk "$awkp" | /bin/sh
+@end example
+
+For example, a program of his named @command{flac-edit} has this form:
+
+@example
+$ @kbd{flac-edit -song="Whoope! That's Great" file.flac}
+@end example
+
+It generates the following output, which is to be piped to
+the shell (@file{/bin/sh}):
+
+@example
+chmod +w file.flac
+metaflac --remove-tag=TITLE file.flac
+LANG=en_US.88591 metaflac --set-tag=TITLE='Whoope! That'"'"'s Great' file.flac
+chmod -w file.flac
+@end example
+
+Note the need for shell quoting. The function @code{shell_quote()}
+does it. @code{SINGLE} is the one-character string @code{"'"} and
+@code{QSINGLE} is the three-character string @code{"\"'\""}:
+
+@example
+@c file eg/lib/shellquote.awk
+# shell_quote --- quote an argument for passing to the shell
+@c endfile
+@ignore
+@c file eg/lib/shellquote.awk
+#
+# Michael Brennan
+# brennan@@madronabluff.com
+# September 2014
+@c endfile
+@end ignore
+@c file eg/lib/shellquote.awk
+
+function shell_quote(s, # parameter
+ SINGLE, QSINGLE, i, X, n, ret) # locals
+@{
+ if (s == "")
+ return "\"\""
+
+ SINGLE = "\x27" # single quote
+ QSINGLE = "\"\x27\""
+ n = split(s, X, SINGLE)
+
+ ret = SINGLE X[1] SINGLE
+ for (i = 2; i <= n; i++)
+ ret = ret QSINGLE SINGLE X[i] SINGLE
+
+ return ret
+@}
+@c endfile
+@end example
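+
+As an illustration, a fragment in the style of @command{flac-edit}
+might use @code{shell_quote()} as in the following sketch (the tag
+value is just an example):
+
+@example
+BEGIN @{
+    title = "Whoope! That's Great"
+    printf("metaflac --set-tag=TITLE=%s file.flac\n",
+           shell_quote(title))
+@}
+@end example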
+
+@node Data File Management
+@section @value{DDF} Management
+
+@cindex files, managing
+@cindex libraries of @command{awk} functions, managing, data files
+@cindex functions, library, managing data files
+This @value{SECTION} presents functions that are useful for managing
+command-line @value{DF}s.
+
+@menu
+* Filetrans Function:: A function for handling data file transitions.
+* Rewind Function:: A function for rereading the current file.
+* File Checking:: Checking that data files are readable.
+* Empty Files:: Checking for zero-length files.
+* Ignoring Assigns:: Treating assignments as file names.
+@end menu
+
+@node Filetrans Function
+@subsection Noting @value{DDF} Boundaries
+
+@cindex files, managing, data file boundaries
+@cindex files, initialization and cleanup
+The @code{BEGIN} and @code{END} rules are each executed exactly once, at
+the beginning and end of your @command{awk} program, respectively
+(@pxref{BEGIN/END}).
+We (the @command{gawk} authors) once had a user who mistakenly thought that the
+@code{BEGIN} rule is executed at the beginning of each @value{DF} and the
+@code{END} rule is executed at the end of each @value{DF}.
+
+When informed
+that this was not the case, the user requested that we add new special
+patterns to @command{gawk}, named @code{BEGIN_FILE} and @code{END_FILE}, that
+would have the desired behavior. He even supplied us the code to do so.
+
+Adding these special patterns to @command{gawk} wasn't necessary;
+the job can be done cleanly in @command{awk} itself, as illustrated
+by the following library program.
+It arranges to call two user-supplied functions, @code{beginfile()} and
+@code{endfile()}, at the beginning and end of each @value{DF}.
+Besides solving the problem in only nine(!) lines of code, it does so
+@emph{portably}; this works with any implementation of @command{awk}:
+
+@example
+# transfile.awk
+#
+# Give the user a hook for filename transitions
+#
+# The user must supply functions beginfile() and endfile()
+# that each take the name of the file being started or
+# finished, respectively.
+@c #
+@c # Arnold Robbins, arnold@@skeeve.com, Public Domain
+@c # January 1992
+
+FILENAME != _oldfilename @{
+ if (_oldfilename != "")
+ endfile(_oldfilename)
+ _oldfilename = FILENAME
+ beginfile(FILENAME)
+@}
+
+END @{ endfile(FILENAME) @}
+@end example
+
+This file must be loaded before the user's ``main'' program, so that the
+rule it supplies is executed first.
+
+This rule relies on @command{awk}'s @code{FILENAME} variable that
+automatically changes for each new @value{DF}. The current @value{FN} is
+saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
+not equal @code{_oldfilename}, then a new @value{DF} is being processed and
+it is necessary to call @code{endfile()} for the old file. Because
+@code{endfile()} should only be called if a file has been processed, the
+program first checks to make sure that @code{_oldfilename} is not the null
+string. The program then assigns the current @value{FN} to
+@code{_oldfilename} and calls @code{beginfile()} for the file.
+Because, like all @command{awk} variables, @code{_oldfilename} is
+initialized to the null string, this rule executes correctly even for the
+first @value{DF}.
+
+The program also supplies an @code{END} rule to do the final processing for
+the last file. Because this @code{END} rule comes before any @code{END} rules
+supplied in the ``main'' program, @code{endfile()} is called first. Once
+again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
+
+@cindex @code{beginfile()} user-defined function
+@cindex @code{endfile()} user-defined function
+If the same @value{DF} occurs twice in a row on the command line, then
+@code{endfile()} and @code{beginfile()} are not executed at the end of the
+first pass and at the beginning of the second pass.
+The following version solves the problem:
+
+@example
+@c file eg/lib/ftrans.awk
+# ftrans.awk --- handle datafile transitions
+#
+# user supplies beginfile() and endfile() functions
+@c endfile
+@ignore
+@c file eg/lib/ftrans.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# November 1992
+@c endfile
+@end ignore
+@c file eg/lib/ftrans.awk
+
+FNR == 1 @{
+ if (_filename_ != "")
+ endfile(_filename_)
+ _filename_ = FILENAME
+ beginfile(FILENAME)
+@}
+
+END @{ endfile(_filename_) @}
+@c endfile
+@end example
+
+@DBREF{Wc Program}
+shows how this library function can be used and
+how it simplifies writing the main program.
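+
+User-supplied hook functions might look something like the following
+sketch, loaded together with @file{ftrans.awk}:
+
+@example
+function beginfile(name) @{ print "starting", name @}
+function endfile(name)   @{ print "finished", name @}
+
+@{ lines++ @}    # the rest of the main program
+@end example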
+
+@sidebar So Why Does @command{gawk} Have @code{BEGINFILE} and @code{ENDFILE}?
+
+You are probably wondering, if @code{beginfile()} and @code{endfile()}
+functions can do the job, why does @command{gawk} have
+@code{BEGINFILE} and @code{ENDFILE} patterns (@pxref{BEGINFILE/ENDFILE})?
+
+Good question. Normally, if @command{awk} cannot open a file, this
+causes an immediate fatal error. In this case, there is no way for a
+user-defined function to deal with the problem, as the mechanism for
+calling it relies on the file being open and at the first record. Thus,
+the main reason for @code{BEGINFILE} is to give you a ``hook'' to catch
+files that cannot be processed. @code{ENDFILE} exists for symmetry,
+and because it provides an easy way to do per-file cleanup processing.
+@end sidebar
+
+@node Rewind Function
+@subsection Rereading the Current File
+
+@cindex files, reading
+Another request for a new built-in function was for a @code{rewind()}
+function that would make it possible to reread the current file.
+The requesting user didn't want to have to use @code{getline}
+(@pxref{Getline})
+inside a loop.
+
+However, as long as you are not in the @code{END} rule, it is
+quite easy to arrange to immediately close the current input file
+and then start over with it from the top.
+For lack of a better name, we'll call it @code{rewind()}:
+
+@cindex @code{rewind()} user-defined function
+@example
+@c file eg/lib/rewind.awk
+# rewind.awk --- rewind the current file and start over
+@c endfile
+@ignore
+@c file eg/lib/rewind.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# September 2000
+@c endfile
+@end ignore
+@c file eg/lib/rewind.awk
+
+function rewind( i)
+@{
+ # shift remaining arguments up
+ for (i = ARGC; i > ARGIND; i--)
+ ARGV[i] = ARGV[i-1]
+
+ # make sure gawk knows to keep going
+ ARGC++
+
+ # make current file next to get done
+ ARGV[ARGIND+1] = FILENAME
+
+ # do it
+ nextfile
+@}
+@c endfile
+@end example
+
+The @code{rewind()} function relies on the @code{ARGIND} variable
+(@pxref{Auto-set}), which is specific to @command{gawk}. It also
+relies on the @code{nextfile} keyword (@pxref{Nextfile Statement}).
+Because of this, you should not call it from an @code{ENDFILE} rule.
+(This isn't necessary anyway, because @command{gawk} goes to the next
+file as soon as an @code{ENDFILE} rule finishes!)
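+
+As a sketch of how @code{rewind()} might be used (the marker line is
+hypothetical), the following rereads the current file once if a record
+consisting of just @samp{rescan} is seen:
+
+@example
+$0 == "rescan" && ! _rescanned[FILENAME]++ @{ rewind() @}
+
+@{ print FILENAME, FNR, $0 @}
+@end example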
+
+@node File Checking
+@subsection Checking for Readable @value{DDF}s
+
+@cindex troubleshooting, readable data files
+@cindex readable data files@comma{} checking
+@cindex files, skipping
+Normally, if you give @command{awk} a @value{DF} that isn't readable,
+it stops with a fatal error. There are times when you might want to
+just ignore such files and keep going.@footnote{The @code{BEGINFILE}
+special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative
+mechanism for dealing with files that can't be opened. However, the
+code here provides a portable solution.} You can do this by prepending
+the following program to your @command{awk} program:
+
+@cindex @code{readable.awk} program
+@example
+@c file eg/lib/readable.awk
+# readable.awk --- library file to skip over unreadable files
+@c endfile
+@ignore
+@c file eg/lib/readable.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# October 2000
+# December 2010
+@c endfile
+@end ignore
+@c file eg/lib/readable.awk
+
+BEGIN @{
+ for (i = 1; i < ARGC; i++) @{
+ if (ARGV[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
+ || ARGV[i] == "-" || ARGV[i] == "/dev/stdin")
+ continue # assignment or standard input
+ else if ((getline junk < ARGV[i]) < 0) # unreadable
+ delete ARGV[i]
+ else
+ close(ARGV[i])
+ @}
+@}
+@c endfile
+@end example
+
+@cindex troubleshooting, @code{getline} function
+This works, because the @code{getline} won't be fatal.
+Removing the element from @code{ARGV} with @code{delete}
+skips the file (because it's no longer in the list).
+See also @ref{ARGC and ARGV}.
+
+Because @command{awk} variable names only allow the English letters,
+the regular expression check purposely does not use character classes
+such as @samp{[:alpha:]} and @samp{[:alnum:]}
+(@pxref{Bracket Expressions})
+
+@node Empty Files
+@subsection Checking for Zero-length Files
+
+All known @command{awk} implementations silently skip over zero-length files.
+This is a by-product of @command{awk}'s implicit
+read-a-record-and-match-against-the-rules loop: when @command{awk}
+tries to read a record from an empty file, it immediately receives an
+end of file indication, closes the file, and proceeds on to the next
+command-line @value{DF}, @emph{without} executing any user-level
+@command{awk} program code.
+
+Using @command{gawk}'s @code{ARGIND} variable
+(@pxref{Built-in Variables}), it is possible to detect when an empty
+@value{DF} has been skipped. Similar to the library file presented
+in @ref{Filetrans Function}, the following library file calls a function named
+@code{zerofile()} that the user must provide. The arguments passed are
+the @value{FN} and the position in @code{ARGV} where it was found:
+
+@cindex @code{zerofile.awk} program
+@example
+@c file eg/lib/zerofile.awk
+# zerofile.awk --- library file to process empty input files
+@c endfile
+@ignore
+@c file eg/lib/zerofile.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# June 2003
+@c endfile
+@end ignore
+@c file eg/lib/zerofile.awk
+
+BEGIN @{ Argind = 0 @}
+
+ARGIND > Argind + 1 @{
+ for (Argind++; Argind < ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+
+ARGIND != Argind @{ Argind = ARGIND @}
+
+END @{
+ if (ARGIND > Argind)
+ for (Argind++; Argind <= ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+@c endfile
+@end example
+
+The user-level variable @code{Argind} allows the @command{awk} program
+to track its progress through @code{ARGV}. Whenever the program detects
+that @code{ARGIND} is greater than @samp{Argind + 1}, it means that one or
+more empty files were skipped. The action then calls @code{zerofile()} for
+each such file, incrementing @code{Argind} along the way.
+
+The @samp{ARGIND != Argind} rule simply keeps @code{Argind} up to date
+in the normal case.
+
+Finally, the @code{END} rule catches the case of any empty files at
+the end of the command-line arguments. Note that the test in the
+condition of the @code{for} loop uses the @samp{<=} operator,
+not @samp{<}.
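+
+A user-supplied @code{zerofile()} function can be as simple as the
+following sketch, which merely issues a warning:
+
+@example
+function zerofile(filename, argind)
+@{
+    printf("%s: empty data file (argument %d), skipped\n",
+           filename, argind) > "/dev/stderr"
+@}
+@end example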
+
+@node Ignoring Assigns
+@subsection Treating Assignments as @value{FFN}s
+
+@cindex assignments as filenames
+@cindex filenames, assignments as
+Occasionally, you might not want @command{awk} to process command-line
+variable assignments
+(@pxref{Assignment Options}).
+In particular, if you have a @value{FN} that contains an @samp{=} character,
+@command{awk} treats the @value{FN} as an assignment, and does not process it.
+
+Some users have suggested an additional command-line option for @command{gawk}
+to disable command-line assignments. However, some simple programming with
+a library file does the trick:
+
+@cindex @code{noassign.awk} program
+@example
+@c file eg/lib/noassign.awk
+# noassign.awk --- library file to avoid the need for a
+# special option that disables command-line assignments
+@c endfile
+@ignore
+@c file eg/lib/noassign.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# October 1999
+@c endfile
+@end ignore
+@c file eg/lib/noassign.awk
+
+function disable_assigns(argc, argv, i)
+@{
+ for (i = 1; i < argc; i++)
+ if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
+ argv[i] = ("./" argv[i])
+@}
+
+BEGIN @{
+ if (No_command_assign)
+ disable_assigns(ARGC, ARGV)
+@}
+@c endfile
+@end example
+
+You then run your program this way:
+
+@example
+awk -v No_command_assign=1 -f noassign.awk -f yourprog.awk *
+@end example
+
+The function works by looping through the arguments.
+It prepends @samp{./} to
+any argument that matches the form
+of a variable assignment, turning that argument into a @value{FN}.
+
+The use of @code{No_command_assign} allows you to disable command-line
+assignments at invocation time, by giving the variable a true value.
+When not set, it is initially zero (i.e., false), so the command-line arguments
+are left alone.
+
+@node Getopt Function
+@section Processing Command-Line Options
+
+@cindex libraries of @command{awk} functions, command-line options
+@cindex functions, library, command-line options
+@cindex command-line options, processing
+@cindex options, command-line, processing
+@cindex functions, library, C library
+@cindex arguments, processing
+Most utilities on POSIX-compatible systems take options on
+the command line that can be used to change the way a program behaves.
+@command{awk} is an example of such a program
+(@pxref{Options}).
+Often, options take @dfn{arguments} (i.e., data that the program needs to
+correctly obey the command-line option). For example, @command{awk}'s
+@option{-F} option requires a string to use as the field separator.
+The first occurrence on the command line of either @option{--} or a
+string that does not begin with @samp{-} ends the options.
+
+@cindex @code{getopt()} function (C library)
+Modern Unix systems provide a C function named @code{getopt()} for processing
+command-line arguments. The programmer provides a string describing the
+one-letter options. If an option requires an argument, it is followed in the
+string with a colon. @code{getopt()} is also passed the
+count and values of the command-line arguments and is called in a loop.
+@code{getopt()} processes the command-line arguments for option letters.
+Each time around the loop, it returns a single character representing the
+next option letter that it finds, or @samp{?} if it finds an invalid option.
+When it returns @minus{}1, there are no options left on the command line.
+
+When using @code{getopt()}, options that do not take arguments can be
+grouped together. Furthermore, options that take arguments require that the
+argument be present. The argument can immediately follow the option letter,
+or it can be a separate command-line argument.
+
+Given a hypothetical program that takes
+three command-line options, @option{-a}, @option{-b}, and @option{-c}, where
+@option{-b} requires an argument, all of the following are valid ways of
+invoking the program:
+
+@example
+prog -a -b foo -c data1 data2 data3
+prog -ac -bfoo -- data1 data2 data3
+prog -acbfoo data1 data2 data3
+@end example
+
+Notice that when the argument is grouped with its option, the rest of
+the argument is considered to be the option's argument.
+In this example, @option{-acbfoo} indicates that all of the
+@option{-a}, @option{-b}, and @option{-c} options were supplied,
+and that @samp{foo} is the argument to the @option{-b} option.
+
+@code{getopt()} provides four external variables that the programmer can use:
+
+@table @code
+@item optind
+The index in the argument value array (@code{argv}) where the first
+nonoption command-line argument can be found.
+
+@item optarg
+The string value of the argument to an option.
+
+@item opterr
+Usually @code{getopt()} prints an error message when it finds an invalid
+option. Setting @code{opterr} to zero disables this feature. (An
+application might want to print its own error message.)
+
+@item optopt
+The letter representing the command-line option.
+@end table
+
+The following C fragment shows how @code{getopt()} might process command-line
+arguments for @command{awk}:
+
+@example
+int
+main(int argc, char *argv[])
+@{
+ @dots{}
+ /* print our own message */
+ opterr = 0;
+ while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) @{
+ switch (c) @{
+ case 'f': /* file */
+ @dots{}
+ break;
+ case 'F': /* field separator */
+ @dots{}
+ break;
+ case 'v': /* variable assignment */
+ @dots{}
+ break;
+ case 'W': /* extension */
+ @dots{}
+ break;
+ case '?':
+ default:
+ usage();
+ break;
+ @}
+ @}
+ @dots{}
+@}
+@end example
+
+As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
+function to process both normal and GNU-style long options
+(@pxref{Options}).
+
+The abstraction provided by @code{getopt()} is very useful and is quite
+handy in @command{awk} programs as well. Following is an @command{awk}
+version of @code{getopt()}. This function highlights one of the
+greatest weaknesses in @command{awk}, which is that it is very poor at
+manipulating single characters. Repeated calls to @code{substr()} are
+necessary for accessing individual characters
+(@pxref{String Functions}).@footnote{This
+function was written before @command{gawk} acquired the ability to
+split strings into single characters using @code{""} as the separator.
+We have left it alone, as using @code{substr()} is more portable.}
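+
+For reference, the @command{gawk}-only alternative mentioned in the
+footnote is a one-liner along these lines; it is not used here, in
+order to keep the function portable:
+
+@example
+n = split("-abx", chars, "")   # chars[1] = "-", chars[2] = "a", ...
+@end example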
+
+The discussion that follows walks through the code a bit at a time:
+
+@cindex @code{getopt()} user-defined function
+@example
+@c file eg/lib/getopt.awk
+# getopt.awk --- Do C library getopt(3) function in awk
+@c endfile
+@ignore
+@c file eg/lib/getopt.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+#
+# Initial version: March, 1991
+# Revised: May, 1993
+@c endfile
+@end ignore
+@c file eg/lib/getopt.awk
+
+# External variables:
+# Optind -- index in ARGV of first nonoption argument
+# Optarg -- string value of argument to current option
+# Opterr -- if nonzero, print our own diagnostic
+# Optopt -- current option letter
+
+# Returns:
+# -1 at end of options
+# "?" for unrecognized option
+# <c> a character representing the current option
+
+# Private Data:
+# _opti -- index in multiflag option, e.g., -abc
+@c endfile
+@end example
+
+The function starts out with comments presenting
+a list of the global variables it uses,
+what the return values are, what they mean, and any global variables that
+are ``private'' to this library function. Such documentation is essential
+for any program, and particularly for library functions.
+
+The @code{getopt()} function first checks that it was indeed called with
+a string of options (the @code{options} parameter). If @code{options}
+has a zero length, @code{getopt()} immediately returns @minus{}1:
+
+@cindex @code{getopt()} user-defined function
+@example
+@c file eg/lib/getopt.awk
+function getopt(argc, argv, options, thisopt, i)
+@{
+ if (length(options) == 0) # no options given
+ return -1
+
+@group
+ if (argv[Optind] == "--") @{ # all done
+ Optind++
+ _opti = 0
+ return -1
+@end group
+ @} else if (argv[Optind] !~ /^-[^:[:space:]]/) @{
+ _opti = 0
+ return -1
+ @}
+@c endfile
+@end example
+
+The next thing to check for is the end of the options. A @option{--}
+ends the command-line options, as does any command-line argument that
+does not begin with a @samp{-}. @code{Optind} is used to step through
+the array of command-line arguments; it retains its value across calls
+to @code{getopt()}, because it is a global variable.
+
+The regular expression that is used, @code{@w{/^-[^:[:space:]]/}},
+checks for a @samp{-} followed by anything
+that is not whitespace and not a colon.
+If the current command-line argument does not match this pattern,
+it is not an option, and it ends option processing. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
+@c endfile
+@end example
+
+The @code{_opti} variable tracks the position in the current command-line
+argument (@code{argv[Optind]}). If multiple options are
+grouped together with one @samp{-} (e.g., @option{-abx}), it is necessary
+to return them to the user one at a time.
+
+If @code{_opti} is equal to zero, it is set to two, which is the index in
+the string of the next character to look at (we skip the @samp{-}, which
+is at position one). The variable @code{thisopt} holds the character,
+obtained with @code{substr()}. It is saved in @code{Optopt} for the main
+program to use.
+
+If @code{thisopt} is not in the @code{options} string, then it is an
+invalid option. If @code{Opterr} is nonzero, @code{getopt()} prints an error
+message on the standard error that is similar to the message from the C
+version of @code{getopt()}.
+
+Because the option is invalid, it is necessary to skip it and move on to the
+next option character. If @code{_opti} is greater than or equal to the
+length of the current command-line argument, it is necessary to move on
+to the next argument, so @code{Optind} is incremented and @code{_opti} is reset
+to zero. Otherwise, @code{Optind} is left alone and @code{_opti} is merely
+incremented.
+
+In any case, because the option is invalid, @code{getopt()} returns @code{"?"}.
+The main program can examine @code{Optopt} if it needs to know what the
+invalid option letter actually is. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
+@c endfile
+@end example
+
+If the option requires an argument, the option letter is followed by a colon
+in the @code{options} string. If there are remaining characters in the
+current command-line argument (@code{argv[Optind]}), then the rest of that
+string is assigned to @code{Optarg}. Otherwise, the next command-line
+argument is used (@samp{-xFOO} versus @samp{@w{-x FOO}}). In either case,
+@code{_opti} is reset to zero, because there are no more characters left to
+examine in the current command-line argument. Continuing:
+
+@example
+@c file eg/lib/getopt.awk
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@}
+@c endfile
+@end example
+
+Finally, if @code{_opti} is either zero or greater than the length of the
+current command-line argument, it means this element in @code{argv} is
+through being processed, so @code{Optind} is incremented to point to the
+next element in @code{argv}. If neither condition is true, then only
+@code{_opti} is incremented, so that the next option letter can be processed
+on the next call to @code{getopt()}.
+
+The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
+@code{Opterr} is set to one, because the default behavior is for @code{getopt()}
+to print a diagnostic message upon seeing an invalid option. @code{Optind}
+is set to one, because there's no reason to look at the program name, which is
+in @code{ARGV[0]}:
+
+@example
+@c file eg/lib/getopt.awk
+BEGIN @{
+ Opterr = 1 # default is to diagnose
+ Optind = 1 # skip ARGV[0]
+
+ # test program
+ if (_getopt_test) @{
+ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
+ printf("c = <%c>, Optarg = <%s>\n",
+ _go_c, Optarg)
+ printf("non-option arguments:\n")
+ for (; Optind < ARGC; Optind++)
+ printf("\tARGV[%d] = <%s>\n",
+ Optind, ARGV[Optind])
+ @}
+@}
+@c endfile
+@end example
+
+The rest of the @code{BEGIN} rule is a simple test program. Here is the
+result of two sample runs of the test program:
+
+@example
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
+@print{} c = <a>, Optarg = <>
+@print{} c = <c>, Optarg = <>
+@print{} c = <b>, Optarg = <ARG>
+@print{} non-option arguments:
+@print{} ARGV[3] = <bax>
+@print{} ARGV[4] = <-x>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
+@print{} c = <a>, Optarg = <>
+@error{} x -- invalid option
+@print{} c = <?>, Optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[4] = <xyz>
+@print{} ARGV[5] = <abc>
+@end example
+
+In both runs, the first @option{--} terminates the arguments to
+@command{awk}, so that it does not try to interpret the @option{-a},
+etc., as its own options.
+
+@quotation NOTE
+After @code{getopt()} is through,
+user-level code must clear out all the elements of @code{ARGV} from 1
+to @code{Optind}, so that @command{awk} does not try to process the
+command-line options as @value{FN}s.
+@end quotation
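+
+A main program using this @code{getopt()} typically contains a fragment
+along the following lines (a sketch; the @option{-a} and @option{-b}
+options are purely illustrative):
+
+@example
+BEGIN @{
+    while ((c = getopt(ARGC, ARGV, "ab:")) != -1) @{
+        if (c == "a")
+            a_option = 1
+        else if (c == "b")
+            b_arg = Optarg
+        else
+            exit 1    # getopt() already printed a diagnostic
+    @}
+    for (i = 1; i < Optind; i++)
+        ARGV[i] = ""  # so they are not treated as file names
+@}
+@end example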
+
+Using @samp{#!} with the @option{-E} option may help avoid
+conflicts between your program's options and @command{gawk}'s options,
+as @option{-E} causes @command{gawk} to abandon processing of
+further options
+(@DBPXREF{Executable Scripts} and
+@ifnotdocbook
+@pxref{Options}).
+@end ifnotdocbook
+@ifdocbook
+@ref{Options}).
+@end ifdocbook
+
+Several of the sample programs presented in
+@ref{Sample Programs},
+use @code{getopt()} to process their arguments.
+
+@node Passwd Functions
+@section Reading the User Database
+
+@cindex libraries of @command{awk} functions, user database, reading
+@cindex functions, library, user database@comma{} reading
+@cindex user database@comma{} reading
+@cindex database, users@comma{} reading
+@cindex @code{PROCINFO} array
+The @code{PROCINFO} array
+(@pxref{Built-in Variables})
+provides access to the current user's real and effective user and group ID
+numbers, and if available, the user's supplementary group set.
+However, because these are numbers, they do not provide very useful
+information to the average user. There needs to be some way to find the
+user information associated with the user and group ID numbers. This
+@value{SECTION} presents a suite of functions for retrieving information from the
+user database. @DBXREF{Group Functions}
+for a similar suite that retrieves information from the group database.
+
+@cindex @code{getpwent()} function (C library)
+@cindex @code{getpwent()} user-defined function
+@cindex users, information about, retrieving
+@cindex login information
+@cindex account information
+@cindex password file
+@cindex files, password
+The POSIX standard does not define the file where user information is
+kept. Instead, it provides the @code{<pwd.h>} header file
+and several C language subroutines for obtaining user information.
+The primary function is @code{getpwent()}, for ``get password entry.''
+The ``password'' comes from the original user database file,
+@file{/etc/passwd}, which stores user information, along with the
+encrypted passwords (hence the name).
+
+@cindex @command{pwcat} program
+Although an @command{awk} program could simply read @file{/etc/passwd}
+directly, this file may not contain complete information about the
+system's set of users.@footnote{It is often the case that password
+information is stored in a network database.} To be sure you are able to
+produce a readable and complete version of the user database, it is necessary
+to write a small C program that calls @code{getpwent()}. @code{getpwent()}
+is defined as returning a pointer to a @code{struct passwd}. Each time it
+is called, it returns the next entry in the database. When there are
+no more entries, it returns @code{NULL}, the null pointer. When this
+happens, the C program should call @code{endpwent()} to close the database.
+Following is @command{pwcat}, a C program that ``cats'' the password database:
+
+@example
+@c file eg/lib/pwcat.c
+/*
+ * pwcat.c
+ *
+ * Generate a printable version of the password database.
+ */
+@c endfile
+@ignore
+@c file eg/lib/pwcat.c
+/*
+ * Arnold Robbins, arnold@@skeeve.com, May 1993
+ * Public Domain
+ * December 2010, move to ANSI C definition for main().
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+@c endfile
+@end ignore
+@c file eg/lib/pwcat.c
+#include <stdio.h>
+#include <pwd.h>
+
+@c endfile
+@ignore
+@c file eg/lib/pwcat.c
+#if defined (STDC_HEADERS)
+#include <stdlib.h>
+#endif
+
+@c endfile
+@end ignore
+@c file eg/lib/pwcat.c
+int
+main(int argc, char **argv)
+@{
+ struct passwd *p;
+
+ while ((p = getpwent()) != NULL)
+@c endfile
+@ignore
+@c file eg/lib/pwcat.c
+#ifdef ZOS_USS
+ printf("%s:%ld:%ld:%s:%s\n",
+ p->pw_name, (long) p->pw_uid,
+ (long) p->pw_gid, p->pw_dir, p->pw_shell);
+#else
+@c endfile
+@end ignore
+@c file eg/lib/pwcat.c
+ printf("%s:%s:%ld:%ld:%s:%s:%s\n",
+ p->pw_name, p->pw_passwd, (long) p->pw_uid,
+ (long) p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell);
+@c endfile
+@ignore
+@c file eg/lib/pwcat.c
+#endif
+@c endfile
+@end ignore
+@c file eg/lib/pwcat.c
+
+ endpwent();
+ return 0;
+@}
+@c endfile
+@end example
+
+If you don't understand C, don't worry about it.
+The output from @command{pwcat} is the user database, in the traditional
+@file{/etc/passwd} format of colon-separated fields. The fields are:
+
+@table @asis
+@item Login name
+The user's login name.
+
+@item Encrypted password
+The user's encrypted password. This may not be available on some systems.
+
+@item User-ID
+The user's numeric user ID number.
+(On some systems, it's a C @code{long}, and not an @code{int}. Thus
+we cast it to @code{long} for all cases.)
+
+@item Group-ID
+The user's numeric group ID number.
+(Similar comments about @code{long} versus @code{int} apply here.)
+
+@item Full name
+The user's full name, and perhaps other information associated with the
+user.
+
+@item Home directory
+The user's login (or ``home'') directory (familiar to shell programmers as
+@code{$HOME}).
+
+@item Login shell
+The program that is run when the user logs in. This is usually a
+shell, such as Bash.
+@end table
+
+A few lines representative of @command{pwcat}'s output are as follows:
+
+@cindex Jacobs, Andrew
+@cindex Robbins, Arnold
+@cindex Robbins, Miriam
+@example
+$ @kbd{pwcat}
+@print{} root:x:0:1:Operator:/:/bin/sh
+@print{} nobody:*:65534:65534::/:
+@print{} daemon:*:1:1::/:
+@print{} sys:*:2:2::/:/bin/csh
+@print{} bin:*:3:3::/bin:
+@print{} arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
+@print{} miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh
+@print{} andy:abcca2:113:10:Andy Jacobs:/home/andy:/bin/sh
+@dots{}
+@end example
+
+With that introduction, following is a group of functions for getting user
+information. There are several functions here, corresponding to the C
+functions of the same names:
+
+@cindex @code{_pw_init()} user-defined function
+@example
+@c file eg/lib/passwdawk.in
+# passwd.awk --- access password file information
+@c endfile
+@ignore
+@c file eg/lib/passwdawk.in
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised October 2000
+# Revised December 2010
+@c endfile
+@end ignore
+@c file eg/lib/passwdawk.in
+
+BEGIN @{
+ # tailor this to suit your system
+ _pw_awklib = "/usr/local/libexec/awk/"
+@}
+
+function _pw_init( oldfs, oldrs, olddol0, pwcat, using_fw, using_fpat)
+@{
+ if (_pw_inited)
+ return
+
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")
+ using_fpat = (PROCINFO["FS"] == "FPAT")
+ FS = ":"
+ RS = "\n"
+
+ pwcat = _pw_awklib "pwcat"
+ while ((pwcat | getline) > 0) @{
+ _pw_byname[$1] = $0
+ _pw_byuid[$3] = $0
+ _pw_bycount[++_pw_total] = $0
+ @}
+ close(pwcat)
+ _pw_count = 0
+ _pw_inited = 1
+ FS = oldfs
+ if (using_fw)
+ FIELDWIDTHS = FIELDWIDTHS
+ else if (using_fpat)
+ FPAT = FPAT
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@end example
+
+@cindex @code{BEGIN} pattern, @code{pwcat} program
+The @code{BEGIN} rule sets a private variable to the directory where
+@command{pwcat} is stored. Because it is used to help out an @command{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk};
+however, you might want it to be in a different directory on your system.
+
+The function @code{_pw_init()} fills three copies of the user information
+into three associative arrays. The arrays are indexed by username
+(@code{_pw_byname}), by user ID number (@code{_pw_byuid}), and by order of
+occurrence (@code{_pw_bycount}).
+The variable @code{_pw_inited} is used for efficiency, as @code{_pw_init()}
+needs to be called only once.
+
+@cindex @code{PROCINFO} array, testing the field splitting
+@cindex @code{getline} command, @code{_pw_init()} function
+Because this function uses @code{getline} to read information from
+@command{pwcat}, it first saves the values of @code{FS}, @code{RS}, and @code{$0}.
+It notes in the variable @code{using_fw} whether field splitting
+with @code{FIELDWIDTHS} is in effect or not.
+Doing so is necessary, as these functions could be called
+from anywhere within a user's program, and the user may have his
+or her own way of splitting records and fields.
+This makes it possible to restore the correct
+field-splitting mechanism later. The test can only be true for
+@command{gawk}. It is false if using @code{FS} or @code{FPAT},
+or on some other @command{awk} implementation.
+
+The code that checks whether @code{FPAT} is in use, relying on
+@code{using_fpat} and @code{PROCINFO["FS"]}, is similar.
+
+The main part of the function uses a loop to read database lines, split
+the line into fields, and then store the line into each array as necessary.
+When the loop is done, @code{@w{_pw_init()}} cleans up by closing the pipeline,
+setting @code{@w{_pw_inited}} to one, and restoring @code{FS}
+(and @code{FIELDWIDTHS} or @code{FPAT}
+if necessary), @code{RS}, and @code{$0}.
+The use of @code{@w{_pw_count}} is explained shortly.
+
+@cindex @code{getpwnam()} function (C library)
+The @code{getpwnam()} function takes a username as a string argument. If that
+user is in the database, it returns the appropriate line. Otherwise, it
+relies on the array reference to a nonexistent
+element to create the element with the null string as its value:
+
+@cindex @code{getpwnam()} user-defined function
+@example
+@group
+@c file eg/lib/passwdawk.in
+function getpwnam(name)
+@{
+ _pw_init()
+ return _pw_byname[name]
+@}
+@c endfile
+@end group
+@end example
+
+@cindex @code{getpwuid()} function (C library)
+Similarly, the @code{getpwuid()} function takes a user ID number
+argument. If that user number is in the database, it returns the
+appropriate line. Otherwise, it returns the null string:
+
+@cindex @code{getpwuid()} user-defined function
+@example
+@c file eg/lib/passwdawk.in
+function getpwuid(uid)
+@{
+ _pw_init()
+ return _pw_byuid[uid]
+@}
+@c endfile
+@end example
+
+@cindex @code{getpwent()} function (C library)
+The @code{getpwent()} function simply steps through the database, one entry at
+a time. It uses @code{_pw_count} to track its current position in the
+@code{_pw_bycount} array:
+
+@cindex @code{getpwent()} user-defined function
+@example
+@c file eg/lib/passwdawk.in
+function getpwent()
+@{
+ _pw_init()
+ if (_pw_count < _pw_total)
+ return _pw_bycount[++_pw_count]
+ return ""
+@}
+@c endfile
+@end example
+
+@cindex @code{endpwent()} function (C library)
+The @code{@w{endpwent()}} function resets @code{@w{_pw_count}} to zero, so that
+subsequent calls to @code{getpwent()} start over again:
+
+@cindex @code{endpwent()} user-defined function
+@example
+@c file eg/lib/passwdawk.in
+function endpwent()
+@{
+ _pw_count = 0
+@}
+@c endfile
+@end example
+
+A conscious design decision in this suite is that each subroutine calls
+@code{@w{_pw_init()}} to initialize the database arrays.
+The overhead of running
+a separate process to generate the user database, and the I/O to scan it,
+are only incurred if the user's main program actually calls one of these
+functions. If this library file is loaded along with a user's program, but
+none of the routines are ever called, then there is no extra runtime overhead.
+(The alternative is to move the body of @code{@w{_pw_init()}} into a
+@code{BEGIN} rule, which always runs @command{pwcat}. This simplifies the
+code but runs an extra process that may never be needed.)
+
+In turn, calling @code{_pw_init()} is not too expensive, because the
+@code{_pw_inited} variable keeps the program from reading the data more than
+once. If you are worried about squeezing every last cycle out of your
+@command{awk} program, the check of @code{_pw_inited} could be moved out of
+@code{_pw_init()} and duplicated in all the other functions. In practice,
+this is not necessary, as most @command{awk} programs are I/O-bound,
+and such a change would clutter up the code.
+
+The @command{id} program in @DBREF{Id Program}
+uses these functions.
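+
+As a quick illustration (a sketch, with an assumed username), the
+following prints a user's home directory by splitting apart the record
+returned by @code{getpwnam()}:
+
+@example
+BEGIN @{
+    pwent = getpwnam("arnold")
+    if (pwent != "") @{
+        split(pwent, fields, ":")
+        print "home directory is", fields[6]
+    @} else
+        print "no such user" > "/dev/stderr"
+@}
+@end example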
+
+@node Group Functions
+@section Reading the Group Database
+
+@cindex libraries of @command{awk} functions, group database, reading
+@cindex functions, library, group database@comma{} reading
+@cindex group database, reading
+@cindex database, group, reading
+@cindex @code{PROCINFO} array, and group membership
+@cindex @code{getgrent()} function (C library)
+@cindex @code{getgrent()} user-defined function
+@cindex groups@comma{} information about
+@cindex account information
+@cindex group file
+@cindex files, group
+Much of the discussion presented in
+@DBREF{Passwd Functions}
+applies to the group database as well. Although there has traditionally
+been a well-known file (@file{/etc/group}) in a well-known format, the POSIX
+standard only provides a set of C library routines
+(@code{<grp.h>} and @code{getgrent()})
+for accessing the information.
+Even though this file may exist, it may not have
+complete information. Therefore, as with the user database, it is necessary
+to have a small C program that generates the group database as its output.
+@command{grcat}, a C program that ``cats'' the group database,
+is as follows:
+
+@cindex @command{grcat} program
+@example
+@c file eg/lib/grcat.c
+/*
+ * grcat.c
+ *
+ * Generate a printable version of the group database.
+ */
+@c endfile
+@ignore
+@c file eg/lib/grcat.c
+/*
+ * Arnold Robbins, arnold@@skeeve.com, May 1993
+ * Public Domain
+ * December 2010, move to ANSI C definition for main().
+ */
+
+/* For OS/2, do nothing. */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if defined (STDC_HEADERS)
+#include <stdlib.h>
+#endif
+
+#ifndef HAVE_GETGRENT
+int main() { return 0; }
+#else
+@c endfile
+@end ignore
+@c file eg/lib/grcat.c
+#include <stdio.h>
+#include <grp.h>
+
+int
+main(int argc, char **argv)
+@{
+ struct group *g;
+ int i;
+
+ while ((g = getgrent()) != NULL) @{
+@c endfile
+@ignore
+@c file eg/lib/grcat.c
+#ifdef ZOS_USS
+ printf("%s:%ld:", g->gr_name, (long) g->gr_gid);
+#else
+@c endfile
+@end ignore
+@c file eg/lib/grcat.c
+ printf("%s:%s:%ld:", g->gr_name, g->gr_passwd,
+ (long) g->gr_gid);
+@c endfile
+@ignore
+@c file eg/lib/grcat.c
+#endif
+@c endfile
+@end ignore
+@c file eg/lib/grcat.c
+ for (i = 0; g->gr_mem[i] != NULL; i++) @{
+ printf("%s", g->gr_mem[i]);
+@group
+ if (g->gr_mem[i+1] != NULL)
+ putchar(',');
+ @}
+@end group
+ putchar('\n');
+ @}
+ endgrent();
+ return 0;
+@}
+@c endfile
+@ignore
+@c file eg/lib/grcat.c
+#endif /* HAVE_GETGRENT */
+@c endfile
+@end ignore
+@end example
+
+Each line in the group database represents one group. The fields are
+separated with colons and represent the following information:
+
+@table @asis
+@item Group Name
+The group's name.
+
+@item Group Password
+The group's encrypted password. In practice, this field is never used;
+it is usually empty or set to @samp{*}.
+
+@item Group ID Number
+The group's numeric group ID number;
+the association of name to number must be unique within the file.
+(On some systems it's a C @code{long}, and not an @code{int}. Thus
+we cast it to @code{long} for all cases.)
+
+@item Group Member List
+A comma-separated list of usernames. These users are members of the group.
+Modern Unix systems allow users to be members of several groups
+simultaneously. If your system does, then there are elements
+@code{"group1"} through @code{"group@var{N}"} in @code{PROCINFO}
+for those group ID numbers.
+(Note that @code{PROCINFO} is a @command{gawk} extension;
+@pxref{Built-in Variables}.)
+@end table
+
+Here is what running @command{grcat} might produce:
+
+@example
+$ @kbd{grcat}
+@print{} wheel:*:0:arnold
+@print{} nogroup:*:65534:
+@print{} daemon:*:1:
+@print{} kmem:*:2:
+@print{} staff:*:10:arnold,miriam,andy
+@print{} other:*:20:
+@dots{}
+@end example
+
+Here are the functions for obtaining information from the group database.
+There are several, modeled after the C library functions of the same names:
+
+@cindex @code{getline} command, @code{_gr_init()} user-defined function
+@cindex @code{_gr_init()} user-defined function
+@example
+@c file eg/lib/groupawk.in
+# group.awk --- functions for dealing with the group file
+@c endfile
+@ignore
+@c file eg/lib/groupawk.in
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised October 2000
+# Revised December 2010
+@c endfile
+@end ignore
+@c line break on _gr_init for smallbook
+@c file eg/lib/groupawk.in
+
+BEGIN @{
+ # Change to suit your system
+ _gr_awklib = "/usr/local/libexec/awk/"
+@}
+
+function _gr_init( oldfs, oldrs, olddol0, grcat,
+ using_fw, using_fpat, n, a, i)
+@{
+ if (_gr_inited)
+ return
+
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")
+ using_fpat = (PROCINFO["FS"] == "FPAT")
+ FS = ":"
+ RS = "\n"
+
+ grcat = _gr_awklib "grcat"
+ while ((grcat | getline) > 0) @{
+ if ($1 in _gr_byname)
+ _gr_byname[$1] = _gr_byname[$1] "," $4
+ else
+ _gr_byname[$1] = $0
+ if ($3 in _gr_bygid)
+ _gr_bygid[$3] = _gr_bygid[$3] "," $4
+ else
+ _gr_bygid[$3] = $0
+
+ n = split($4, a, "[ \t]*,[ \t]*")
+ for (i = 1; i <= n; i++)
+ if (a[i] in _gr_groupsbyuser)
+                _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
+ else
+ _gr_groupsbyuser[a[i]] = $1
+
+ _gr_bycount[++_gr_count] = $0
+ @}
+ close(grcat)
+ _gr_count = 0
+ _gr_inited++
+ FS = oldfs
+ if (using_fw)
+ FIELDWIDTHS = FIELDWIDTHS
+ else if (using_fpat)
+ FPAT = FPAT
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@end example
+
+The @code{BEGIN} rule sets a private variable to the directory where
+@command{grcat} is stored. Because it is used to help out an @command{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk}. You might
+want it to be in a different directory on your system.
+
+These routines follow the same general outline as the user database routines
+(@pxref{Passwd Functions}).
+The @code{@w{_gr_inited}} variable is used to
+ensure that the database is scanned no more than once.
+The @code{@w{_gr_init()}} function first saves @code{FS},
+@code{RS}, and
+@code{$0}, and then sets @code{FS} and @code{RS} to the correct values for
+scanning the group information.
+It also takes care to note whether @code{FIELDWIDTHS} or @code{FPAT}
+is being used, and to restore the appropriate field splitting mechanism.
+
+The group information is stored in several associative arrays.
+The arrays are indexed by group name (@code{@w{_gr_byname}}), by group ID number
+(@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}).
+There is an additional array indexed by username (@code{@w{_gr_groupsbyuser}}),
+which is a space-separated list of groups to which each user belongs.
+
+Unlike the user database, it is possible to have multiple records in the
+database for the same group. This is common when a group has a large number
+of members. A pair of such entries might look like the following:
+
+@example
+tvpeople:*:101:johny,jay,arsenio
+tvpeople:*:101:david,conan,tom,joan
+@end example
+
+For this reason, @code{_gr_init()} looks to see if a group name or
+group ID number has already been seen. If so, the usernames are
+simply concatenated onto the previous list of users.@footnote{There is actually a
+subtle problem with the code just presented: if the first record seen for a
+group has an empty member list, the concatenation adds the later names with
+a leading comma. The code also doesn't check that there is a @code{$4} at all.}
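+
+One way to address the footnote's concerns (a sketch, not part of the
+@file{group.awk} code) is to append only when the new record actually
+has member names, and to check whether the stored record's member list
+is still empty:
+
+@example
+if (! ($1 in _gr_byname))
+    _gr_byname[$1] = $0
+else if (NF >= 4 && $4 != "") @{
+    if (_gr_byname[$1] ~ /:$/)     # stored record has no members yet
+        _gr_byname[$1] = _gr_byname[$1] $4
+    else
+        _gr_byname[$1] = _gr_byname[$1] "," $4
+@}
+@end example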
+
+Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores
+@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0},
+initializes @code{_gr_count} to zero
+(it is used later), and makes @code{_gr_inited} nonzero.
+
+@cindex @code{getgrnam()} function (C library)
+The @code{getgrnam()} function takes a group name as its argument, and if that
+group exists, the corresponding record is returned.
+Otherwise, the function
+relies on the array reference to a nonexistent
+element to create the element, with the null string as its value:
+
+@cindex @code{getgrnam()} user-defined function
+@example
+@c file eg/lib/groupawk.in
+function getgrnam(group)
+@{
+ _gr_init()
+ return _gr_byname[group]
+@}
+@c endfile
+@end example
+
+@cindex @code{getgrgid()} function (C library)
+The @code{getgrgid()} function is similar; it takes a numeric group ID and
+looks up the information associated with that group ID:
+
+@cindex @code{getgrgid()} user-defined function
+@example
+@c file eg/lib/groupawk.in
+function getgrgid(gid)
+@{
+ _gr_init()
+ return _gr_bygid[gid]
+@}
+@c endfile
+@end example
+
+@cindex @code{getgruser()} function (C library)
+The @code{getgruser()} function does not have a C counterpart. It takes a
+username and returns the list of groups that have the user as a member:
+
+@cindex @code{getgruser()} function, user-defined
+@example
+@c file eg/lib/groupawk.in
+function getgruser(user)
+@{
+ _gr_init()
+ return _gr_groupsbyuser[user]
+@}
+@c endfile
+@end example
+
+@cindex @code{getgrent()} function (C library)
+The @code{getgrent()} function steps through the database one entry at a time.
+It uses @code{_gr_count} to track its position in the list:
+
+@cindex @code{getgrent()} user-defined function
+@example
+@c file eg/lib/groupawk.in
+function getgrent()
+@{
+ _gr_init()
+ if (++_gr_count in _gr_bycount)
+ return _gr_bycount[_gr_count]
+ return ""
+@}
+@c endfile
+@end example
+
+@cindex @code{endgrent()} function (C library)
+The @code{endgrent()} function resets @code{_gr_count} to zero so that @code{getgrent()} can
+start over again:
+
+@cindex @code{endgrent()} user-defined function
+@example
+@c file eg/lib/groupawk.in
+function endgrent()
+@{
+ _gr_count = 0
+@}
+@c endfile
+@end example
+
+As with the user database routines, each function calls @code{_gr_init()} to
+initialize the arrays. Doing so only incurs the extra overhead of running
+@command{grcat} if these functions are used (as opposed to moving the body of
+@code{_gr_init()} into a @code{BEGIN} rule).
+
+Most of the work is in scanning the database and building the various
+associative arrays. The functions that the user calls are themselves very
+simple, relying on @command{awk}'s associative arrays to do work.
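+
+For example, assuming @file{group.awk} is loaded together with a short
+test program, and given the sample @command{grcat} output shown earlier,
+a quick check might look like this (the comments show plausible results;
+the actual values depend on your system's group database):
+
+@example
+BEGIN @{
+    print getgrgid(0)            # e.g., "wheel:*:0:arnold"
+    print getgruser("arnold")    # e.g., "wheel staff"
+@}
+@end example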
+
+The @command{id} program in @DBREF{Id Program}
+uses these functions.
+
+@node Walking Arrays
+@section Traversing Arrays of Arrays
+
+@DBREF{Arrays of Arrays} described how @command{gawk}
+provides arrays of arrays. In particular, any element of
+an array may be either a scalar, or another array. The
+@code{isarray()} function (@pxref{Type Functions})
+lets you distinguish an array
+from a scalar.
+The following function, @code{walk_array()}, recursively traverses
+an array, printing each element's indices and value.
+You call it with the array and a string representing the name
+of the array:
+
+@cindex @code{walk_array()} user-defined function
+@example
+@c file eg/lib/walkarray.awk
+function walk_array(arr, name, i)
+@{
+ for (i in arr) @{
+ if (isarray(arr[i]))
+ walk_array(arr[i], (name "[" i "]"))
+ else
+ printf("%s[%s] = %s\n", name, i, arr[i])
+ @}
+@}
+@c endfile
+@end example
+
+@noindent
+It works by looping over each element of the array. If any given
+element is itself an array, the function calls itself recursively,
+passing the subarray and a new string representing the current index.
+Otherwise, the function simply prints the element's name, index, and value.
+Here is a main program to demonstrate:
+
+@example
+BEGIN @{
+ a[1] = 1
+ a[2][1] = 21
+ a[2][2] = 22
+ a[3] = 3
+ a[4][1][1] = 411
+ a[4][2] = 42
+
+ walk_array(a, "a")
+@}
+@end example
+
+When run, the program produces the following output:
+
+@example
+$ @kbd{gawk -f walk_array.awk}
+@print{} a[1] = 1
+@print{} a[2][1] = 21
+@print{} a[2][2] = 22
+@print{} a[3] = 3
+@print{} a[4][1][1] = 411
+@print{} a[4][2] = 42
+@end example
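+
+Note that the order in which @samp{for (i in arr)} visits elements is not
+specified, so your output may appear in a different order. If a predictable
+order matters, one @command{gawk}-specific possibility (a sketch; it relies
+on @command{gawk}'s sorted array traversal feature) is to request a sort
+order before walking the array:
+
+@example
+BEGIN @{
+    # gawk-specific: visit indices in ascending numeric order
+    PROCINFO["sorted_in"] = "@@ind_num_asc"
+    a[1] = 1
+    a[2][1] = 21
+    a[3] = 3
+    walk_array(a, "a")
+@}
+@end example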
+
+
+@node Library Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Reading programs is an excellent way to learn Good Programming.
+The functions and programs provided in this @value{CHAPTER} and the next
+are intended to serve that purpose.
+
+@item
+When writing general-purpose library functions, put some thought into how
+to name any global variables so that they won't conflict with variables
+from a user's program.
+
+@item
+The functions presented here fit into the following categories:
+
+@c nested list
+@table @asis
+@item General problems
+Number-to-string conversion, assertions, rounding, random number
+generation, converting characters to numbers, joining strings, getting
+easily usable time-of-day information, and reading a whole file in
+one shot.
+
+@item Managing @value{DF}s
+Noting @value{DF} boundaries, rereading the current file, checking for
+readable files, checking for zero-length files, and treating assignments
+as @value{FN}s.
+
+@item Processing command-line options
+An @command{awk} version of the standard C @code{getopt()} function.
+
+@item Reading the user and group databases
+Two sets of routines that parallel the C library versions.
+
+@item Traversing arrays of arrays
+A simple function to traverse an array of arrays to any depth.
+@end table
+@c end nested list
+
+@end itemize
+
+@c EXCLUDE START
+@node Library Exercises
+@section Exercises
+
+@enumerate
+@item
+In @ref{Empty Files}, we presented the @file{zerofile.awk} program,
+which made use of @command{gawk}'s @code{ARGIND} variable. Can this
+problem be solved without relying on @code{ARGIND}? If so, how?
+
+@ignore
+# zerofile2.awk --- same thing, portably
+
+BEGIN @{
+ ARGIND = Argind = 0
+ for (i = 1; i < ARGC; i++)
+ Fnames[ARGV[i]]++
+
+@}
+FNR == 1 @{
+ while (ARGV[ARGIND] != FILENAME)
+ ARGIND++
+ Seen[FILENAME]++
+ if (Seen[FILENAME] == Fnames[FILENAME])
+ do
+ ARGIND++
+ while (ARGV[ARGIND] != FILENAME)
+@}
+ARGIND > Argind + 1 @{
+ for (Argind++; Argind < ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+ARGIND != Argind @{
+ Argind = ARGIND
+@}
+END @{
+ if (ARGIND < ARGC - 1)
+ ARGIND = ARGC - 1
+ if (ARGIND > Argind)
+ for (Argind++; Argind <= ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+@end ignore
+
+@item
+As a related challenge, revise that code to handle the case where
+an intervening value in @code{ARGV} is a variable assignment.
+
+@item
+@DBREF{Walking Arrays} presented a function that walked a multidimensional
+array to print it out. However, walking an array and processing
+each element is a general-purpose operation. Generalize the
+@code{walk_array()} function by adding an additional parameter named
+@code{process}.
+
+Then, inside the loop, instead of printing the array element's index and
+value, use the indirect function call syntax (@pxref{Indirect Calls})
+on @code{process}, passing it the index and the value.
+
+When calling @code{walk_array()}, you would pass the name of a
+user-defined function that expects to receive an index and a value,
+and then processes the element.
+
+Test your new version by printing the array; you should end up with
+output identical to that of the original version.
+
+@end enumerate
+@c EXCLUDE END
+
+
+@node Sample Programs
+@chapter Practical @command{awk} Programs
+@cindex @command{awk} programs, examples of
+
+@c FULLXREF ON
+@ref{Library Functions},
+presents the idea that reading programs in a language contributes to
+learning that language. This @value{CHAPTER} continues that theme,
+presenting a potpourri of @command{awk} programs for your reading
+enjoyment.
+@c FULLXREF OFF
+@ifnotinfo
+There are three sections.
+The first describes how to run the programs presented
+in this @value{CHAPTER}.
+
+The second presents @command{awk}
+versions of several common POSIX utilities.
+These are programs with which you are likely already familiar,
+and whose problems are therefore well understood.
+By reimplementing these programs in @command{awk},
+you can focus on the @command{awk}-related aspects of solving
+the programming problem.
+
+The third is a grab bag of interesting programs.
+These solve a number of different data-manipulation and management
+problems. Many of the programs are short, which emphasizes @command{awk}'s
+ability to do a lot in just a few lines of code.
+@end ifnotinfo
+
+Many of these programs use library functions presented in
+@ref{Library Functions}.
+
+@menu
+* Running Examples:: How to run these examples.
+* Clones:: Clones of common utilities.
+* Miscellaneous Programs:: Some interesting @command{awk} programs.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
+@end menu
+
+@node Running Examples
+@section Running the Example Programs
+
+To run a given program, you would typically do something like this:
+
+@example
+awk -f @var{program} -- @var{options} @var{files}
+@end example
+
+@noindent
+Here, @var{program} is the name of the @command{awk} program (such as
+@file{cut.awk}), @var{options} are any command-line options for the
+program that start with a @samp{-}, and @var{files} are the actual @value{DF}s.
+
+If your system supports the @samp{#!} executable interpreter mechanism
+(@pxref{Executable Scripts}),
+you can instead run your program directly:
+
+@example
+cut.awk -c1-8 myfiles > results
+@end example
+
+If your @command{awk} is not @command{gawk}, you may instead need to use this:
+
+@example
+cut.awk -- -c1-8 myfiles > results
+@end example
+
+@node Clones
+@section Reinventing Wheels for Fun and Profit
+@cindex POSIX, programs@comma{} implementing in @command{awk}
+
+This @value{SECTION} presents a number of POSIX utilities implemented in
+@command{awk}. Reinventing these programs in @command{awk} is often enjoyable,
+because the algorithms can be very clearly expressed, and the code is usually
+very concise and simple. This is true because @command{awk} does so much for you.
+
+It should be noted that these programs are not necessarily intended to
+replace the installed versions on your system.
+Nor may all of these programs be fully compliant with the most recent
+POSIX standard. This is not a problem; their
+purpose is to illustrate @command{awk} language programming for ``real world''
+tasks.
+
+The programs are presented in alphabetical order.
+
+@menu
+* Cut Program:: The @command{cut} utility.
+* Egrep Program:: The @command{egrep} utility.
+* Id Program:: The @command{id} utility.
+* Split Program:: The @command{split} utility.
+* Tee Program:: The @command{tee} utility.
+* Uniq Program:: The @command{uniq} utility.
+* Wc Program:: The @command{wc} utility.
+@end menu
+
+@node Cut Program
+@subsection Cutting Out Fields and Columns
+
+@cindex @command{cut} utility
+@cindex fields, cutting
+@cindex columns, cutting
+The @command{cut} utility selects, or ``cuts,'' characters or fields
+from its standard input and sends them to its standard output.
+Fields are separated by TABs by default,
+but you may supply a command-line option to change the field
+@dfn{delimiter} (i.e., the field-separator character). @command{cut}'s
+definition of fields is less general than @command{awk}'s.
+
+A common use of @command{cut} might be to pull out just the login name of
+logged-on users from the output of @command{who}. For example, the following
+pipeline generates a sorted, unique list of the logged-on users:
+
+@example
+who | cut -c1-8 | sort | uniq
+@end example
+
+The options for @command{cut} are:
+
+@table @code
+@item -c @var{list}
+Use @var{list} as the list of characters to cut out. Items within the list
+may be separated by commas, and ranges of characters can be separated with
+dashes. The list @samp{1-8,15,22-35} specifies characters 1 through
+8, 15, and 22 through 35.
+
+@item -f @var{list}
+Use @var{list} as the list of fields to cut out.
+
+@item -d @var{delim}
+Use @var{delim} as the field-separator character instead of the TAB
+character.
+
+@item -s
+Suppress printing of lines that do not contain the field delimiter.
+@end table
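+
+For example, assuming the traditional layout of @file{/etc/passwd}, the
+following prints just the login name and full name fields, using @samp{:}
+as the delimiter:
+
+@example
+cut -d: -f1,5 /etc/passwd
+@end example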
+
+The @command{awk} implementation of @command{cut} uses the @code{getopt()} library
+function (@pxref{Getopt Function})
+and the @code{join()} library function
+(@pxref{Join Function}).
+
+The program begins with a comment describing the options, the library
+functions needed, and a @code{usage()} function that prints out a usage
+message and exits. @code{usage()} is called if invalid arguments are
+supplied:
+
+@cindex @code{cut.awk} program
+@example
+@c file eg/prog/cut.awk
+# cut.awk --- implement cut in awk
+@c endfile
+@ignore
+@c file eg/prog/cut.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+@c endfile
+@end ignore
+@c file eg/prog/cut.awk
+
+# Options:
+# -f list Cut fields
+# -d c Field delimiter character
+# -c list Cut characters
+#
+# -s Suppress lines without the delimiter
+#
+# Requires getopt() and join() library functions
+
+@group
+function usage()
+@{
+ print("usage: cut [-f list] [-d c] [-s] [files...]") > "/dev/stderr"
+ print("usage: cut [-c list] [files...]") > "/dev/stderr"
+ exit 1
+@}
+@end group
+@c endfile
+@end example
+
+@cindex @code{BEGIN} pattern, running @command{awk} programs and
+@cindex @code{FS} variable, running @command{awk} programs and
+Next comes a @code{BEGIN} rule that parses the command-line options.
+It sets @code{FS} to a single TAB character, because that is @command{cut}'s
+default field separator. The rule then sets the output field separator to be the
+same as the input field separator. A loop using @code{getopt()} steps
+through the command-line options. Exactly one of the variables
+@code{by_fields} or @code{by_chars} is set to true, to indicate that
+processing should be done by fields or by characters, respectively.
+When cutting by characters, the output field separator is set to the null
+string:
+
+@example
+@c file eg/prog/cut.awk
+BEGIN @{
+ FS = "\t" # default
+ OFS = FS
+ while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
+ if (c == "f") @{
+ by_fields = 1
+ fieldlist = Optarg
+ @} else if (c == "c") @{
+ by_chars = 1
+ fieldlist = Optarg
+ OFS = ""
+ @} else if (c == "d") @{
+ if (length(Optarg) > 1) @{
+ printf("cut: using first character of %s" \
+ " for delimiter\n", Optarg) > "/dev/stderr"
+ Optarg = substr(Optarg, 1, 1)
+ @}
+ FS = Optarg
+ OFS = FS
+ if (FS == " ") # defeat awk semantics
+ FS = "[ ]"
+ @} else if (c == "s")
+ suppress = 1
+ else
+ usage()
+ @}
+
+ # Clear out options
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+@c endfile
+@end example
+
+@cindex field separators, spaces as
+The code must take
+special care when the field delimiter is a space. Using
+a single space (@code{@w{" "}}) for the value of @code{FS} is
+incorrect---@command{awk} would separate fields with runs of spaces,
+TABs, and/or newlines, and we want them to be separated with individual
+spaces. Also remember that after @code{getopt()} is through
+(as described in @ref{Getopt Function}),
+we have to
+clear out all the elements of @code{ARGV} from 1 to @code{Optind},
+so that @command{awk} does not try to process the command-line options
+as @value{FN}s.
+
+After dealing with the command-line options, the program verifies that the
+options make sense. Only one or the other of @option{-c} and @option{-f}
+should be used, and both require a field list. Then the program calls
+either @code{set_fieldlist()} or @code{set_charlist()} to pull apart the
+list of fields or characters:
+
+@example
+@c file eg/prog/cut.awk
+ if (by_fields && by_chars)
+ usage()
+
+ if (by_fields == 0 && by_chars == 0)
+ by_fields = 1 # default
+
+ if (fieldlist == "") @{
+ print "cut: needs list for -c or -f" > "/dev/stderr"
+ exit 1
+ @}
+
+ if (by_fields)
+ set_fieldlist()
+ else
+ set_charlist()
+@}
+@c endfile
+@end example
+
+@code{set_fieldlist()} splits the field list apart at the commas
+into an array. Then, for each element of the array, it looks to
+see if the element is actually a range, and if so, splits it apart.
+The function checks the range
+to make sure that the first number is smaller than the second.
+Each number in the list is added to the @code{flist} array, which
+simply lists the fields that will be printed. Normal field splitting
+is used. The program lets @command{awk} handle the job of doing the
+field splitting:
+
+@example
+@c file eg/prog/cut.awk
+function set_fieldlist( n, m, i, j, k, f, g)
+@{
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # a range
+ m = split(f[i], g, "-")
+@group
+ if (m != 2 || g[1] >= g[2]) @{
+ printf("cut: bad field list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+@end group
+ for (k = g[1]; k <= g[2]; k++)
+ flist[j++] = k
+ @} else
+ flist[j++] = f[i]
+ @}
+ nfields = j - 1
+@}
+@c endfile
+@end example
+
+The @code{set_charlist()} function is more complicated than
+@code{set_fieldlist()}.
+The idea here is to use @command{gawk}'s @code{FIELDWIDTHS} variable
+(@pxref{Constant Size}),
+which describes constant-width input. When using a character list, that is
+exactly what we have.
+
+Setting up @code{FIELDWIDTHS} is more complicated than simply listing the
+fields that need to be printed. We have to keep track of the fields to
+print and also the intervening characters that have to be skipped.
+For example, suppose you wanted characters 1 through 8, 15, and
+22 through 35. You would use @samp{-c 1-8,15,22-35}. The necessary value
+for @code{FIELDWIDTHS} is @code{@w{"8 6 1 6 14"}}. This yields five
+fields, and the fields to print
+are @code{$1}, @code{$3}, and @code{$5}.
+The intermediate fields are @dfn{filler},
+which is stuff in between the desired data.
+@code{flist} lists the fields to print, and @code{t} tracks the
+complete field list, including filler fields:
+
+@example
+@c file eg/prog/cut.awk
+function set_charlist( field, i, j, f, g, n, m, t,
+ filler, last, len)
+@{
+ field = 1 # count total fields
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) @{
+ printf("cut: bad character list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+ len = g[2] - g[1] + 1
+ if (g[1] > 1) # compute length of filler
+ filler = g[1] - last - 1
+ else
+ filler = 0
+@group
+ if (filler)
+ t[field++] = filler
+@end group
+ t[field++] = len # length of field
+ last = g[2]
+ flist[j++] = field - 1
+ @} else @{
+ if (f[i] > 1)
+ filler = f[i] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = 1
+ last = f[i]
+ flist[j++] = field - 1
+ @}
+ @}
+ FIELDWIDTHS = join(t, 1, field - 1)
+ nfields = j - 1
+@}
+@c endfile
+@end example
+
+Next is the rule that processes the data. If the @option{-s} option
+is given, then @code{suppress} is true. The first @code{if} statement
+makes sure that the input record does have the field separator. If
+@command{cut} is processing fields, @code{suppress} is true, and the field
+separator character is not in the record, then the record is skipped.
+
+If the record is valid, then @command{gawk} has split the data
+into fields, either using the character in @code{FS} or using fixed-length
+fields and @code{FIELDWIDTHS}. The loop goes through the list of fields
+that should be printed. The corresponding field is printed if it contains data.
+If the next field also has data, then the separator character is
+written out between the fields:
+
+@example
+@c file eg/prog/cut.awk
+@{
+ if (by_fields && suppress && index($0, FS) == 0)
+ next
+
+ for (i = 1; i <= nfields; i++) @{
+ if ($flist[i] != "") @{
+ printf "%s", $flist[i]
+ if (i < nfields && $flist[i+1] != "")
+ printf "%s", OFS
+ @}
+ @}
+ print ""
+@}
+@c endfile
+@end example
+
+This version of @command{cut} relies on @command{gawk}'s @code{FIELDWIDTHS}
+variable to do the character-based cutting. It is possible in
+other @command{awk} implementations to use @code{substr()}
+(@pxref{String Functions}), but
+it is also extremely painful.
+The @code{FIELDWIDTHS} variable supplies an elegant solution to the problem
+of picking the input line apart by characters.
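+
+As a quick illustration of the idea (separate from @file{cut.awk}), the
+following one-liner splits its input into fixed-width pieces and prints
+the second one:
+
+@example
+$ @kbd{echo abcdefghij | gawk 'BEGIN @{ FIELDWIDTHS = "3 4 3" @} @{ print $2 @}'}
+@print{} defg
+@end example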
+
+
+@node Egrep Program
+@subsection Searching for Regular Expressions in Files
+
+@cindex regular expressions, searching for
+@cindex searching, files for regular expressions
+@cindex files, searching for regular expressions
+@cindex @command{egrep} utility
+The @command{egrep} utility searches files for patterns. It uses regular
+expressions that are almost identical to those available in @command{awk}
+(@pxref{Regexp}).
+You invoke it as follows:
+
+@display
+@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{}
+@end display
+
+The @var{pattern} is a regular expression. In typical usage, the regular
+expression is quoted to prevent the shell from expanding any of the
+special characters as @value{FN} wildcards. Normally, @command{egrep}
+prints the lines that matched. If multiple @value{FN}s are provided on
+the command line, each output line is preceded by the name of the file
+and a colon.
+
+The options to @command{egrep} are as follows:
+
+@table @code
+@item -c
+Print out a count of the lines that matched the pattern, instead of the
+lines themselves.
+
+@item -s
+Be silent. No output is produced and the exit value indicates whether
+the pattern was matched.
+
+@item -v
+Invert the sense of the test. @command{egrep} prints the lines that do
+@emph{not} match the pattern and exits successfully if the pattern is not
+matched.
+
+@item -i
+Ignore case distinctions in both the pattern and the input data.
+
+@item -l
+Only print (list) the names of the files that matched, not the lines that matched.
+
+@item -e @var{pattern}
+Use @var{pattern} as the regexp to match. The purpose of the @option{-e}
+option is to allow patterns that start with a @samp{-}.
+@end table
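+
+For example, the @option{-e} option lets you search for a pattern that
+itself begins with a @samp{-}, which would otherwise be taken as an option
+(here, @file{myfile.txt} is just a placeholder @value{FN}):
+
+@example
+egrep -e '-type' myfile.txt
+@end example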
+
+This version uses the @code{getopt()} library function
+(@pxref{Getopt Function})
+and the file transition library program
+(@pxref{Filetrans Function}).
+
+The program begins with a descriptive comment and then a @code{BEGIN} rule
+that processes the command-line arguments with @code{getopt()}. The @option{-i}
+(ignore case) option is particularly easy with @command{gawk}; we just use the
+@code{IGNORECASE} predefined variable
+(@pxref{Built-in Variables}):
+
+@cindex @code{egrep.awk} program
+@example
+@c file eg/prog/egrep.awk
+# egrep.awk --- simulate egrep in awk
+#
+@c endfile
+@ignore
+@c file eg/prog/egrep.awk
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+
+@c endfile
+@end ignore
+@c file eg/prog/egrep.awk
+# Options:
+# -c count of lines
+# -s silent - use exit value
+# -v invert test, success if no match
+# -i ignore case
+# -l print filenames only
+# -e argument is pattern
+#
+# Requires getopt and file transition library functions
+
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) @{
+ if (c == "c")
+ count_only++
+ else if (c == "s")
+ no_print++
+ else if (c == "v")
+ invert++
+ else if (c == "i")
+ IGNORECASE = 1
+ else if (c == "l")
+ filenames_only++
+ else if (c == "e")
+ pattern = Optarg
+ else
+ usage()
+ @}
+@c endfile
+@end example
+
+Next comes the code that handles the @command{egrep}-specific behavior. If no
+pattern is supplied with @option{-e}, the first nonoption on the
+command line is used. The @command{awk} command-line arguments up to @code{ARGV[Optind]}
+are cleared, so that @command{awk} won't try to process them as files. If no
+files are specified, the standard input is used, and if multiple files are
+specified, we make sure to note this so that the @value{FN}s can precede the
+matched lines in the output:
+
+@example
+@c file eg/prog/egrep.awk
+ if (pattern == "")
+ pattern = ARGV[Optind++]
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+ if (Optind >= ARGC) @{
+ ARGV[1] = "-"
+ ARGC = 2
+ @} else if (ARGC - Optind > 1)
+ do_filenames++
+
+# if (IGNORECASE)
+# pattern = tolower(pattern)
+@}
+@c endfile
+@end example
+
+The last two lines are commented out, as they are not needed in
+@command{gawk}. They should be uncommented if you have to use another version
+of @command{awk}.
+
+The next set of lines should be uncommented if you are not using
+@command{gawk}. This rule translates all the characters in the input line
+into lowercase if the @option{-i} option is specified.@footnote{It
+also introduces a subtle bug;
+if a match happens, we output the translated line, not the original.}
+The rule is
+commented out as it is not necessary with @command{gawk}:
+
+@example
+@c file eg/prog/egrep.awk
+#@{
+# if (IGNORECASE)
+# $0 = tolower($0)
+#@}
+@c endfile
+@end example
+
+The @code{beginfile()} function is called by the rule in @file{ftrans.awk}
+when each new file is processed. In this case, it is very simple; all it
+does is initialize a variable @code{fcount} to zero. @code{fcount} tracks
+how many lines in the current file matched the pattern.
+Naming the parameter @code{junk} shows we know that @code{beginfile()}
+is called with a parameter, but that we're not interested in its value:
+
+@example
+@c file eg/prog/egrep.awk
+function beginfile(junk)
+@{
+ fcount = 0
+@}
+@c endfile
+@end example
+
+The @code{endfile()} function is called after each file has been processed.
+It affects the output only when the user wants a count of the number of lines that
+matched. @code{no_print} is true only if the exit status is desired.
+@code{count_only} is true if line counts are desired. @command{egrep}
+therefore only prints line counts if printing and counting are enabled.
+The output format must be adjusted depending upon the number of files to
+process. Finally, @code{fcount} is added to @code{total}, so that we
+know the total number of lines that matched the pattern:
+
+@example
+@c file eg/prog/egrep.awk
+function endfile(file)
+@{
+ if (! no_print && count_only) @{
+ if (do_filenames)
+ print file ":" fcount
+ else
+ print fcount
+ @}
+
+ total += fcount
+@}
+@c endfile
+@end example
+
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be
+@command{gawk}-specific. Additionally, this example was written before
+@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}.
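+
+For the curious, here is a sketch (not part of @file{egrep.awk}) of what
+the @command{gawk}-specific approach might look like:
+
+@example
+BEGINFILE @{ fcount = 0 @}
+
+ENDFILE @{
+    if (! no_print && count_only)
+        print (do_filenames ? FILENAME ":" fcount : fcount)
+    total += fcount
+@}
+@end example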
+
+The following rule does most of the work of matching lines. The variable
+@code{matches} is true if the line matched the pattern. If the user
+wants lines that did not match, the sense of @code{matches} is inverted
+using the @samp{!} operator. @code{fcount} is incremented with the value of
+@code{matches}, which is either one or zero, depending upon a
+successful or unsuccessful match. If the line does not match, the
+@code{next} statement just moves on to the next record.
+
+A number of additional tests are made, but they are only done if we
+are not counting lines. First, if the user only wants exit status
+(@code{no_print} is true), then it is enough to know that @emph{one}
+line in this file matched, and we can skip on to the next file with
+@code{nextfile}. Similarly, if we are only printing @value{FN}s, we can
+print the @value{FN}, and then skip to the next file with @code{nextfile}.
+Finally, each line is printed, with a leading @value{FN} and colon
+if necessary:
+
+@cindex @code{!} (exclamation point), @code{!} operator
+@cindex exclamation point (@code{!}), @code{!} operator
+@example
+@c file eg/prog/egrep.awk
+@{
+ matches = ($0 ~ pattern)
+ if (invert)
+ matches = ! matches
+
+ fcount += matches # 1 or 0
+
+ if (! matches)
+ next
+
+ if (! count_only) @{
+ if (no_print)
+ nextfile
+
+ if (filenames_only) @{
+ print FILENAME
+ nextfile
+ @}
+
+ if (do_filenames)
+ print FILENAME ":" $0
+ else
+ print
+ @}
+@}
+@c endfile
+@end example
+
+The @code{END} rule takes care of producing the correct exit status. If
+there are no matches, the exit status is one; otherwise it is zero:
+
+@example
+@c file eg/prog/egrep.awk
+END @{
+ exit (total == 0)
+@}
+@c endfile
+@end example
+
+The @code{usage()} function prints a usage message in case of invalid options,
+and then exits:
+
+@example
+@c file eg/prog/egrep.awk
+function usage()
+@{
+ print("Usage: egrep [-csvil] [-e pat] [files ...]") > "/dev/stderr"
+ print("\n\tegrep [-csvil] pat [files ...]") > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@end example
+
+
+@node Id Program
+@subsection Printing Out User Information
+
+@cindex printing, user information
+@cindex users, information about, printing
+@cindex @command{id} utility
+The @command{id} utility lists a user's real and effective user ID numbers,
+real and effective group ID numbers, and the user's group set, if any.
+@command{id} only prints the effective user ID and group ID if they are
+different from the real ones. If possible, @command{id} also supplies the
+corresponding user and group names. The output might look like this:
+
+@example
+$ @kbd{id}
+@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
+@end example
+
+@cindex @code{PROCINFO} array, and user and group ID numbers
+This information is part of what is provided by @command{gawk}'s
+@code{PROCINFO} array (@pxref{Built-in Variables}).
+However, the @command{id} utility provides a more palatable output than just
+individual numbers.
+
+Here is a simple version of @command{id} written in @command{awk}.
+It uses the user database library functions
+(@pxref{Passwd Functions})
+and the group database library functions
+(@pxref{Group Functions}).
+
+The program is fairly straightforward. All the work is done in the
+@code{BEGIN} rule. The user and group ID numbers are obtained from
+@code{PROCINFO}.
+The code is repetitive. The entry in the user database for the real user ID
+number is split into parts at the @samp{:}. The name is the first field.
+Similar code is used for the effective user ID number and the group
+numbers:
+
+@cindex @code{id.awk} program
+@example
+@c file eg/prog/id.awk
+# id.awk --- implement id in awk
+#
+# Requires user and group library functions
+@c endfile
+@ignore
+@c file eg/prog/id.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised February 1996
+# Revised May 2014
+# Revised September 2014
+
+@c endfile
+@end ignore
+@c file eg/prog/id.awk
+# output is:
+# uid=12(foo) euid=34(bar) gid=3(baz) \
+# egid=5(blat) groups=9(nine),2(two),1(one)
+
+@group
+BEGIN @{
+ uid = PROCINFO["uid"]
+ euid = PROCINFO["euid"]
+ gid = PROCINFO["gid"]
+ egid = PROCINFO["egid"]
+@end group
+
+ printf("uid=%d", uid)
+ pw = getpwuid(uid)
+ pr_first_field(pw)
+
+ if (euid != uid) @{
+ printf(" euid=%d", euid)
+ pw = getpwuid(euid)
+ pr_first_field(pw)
+ @}
+
+ printf(" gid=%d", gid)
+ pw = getgrgid(gid)
+ pr_first_field(pw)
+
+ if (egid != gid) @{
+ printf(" egid=%d", egid)
+ pw = getgrgid(egid)
+ pr_first_field(pw)
+ @}
+
+ for (i = 1; ("group" i) in PROCINFO; i++) @{
+ if (i == 1)
+ printf(" groups=")
+ group = PROCINFO["group" i]
+ printf("%d", group)
+ pw = getgrgid(group)
+ pr_first_field(pw)
+ if (("group" (i+1)) in PROCINFO)
+ printf(",")
+ @}
+
+ print ""
+@}
+
+function pr_first_field(str, a)
+@{
+ if (str != "") @{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ @}
+@}
+@c endfile
+@end example
+
+The test in the @code{for} loop is worth noting.
+Any supplementary groups in the @code{PROCINFO} array have the
+indices @code{"group1"} through @code{"group@var{N}"} for some
+@var{N} (i.e., the total number of supplementary groups).
+However, we don't know in advance how many of these groups
+there are.
+
+This loop works by starting at one, concatenating the value with
+@code{"group"}, and then using @code{in} to see if that value is
+in the array (@pxref{Reference to Elements}). Eventually, @code{i} is incremented past
+the last group in the array and the loop exits.
+
+The loop is also correct if there are @emph{no} supplementary
+groups; then the condition is false the first time it's
+tested, and the loop body never executes.
+
+The @code{pr_first_field()} function simply isolates out some
+code that is used repeatedly, making the whole program
+shorter and cleaner. In particular, moving the check for
+the empty string into this function saves several lines of code.
+
+
+@node Split Program
+@subsection Splitting a Large File into Pieces
+
+@c FIXME: One day, update to current POSIX version of split
+
+@cindex files, splitting
+@cindex @code{split} utility
+The @command{split} program splits large text files into smaller pieces.
+Usage is as follows:@footnote{This is the traditional usage. The
+POSIX usage is different, but not relevant for what the program
+aims to demonstrate.}
+
+@display
+@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}]
+@end display
+
+By default,
+the output files are named @file{xaa}, @file{xab}, and so on. Each file has
+1,000 lines in it, with the likely exception of the last file. To change the
+number of lines in each file, supply a number on the command line
+preceded with a minus (e.g., @samp{-500} for files with 500 lines in them
+instead of 1,000). To change the name of the output files to something like
+@file{myfileaa}, @file{myfileab}, and so on, supply an additional
+argument that specifies the @value{FN} prefix.
+
+Here is a version of @command{split} in @command{awk}. It uses the
+@code{ord()} and @code{chr()} functions presented in
+@ref{Ordinal Functions}.
+
+The program first sets its defaults, and then tests to make sure there are
+not too many arguments. It then looks at each argument in turn. The
+first argument could be a minus sign followed by a number. If it is, it
+looks like a negative number, so the program negates it to get a positive
+count of lines. The @value{DF} name is skipped over and the final argument
+is used as the prefix for the output @value{FN}s:
+
+@cindex @code{split.awk} program
+@example
+@c file eg/prog/split.awk
+# split.awk --- do split in awk
+#
+# Requires ord() and chr() library functions
+@c endfile
+@ignore
+@c file eg/prog/split.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised slightly, May 2014
+
+@c endfile
+@end ignore
+@c file eg/prog/split.awk
+# usage: split [-count] [file] [outname]
+
+BEGIN @{
+ outfile = "x" # default
+ count = 1000
+ if (ARGC > 4)
+ usage()
+
+ i = 1
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{
+ count = -ARGV[i]
+ ARGV[i] = ""
+ i++
+ @}
+ # test argv in case reading from stdin instead of file
+ if (i in ARGV)
+ i++ # skip datafile name
+ if (i in ARGV) @{
+ outfile = ARGV[i]
+ ARGV[i] = ""
+ @}
+
+ s1 = s2 = "a"
+ out = (outfile s1 s2)
+@}
+@c endfile
+@end example
+
+The next rule does most of the work. @code{tcount} (temporary count) tracks
+how many lines have been printed to the output file so far. If it is greater
+than @code{count}, it is time to close the current file and start a new one.
+@code{s1} and @code{s2} track the current suffixes for the @value{FN}. If
+they are both @samp{z}, the file is just too big. Otherwise, @code{s1}
+moves to the next letter in the alphabet and @code{s2} starts over again at
+@samp{a}:
+
+@c else on separate line here for page breaking
+@example
+@c file eg/prog/split.awk
+@{
+ if (++tcount > count) @{
+ close(out)
+ if (s2 == "z") @{
+ if (s1 == "z") @{
+ printf("split: %s is too large to split\n",
+ FILENAME) > "/dev/stderr"
+ exit 1
+ @}
+ s1 = chr(ord(s1) + 1)
+ s2 = "a"
+ @}
+@group
+ else
+ s2 = chr(ord(s2) + 1)
+@end group
+ out = (outfile s1 s2)
+ tcount = 1
+ @}
+ print > out
+@}
+@c endfile
+@end example
+
+@noindent
+The @code{usage()} function simply prints an error message and exits:
+
+@example
+@c file eg/prog/split.awk
+function usage()
+@{
+ print("usage: split [-num] [file] [outname]") > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@end example
+
+This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
+instead of doing it in an @code{END} rule.
+It also assumes that letters are contiguous in the character set,
+which isn't true for EBCDIC systems.
+
+@ifset FOR_PRINT
+You might want to consider how to eliminate the use of
+@code{ord()} and @code{chr()}; this can be done in such a
+way as to solve the EBCDIC issue as well.
+@end ifset
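+
+One possible way around the contiguity assumption (a sketch, not part of
+@file{split.awk}) is to step through an explicit alphabet string instead
+of using @code{ord()} and @code{chr()}:
+
+@example
+function next_letter(l,    letters, pos)
+@{
+    # advance a one-letter suffix using an explicit alphabet,
+    # so the code does not depend on a contiguous character set
+    letters = "abcdefghijklmnopqrstuvwxyz"
+    pos = index(letters, l)
+    if (pos <= 0 || pos >= length(letters))
+        return ""       # no successor; the caller must handle this
+    return substr(letters, pos + 1, 1)
+@}
+@end example
+
+@noindent
+The main rule would then call @code{next_letter()} instead of
+@samp{chr(ord(@dots{}) + 1)}, checking for an empty return value to
+detect running out of suffixes.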
+
+
+@node Tee Program
+@subsection Duplicating Output into Multiple Files
+
+@cindex files, multiple@comma{} duplicating output into
+@cindex output, duplicating into files
+@cindex @code{tee} utility
+The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
+its standard input to its standard output and also duplicates it to the
+files named on the command line. Its usage is as follows:
+
+@display
+@command{tee} [@option{-a}] @var{file} @dots{}
+@end display
+
+The @option{-a} option tells @code{tee} to append to the named files, instead of
+truncating them and starting over.
+
+The @code{BEGIN} rule first makes a copy of all the command-line arguments
+into an array named @code{copy}.
+@code{ARGV[0]} is not needed, so it is not copied.
+@code{tee} cannot use @code{ARGV} directly, because @command{awk} attempts to
+process each @value{FN} in @code{ARGV} as input data.
+
+@cindex flag variables
+If the first argument is @option{-a}, then the flag variable
+@code{append} is set to true, and both @code{ARGV[1]} and
+@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no
+@value{FN}s were supplied and @code{tee} prints a usage message and exits.
+Finally, @command{awk} is forced to read the standard input by setting
+@code{ARGV[1]} to @code{"-"} and @code{ARGC} to two:
+
+@cindex @code{tee.awk} program
+@example
+@c file eg/prog/tee.awk
+# tee.awk --- tee in awk
+#
+# Copy standard input to all named output files.
+# Append content if -a option is supplied.
+#
+@c endfile
+@ignore
+@c file eg/prog/tee.awk
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised December 1995
+
+@c endfile
+@end ignore
+@c file eg/prog/tee.awk
+BEGIN @{
+ for (i = 1; i < ARGC; i++)
+ copy[i] = ARGV[i]
+
+ if (ARGV[1] == "-a") @{
+ append = 1
+ delete ARGV[1]
+ delete copy[1]
+ ARGC--
+ @}
+ if (ARGC < 2) @{
+ print "usage: tee [-a] file ..." > "/dev/stderr"
+ exit 1
+ @}
+ ARGV[1] = "-"
+ ARGC = 2
+@}
+@c endfile
+@end example
+
+The following single rule does all the work. Because there is no pattern, it is
+executed for each line of input. The body of the rule simply prints the
+line into each file on the command line, and then to the standard output:
+
+@example
+@c file eg/prog/tee.awk
+@{
+ # moving the if outside the loop makes it run faster
+ if (append)
+ for (i in copy)
+ print >> copy[i]
+ else
+ for (i in copy)
+ print > copy[i]
+ print
+@}
+@c endfile
+@end example
+
+@noindent
+It is also possible to write the loop this way:
+
+@example
+for (i in copy)
+ if (append)
+ print >> copy[i]
+ else
+ print > copy[i]
+@end example
+
+@noindent
+This is more concise, but it is also less efficient. The @samp{if} is
+tested for each record and for each output file. By duplicating the loop
+body, the @samp{if} is only tested once for each input record. If there are
+@var{N} input records and @var{M} output files, the first method only
+executes @var{N} @samp{if} statements, while the second executes
+@var{N}@code{*}@var{M} @samp{if} statements.
+
+Finally, the @code{END} rule cleans up by closing all the output files:
+
+@example
+@c file eg/prog/tee.awk
+END @{
+ for (i in copy)
+ close(copy[i])
+@}
+@c endfile
+@end example
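+
+To try it out, you might run something like the following (a hypothetical
+invocation; the output @value{FN}s are just placeholders):
+
+@example
+ls -l | gawk -f tee.awk -- copy1.txt copy2.txt
+@end example
+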
+
+@node Uniq Program
+@subsection Printing Nonduplicated Lines of Text
+
+@c FIXME: One day, update to current POSIX version of uniq
+
+@cindex printing, unduplicated lines of text
+@cindex text@comma{} printing, unduplicated lines of
+@cindex @command{uniq} utility
+The @command{uniq} utility reads sorted lines of data on its standard
+input, and by default removes duplicate lines. In other words, it only
+prints unique lines---hence the name. @command{uniq} has a number of
+options. The usage is as follows:
+
+@display
+@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]]
+@end display
+
+The options for @command{uniq} are:
+
+@table @code
+@item -d
+Print only repeated (duplicated) lines.
+
+@item -u
+Print only nonrepeated (unique) lines.
+
+@item -c
+Count lines. This option overrides @option{-d} and @option{-u}. Both repeated
+and nonrepeated lines are counted.
+
+@item -@var{n}
+Skip @var{n} fields before comparing lines. The definition of fields
+is similar to @command{awk}'s default: nonwhitespace characters separated
+by runs of spaces and/or TABs.
+
+@item +@var{n}
+Skip @var{n} characters before comparing lines. Any fields specified with
+@samp{-@var{n}} are skipped first.
+
+@item @var{inputfile}
+Data is read from the input file named on the command line, instead of from
+the standard input.
+
+@item @var{outputfile}
+The generated output is sent to the named output file, instead of to the
+standard output.
+@end table
+
+Normally @command{uniq} behaves as if both the @option{-d} and
+@option{-u} options are provided.
+
+@command{uniq} uses the
+@code{getopt()} library function
+(@pxref{Getopt Function})
+and the @code{join()} library function
+(@pxref{Join Function}).
+
+The program begins with a @code{usage()} function and then a brief outline of
+the options and their meanings in comments.
+The @code{BEGIN} rule deals with the command-line arguments and options. It
+uses a trick to get @code{getopt()} to handle options of the form @samp{-25},
+treating such an option as the option letter @samp{2} with an argument of
+@samp{5}. If indeed two or more digits are supplied (@code{Optarg} looks
+like a number), @code{Optarg} is
+concatenated with the option digit and then the result is added to zero to make
+it into a number. If there is only one digit in the option, then
+@code{Optarg} is not needed. In this case, @code{Optind} must be decremented so that
+@code{getopt()} processes it next time. This code is admittedly a bit
+tricky.
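+
+For example, here is a worked trace of the trick for the option @samp{-25}
+(shown as comments; this is not part of @file{uniq.awk}):
+
+@example
+# getopt() sees -25 and returns c = "2" with Optarg = "5"
+# fcount = (c Optarg) + 0
+#        = ("2" "5") + 0
+#        = "25" + 0
+#        = 25
+@end example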
+
+If no options are supplied, then the default is taken, to print both
+repeated and nonrepeated lines. The output file, if provided, is assigned
+to @code{outputfile}. Early on, @code{outputfile} is initialized to the
+standard output, @file{/dev/stdout}:
+
+@cindex @code{uniq.awk} program
+@example
+@c file eg/prog/uniq.awk
+@group
+# uniq.awk --- do uniq in awk
+#
+# Requires getopt() and join() library functions
+@end group
+@c endfile
+@ignore
+@c file eg/prog/uniq.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+@c endfile
+@end ignore
+@c file eg/prog/uniq.awk
+
+function usage()
+@{
+ print("Usage: uniq [-udc [-n]] [+n] [ in [ out ]]") > "/dev/stderr"
+ exit 1
+@}
+
+# -c count lines. overrides -d and -u
+# -d only repeated lines
+# -u only nonrepeated lines
+# -n skip n fields
+# +n skip n characters, skip fields first
+
+BEGIN @{
+ count = 1
+ outputfile = "/dev/stdout"
+ opts = "udc0:1:2:3:4:5:6:7:8:9:"
+ while ((c = getopt(ARGC, ARGV, opts)) != -1) @{
+ if (c == "u")
+ non_repeated_only++
+ else if (c == "d")
+ repeated_only++
+ else if (c == "c")
+ do_count++
+ else if (index("0123456789", c) != 0) @{
+ # getopt() requires args to options
+ # this messes us up for things like -5
+ if (Optarg ~ /^[[:digit:]]+$/)
+ fcount = (c Optarg) + 0
+ else @{
+ fcount = c + 0
+ Optind--
+ @}
+ @} else
+ usage()
+ @}
+
+ if (ARGV[Optind] ~ /^\+[[:digit:]]+$/) @{
+ charcount = substr(ARGV[Optind], 2) + 0
+ Optind++
+ @}
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ if (repeated_only == 0 && non_repeated_only == 0)
+ repeated_only = non_repeated_only = 1
+
+ if (ARGC - Optind == 2) @{
+ outputfile = ARGV[ARGC - 1]
+ ARGV[ARGC - 1] = ""
+ @}
+@}
+@c endfile
+@end example
+
+The following function, @code{are_equal()}, compares the current line,
+@code{$0}, to the previous line, @code{last}. It handles skipping fields
+and characters. If no field count and no character count are specified,
+@code{are_equal()} returns one or zero depending upon the result of a
+simple string comparison of @code{last} and @code{$0}.
+
+Otherwise, things get more complicated. If fields have to be skipped,
+each line is broken into an array using @code{split()} (@pxref{String
+Functions}); the desired fields are then joined back into a line
+using @code{join()}. The joined lines are stored in @code{clast} and
+@code{cline}. If no fields are skipped, @code{clast} and @code{cline}
+are set to @code{last} and @code{$0}, respectively. Finally, if
+characters are skipped, @code{substr()} is used to strip off the leading
+@code{charcount} characters in @code{clast} and @code{cline}. The two
+strings are then compared and @code{are_equal()} returns the result:
+
+@example
+@c file eg/prog/uniq.awk
+function are_equal( n, m, clast, cline, alast, aline)
+@{
+ if (fcount == 0 && charcount == 0)
+ return (last == $0)
+
+ if (fcount > 0) @{
+ n = split(last, alast)
+ m = split($0, aline)
+ clast = join(alast, fcount+1, n)
+ cline = join(aline, fcount+1, m)
+ @} else @{
+ clast = last
+ cline = $0
+ @}
+ if (charcount) @{
+ clast = substr(clast, charcount + 1)
+ cline = substr(cline, charcount + 1)
+ @}
+
+ return (clast == cline)
+@}
+@c endfile
+@end example
+
+The following two rules are the body of the program. The first one is
+executed only for the very first line of data. It sets @code{last} equal to
+@code{$0}, so that subsequent lines of text have something to be compared to.
+
+The second rule does the work. The variable @code{equal} is one or zero,
+depending upon the results of @code{are_equal()}'s comparison. If @command{uniq}
+is counting lines (the @option{-c} option) and the lines are equal, then it increments the @code{count} variable.
+Otherwise, it prints the line and resets @code{count},
+because the two lines are not equal.
+
+If @command{uniq} is not counting, and if the lines are equal, @code{count} is incremented.
+Nothing is printed, as the point is to remove duplicates.
+Otherwise, if @command{uniq} is printing only repeated lines and more than
+one line has been seen, or if @command{uniq} is printing only nonrepeated lines
+and only one line has been seen, then the line is printed, and @code{count}
+is reset.
+
+Finally, similar logic is used in the @code{END} rule to print the final
+line of input data:
+
+@example
+@c file eg/prog/uniq.awk
+NR == 1 @{
+ last = $0
+ next
+@}
+
+@{
+ equal = are_equal()
+
+ if (do_count) @{ # overrides -d and -u
+ if (equal)
+ count++
+ else @{
+ printf("%4d %s\n", count, last) > outputfile
+ last = $0
+ count = 1 # reset
+ @}
+ next
+ @}
+
+ if (equal)
+ count++
+ else @{
+ if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+ last = $0
+ count = 1
+ @}
+@}
+
+END @{
+ if (do_count)
+ printf("%4d %s\n", count, last) > outputfile
+ else if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+ close(outputfile)
+@}
+@c endfile
+@end example
+
+@c FIXME: Include this?
+@ignore
+This program does not follow our recommended convention of naming
+global variables with a leading capital letter. Doing that would
+make the program a little easier to follow.
+@end ignore
+
+@ifset FOR_PRINT
+The logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Consider how to rewrite the logic to follow this
+suggestion.
+@end ifset
+
+
+
+@node Wc Program
+@subsection Counting Things
+
+@c FIXME: One day, update to current POSIX version of wc
+
+@cindex counting
+@cindex input files, counting elements in
+@cindex words, counting
+@cindex characters, counting
+@cindex lines, counting
+@cindex @command{wc} utility
+The @command{wc} (word count) utility counts lines, words, and characters in
+one or more input files. Its usage is as follows:
+
+@display
+@command{wc} [@option{-lwc}] [@var{files} @dots{}]
+@end display
+
+If no files are specified on the command line, @command{wc} reads its standard
+input. If there are multiple files, it also prints total counts for all
+the files. The options and their meanings are as follows:
+
+@table @code
+@item -l
+Count only lines.
+
+@item -w
+Count only words.
+A ``word'' is a contiguous sequence of nonwhitespace characters, separated
+by spaces and/or TABs. Luckily, this is the normal way @command{awk} separates
+fields in its input data.
+
+@item -c
+Count only characters.
+@end table
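+
+For example, assuming the @code{getopt()} and file-transition library code
+is loaded as well (the library @value{FN}s shown here are hypothetical;
+adjust them to match where you keep those functions), you might count only
+lines and words in two files like this:
+
+@example
+gawk -f getopt.awk -f ftrans.awk -f wc.awk -- -lw file1 file2
+@end example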
+
+Implementing @command{wc} in @command{awk} is particularly elegant,
+because @command{awk} does a lot of the work for us; it splits lines into
+words (i.e., fields) and counts them, it counts lines (i.e., records),
+and it can easily tell us how long a line is.
+
+This program uses the @code{getopt()} library function
+(@pxref{Getopt Function})
+and the file-transition functions
+(@pxref{Filetrans Function}).
+
+This version has one notable difference from traditional versions of
+@command{wc}: it always prints the counts in the order lines, words,
+and characters. Traditional versions note the order of the @option{-l},
+@option{-w}, and @option{-c} options on the command line, and print the
+counts in that order.
+
+The @code{BEGIN} rule does the argument processing. The variable
+@code{print_total} is true if more than one file is named on the
+command line:
+
+@cindex @code{wc.awk} program
+@example
+@c file eg/prog/wc.awk
+# wc.awk --- count lines, words, characters
+@c endfile
+@ignore
+@c file eg/prog/wc.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+@c endfile
+@end ignore
+@c file eg/prog/wc.awk
+
+# Options:
+# -l only count lines
+# -w only count words
+# -c only count characters
+#
+# Default is to count lines, words, characters
+#
+# Requires getopt() and file transition library functions
+
+BEGIN @{
+ # let getopt() print a message about
+ # invalid options. we ignore them
+ while ((c = getopt(ARGC, ARGV, "lwc")) != -1) @{
+ if (c == "l")
+ do_lines = 1
+ else if (c == "w")
+ do_words = 1
+ else if (c == "c")
+ do_chars = 1
+ @}
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ # if no options, do all
+ if (! do_lines && ! do_words && ! do_chars)
+ do_lines = do_words = do_chars = 1
+
+    print_total = (ARGC - i > 1)
+@}
+@c endfile
+@end example
+
+The @code{beginfile()} function is simple; it just resets the counts of lines,
+words, and characters to zero, and saves the current @value{FN} in
+@code{fname}:
+
+@example
+@c file eg/prog/wc.awk
+function beginfile(file)
+@{
+ lines = words = chars = 0
+ fname = FILENAME
+@}
+@c endfile
+@end example
+
+The @code{endfile()} function adds the current file's numbers to the
+running totals of lines, words, and characters. It then prints out those
+numbers for the file that was just read. It relies on @code{beginfile()}
+to reset the numbers for the following @value{DF}:
+
+@example
+@c file eg/prog/wc.awk
+function endfile(file)
+@{
+ tlines += lines
+ twords += words
+ tchars += chars
+ if (do_lines)
+ printf "\t%d", lines
+@group
+ if (do_words)
+ printf "\t%d", words
+@end group
+ if (do_chars)
+ printf "\t%d", chars
+ printf "\t%s\n", fname
+@}
+@c endfile
+@end example
+
+There is one rule that is executed for each line. It adds the length of
+the record, plus one, to @code{chars}.@footnote{Because @command{gawk}
+understands multibyte locales, this code counts characters, not bytes.}
+The extra one
+is needed because the newline character separating records (the value
+of @code{RS}) is not part of the record itself, and thus not included
+in its length. Next, @code{lines} is incremented for each line read,
+and @code{words} is incremented by the value of @code{NF}, which is the
+number of ``words'' on this line:
+
+@example
+@c file eg/prog/wc.awk
+# do per line
+@{
+ chars += length($0) + 1 # get newline
+ lines++
+ words += NF
+@}
+@c endfile
+@end example
+
+Finally, the @code{END} rule simply prints the totals for all the files:
+
+@example
+@c file eg/prog/wc.awk
+END @{
+ if (print_total) @{
+ if (do_lines)
+ printf "\t%d", tlines
+ if (do_words)
+ printf "\t%d", twords
+ if (do_chars)
+ printf "\t%d", tchars
+ print "\ttotal"
+ @}
+@}
+@c endfile
+@end example
+
+@node Miscellaneous Programs
+@section A Grab Bag of @command{awk} Programs
+
+This @value{SECTION} is a large ``grab bag'' of miscellaneous programs.
+We hope you find them both interesting and enjoyable.
+
+@menu
+* Dupword Program:: Finding duplicated words in a document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @command{tr} utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage count.
+* History Sorting:: Eliminating duplicate entries from a history
+ file.
+* Extract Program:: Pulling out programs from Texinfo source
+ files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @command{awk} that includes
+ files.
+* Anagram Program:: Finding anagrams from a dictionary.
+* Signature Program:: People do amazing things with too much time on
+ their hands.
+@end menu
+
+@node Dupword Program
+@subsection Finding Duplicated Words in a Document
+
+@cindex words, duplicate@comma{} searching for
+@cindex searching, for words
+@cindex documents@comma{} searching
+A common error when writing large amounts of prose is to accidentally
+duplicate words. Typically you will see this in text as something like ``the
+the program does the following@dots{}'' When the text is online, often
+the duplicated words occur at the end of one line and the
+@iftex
+the
+@end iftex
+beginning of
+another, making them very difficult to spot.
+@c as here!
+
+This program, @file{dupword.awk}, scans through a file one line at a time
+and looks for adjacent occurrences of the same word. It also saves the last
+word on a line (in the variable @code{prev}) for comparison with the first
+word on the next line.
+
+@cindex Texinfo
+The first two statements make sure that the line is all lowercase,
+so that, for example, ``The'' and ``the'' compare equal to each other.
+The next statement replaces nonalphanumeric and nonwhitespace characters
+with spaces, so that punctuation does not affect the comparison either.
+The characters are replaced with spaces so that formatting controls
+don't create nonsense words (e.g., the Texinfo @samp{@@code@{NF@}}
+becomes @samp{codeNF} if punctuation is simply deleted). The record is
+then resplit into fields, yielding just the actual words on the line,
+and ensuring that there are no empty fields.
+
+If there are no fields left after removing all the punctuation, the
+current record is skipped. Otherwise, the program loops through each
+word, comparing it to the previous one:
+
+@cindex @code{dupword.awk} program
+@example
+@c file eg/prog/dupword.awk
+# dupword.awk --- find duplicate words in text
+@c endfile
+@ignore
+@c file eg/prog/dupword.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# December 1991
+# Revised October 2000
+
+@c endfile
+@end ignore
+@c file eg/prog/dupword.awk
+@{
+ $0 = tolower($0)
+ gsub(/[^[:alnum:][:blank:]]/, " ");
+ $0 = $0 # re-split
+ if (NF == 0)
+ next
+ if ($1 == prev)
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $1)
+ for (i = 2; i <= NF; i++)
+ if ($i == $(i-1))
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $i)
+ prev = $NF
+@}
+@c endfile
+@end example
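+
+As a quick way to try the program, feed it a line of text on standard
+input (the @samp{-} in the output is how @command{gawk} identifies
+standard input in @code{FILENAME}):
+
+@example
+$ @kbd{echo "this is is a test" | gawk -f dupword.awk}
+-:1: duplicate is
+@end example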
+
+@node Alarm Program
+@subsection An Alarm Clock Program
+@cindex insomnia, cure for
+@cindex Robbins, Arnold
+@quotation
+@i{Nothing cures insomnia like a ringing alarm clock.}
+@author Arnold Robbins
+@end quotation
+@cindex Quanstrom, Erik
+@ignore
+Date: Sat, 15 Feb 2014 16:47:09 -0500
+Subject: Re: 9atom install question
+Message-ID: <l2jcvx6j6mey60xnrkb0hhob.1392500829294@email.android.com>
+From: Erik Quanstrom <quanstro@quanstro.net>
+To: Aharon Robbins <arnold@skeeve.com>
+
+yes.
+
+- erik
+
+Aharon Robbins <arnold@skeeve.com> wrote:
+
+>> sleep is for web developers.
+>
+>Can I quote you, in the gawk manual?
+>
+>Thanks,
+>
+>Arnold
+@end ignore
+@quotation
+@i{Sleep is for web developers.}
+@author Erik Quanstrom
+@end quotation
+
+@cindex time, alarm clock example program
+@cindex alarm clock example program
+The following program is a simple ``alarm clock'' program.
+You give it a time of day and an optional message. At the specified time,
+it prints the message on the standard output. In addition, you can give it
+the number of times to repeat the message as well as a delay between
+repetitions.
+
+This program uses the @code{getlocaltime()} function from
+@ref{Getlocaltime Function}.
+
+All the work is done in the @code{BEGIN} rule. The first part is argument
+checking and setting of defaults: the delay, the count, and the message to
+print. If the user supplied a message without the ASCII BEL
+character (known as the ``alert'' character, @code{"\a"}), then it is added to
+the message. (On many systems, printing the ASCII BEL generates an
+audible alert. Thus when the alarm goes off, the system calls attention
+to itself in case the user is not looking at the computer.)
+Just for a change, this program uses a @code{switch} statement
+(@pxref{Switch Statement}), but the processing could be done with a series of
+@code{if}-@code{else} statements instead.
+Here is the program:
+
+@cindex @code{alarm.awk} program
+@example
+@c file eg/prog/alarm.awk
+# alarm.awk --- set an alarm
+#
+# Requires getlocaltime() library function
+@c endfile
+@ignore
+@c file eg/prog/alarm.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised December 2010
+
+@c endfile
+@end ignore
+@c file eg/prog/alarm.awk
+# usage: alarm time [ "message" [ count [ delay ] ] ]
+
+BEGIN @{
+ # Initial argument sanity checking
+ usage1 = "usage: alarm time ['message' [count [delay]]]"
+ usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
+
+ if (ARGC < 2) @{
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ @}
+ switch (ARGC) @{
+ case 5:
+ delay = ARGV[4] + 0
+ # fall through
+ case 4:
+ count = ARGV[3] + 0
+ # fall through
+ case 3:
+ message = ARGV[2]
+ break
+ default:
+ if (ARGV[1] !~ /[[:digit:]]?[[:digit:]]:[[:digit:]]@{2@}/) @{
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ @}
+ break
+ @}
+
+ # set defaults for once we reach the desired time
+ if (delay == 0)
+ delay = 180 # 3 minutes
+@group
+ if (count == 0)
+ count = 5
+@end group
+ if (message == "")
+ message = sprintf("\aIt is now %s!\a", ARGV[1])
+ else if (index(message, "\a") == 0)
+ message = "\a" message "\a"
+@c endfile
+@end example
+
+The next @value{SECTION} of code turns the alarm time into hours and minutes,
+converts it (if necessary) to a 24-hour clock, and then turns that
+time into a count of the seconds since midnight. Next it turns the current
+time into a count of seconds since midnight. The difference between the two
+is how long to wait before setting off the alarm:
+
+@example
+@c file eg/prog/alarm.awk
+ # split up alarm time
+ split(ARGV[1], atime, ":")
+ hour = atime[1] + 0 # force numeric
+ minute = atime[2] + 0 # force numeric
+
+ # get current broken down time
+ getlocaltime(now)
+
+ # if time given is 12-hour hours and it's after that
+ # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m.,
+ # then add 12 to real hour
+ if (hour < 12 && now["hour"] > hour)
+ hour += 12
+
+ # set target time in seconds since midnight
+ target = (hour * 60 * 60) + (minute * 60)
+
+ # get current time in seconds since midnight
+ current = (now["hour"] * 60 * 60) + \
+ (now["minute"] * 60) + now["second"]
+
+ # how long to sleep for
+ naptime = target - current
+ if (naptime <= 0) @{
+ print "alarm: time is in the past!" > "/dev/stderr"
+ exit 1
+ @}
+@c endfile
+@end example
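+
+For example, an alarm time of 8:30 corresponds to (8 * 60 * 60) + (30 * 60),
+or 30,600 seconds after midnight. If the program is started at exactly
+6 a.m. (21,600 seconds after midnight), then @code{naptime} is 9,000
+seconds, or two and a half hours.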
+
+@cindex @command{sleep} utility
+Finally, the program uses the @code{system()} function
+(@pxref{I/O Functions})
+to call the @command{sleep} utility. The @command{sleep} utility simply pauses
+for the given number of seconds. If the exit status is not zero,
+the program assumes that @command{sleep} was interrupted and exits. If
+@command{sleep} exited with an OK status (zero), then the program prints the
+message in a loop, again using @command{sleep} to delay for however many
+seconds are necessary:
+
+@example
+@c file eg/prog/alarm.awk
+ # zzzzzz..... go away if interrupted
+ if (system(sprintf("sleep %d", naptime)) != 0)
+ exit 1
+
+ # time to notify!
+ command = sprintf("sleep %d", delay)
+ for (i = 1; i <= count; i++) @{
+ print message
+ # if sleep command interrupted, go away
+ if (system(command) != 0)
+ break
+ @}
+
+ exit 0
+@}
+@c endfile
+@end example
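+
+For example, assuming that you have saved the @code{getlocaltime()}
+function in a file named @file{getlocaltime.awk} (use whatever
+@value{FN} you actually chose), the following command sets an alarm
+for 8:00 that prints ``Good morning'' three times, pausing one minute
+between repetitions:
+
+@example
+gawk -f getlocaltime.awk -f alarm.awk 8:00 "Good morning" 3 60
+@end example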
+
+@node Translate Program
+@subsection Transliterating Characters
+
+@cindex characters, transliterating
+@cindex @command{tr} utility
+The system @command{tr} utility transliterates characters. For example, it is
+often used to map uppercase letters into lowercase for further processing:
+
+@example
+@var{generate data} | tr 'A-Z' 'a-z' | @var{process data} @dots{}
+@end example
+
+@command{tr} requires two lists of characters.@footnote{On some older
+systems, including Solaris, the system version of @command{tr} may require
+that the lists be written as range expressions enclosed in square brackets
+(@samp{[a-z]}) and quoted, to prevent the shell from attempting a
+@value{FN} expansion. This is not a feature.} When processing the input, the
+first character in the first list is replaced with the first character
+in the second list, the second character in the first list is replaced
+with the second character in the second list, and so on. If there are
+more characters in the ``from'' list than in the ``to'' list, the last
+character of the ``to'' list is used for the remaining characters in the
+``from'' list.
+
+Once upon a time,
+@c early or mid-1989!
+a user proposed adding a transliteration function
+to @command{gawk}.
+@c Wishing to avoid gratuitous new features,
+@c at least theoretically
+The following program was written to
+prove that character transliteration could be done with a user-level
+function. This program is not as complete as the system @command{tr} utility
+but it does most of the job.
+
+The @command{translate} program was written long before @command{gawk}
+acquired the ability to split each character in a string into separate
+array elements. Thus, it makes repeated use of the @code{substr()},
+@code{index()}, and @code{gsub()} built-in functions (@pxref{String
+Functions}). There are two functions. The first, @code{stranslate()},
+takes three arguments:
+
+@table @code
+@item from
+A list of characters from which to translate.
+
+@item to
+A list of characters to which to translate.
+
+@item target
+The string on which to do the translation.
+@end table
+
+Associative arrays make the translation part fairly easy. @code{t_ar} holds
+the ``to'' characters, indexed by the ``from'' characters. Then a simple
+loop goes through @code{from}, one character at a time. For each character
+in @code{from}, if the character appears in @code{target},
+it is replaced with the corresponding @code{to} character.
+
+The @code{translate()} function calls @code{stranslate()} using @code{$0}
+as the target. The main program sets two global variables, @code{FROM} and
+@code{TO}, from the command line, and then changes @code{ARGV} so that
+@command{awk} reads from the standard input.
+
+Finally, the processing rule simply calls @code{translate()} for each record:
+
+@cindex @code{translate.awk} program
+@example
+@c file eg/prog/translate.awk
+# translate.awk --- do tr-like stuff
+@c endfile
+@ignore
+@c file eg/prog/translate.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# August 1989
+# February 2009 - bug fix
+
+@c endfile
+@end ignore
+@c file eg/prog/translate.awk
+# Bugs: does not handle things like: tr A-Z a-z, it has
+# to be spelled out. However, if `to' is shorter than `from',
+# the last character in `to' is used for the rest of `from'.
+
+function stranslate(from, to, target, lf, lt, ltarget, t_ar, i, c,
+ result)
+@{
+ lf = length(from)
+ lt = length(to)
+ ltarget = length(target)
+ for (i = 1; i <= lt; i++)
+ t_ar[substr(from, i, 1)] = substr(to, i, 1)
+ if (lt < lf)
+ for (; i <= lf; i++)
+ t_ar[substr(from, i, 1)] = substr(to, lt, 1)
+ for (i = 1; i <= ltarget; i++) @{
+ c = substr(target, i, 1)
+ if (c in t_ar)
+ c = t_ar[c]
+ result = result c
+ @}
+ return result
+@}
+
+function translate(from, to)
+@{
+ return $0 = stranslate(from, to, $0)
+@}
+
+# main program
+BEGIN @{
+@group
+ if (ARGC < 3) @{
+ print "usage: translate from to" > "/dev/stderr"
+ exit
+ @}
+@end group
+ FROM = ARGV[1]
+ TO = ARGV[2]
+ ARGC = 2
+ ARGV[1] = "-"
+@}
+
+@{
+ translate(FROM, TO)
+ print
+@}
+@c endfile
+@end example
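+
+For example, the first of the following commands maps @samp{a},
+@samp{b}, and @samp{c} to their uppercase counterparts, and the second
+shows how the last character of a shorter ``to'' list is reused for the
+remaining ``from'' characters:
+
+@example
+$ @kbd{echo "quack" | gawk -f translate.awk "abc" "ABC"}
+quACk
+$ @kbd{echo "banana" | gawk -f translate.awk "abc" "x"}
+xxnxnx
+@end example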
+
+It is possible to do character transliteration in a user-level
+function, but it is not necessarily efficient, and we (the @command{gawk}
+developers) started to consider adding a built-in function. However,
+shortly after writing this program, we learned that Brian Kernighan
+had added the @code{toupper()} and @code{tolower()} functions to his
+@command{awk} (@pxref{String Functions}). These functions handle the
+vast majority of the cases where character transliteration is necessary,
+and so we chose to simply add those functions to @command{gawk} as well
+and then leave well enough alone.
+
+An obvious improvement to this program would be to set up the
+@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
+assumes that the ``from'' and ``to'' lists
+will never change throughout the lifetime of the program.
+
+Another obvious improvement is to enable the use of ranges,
+such as @samp{a-z}, as allowed by the @command{tr} utility.
+Look at the code for @file{cut.awk} (@pxref{Cut Program})
+for inspiration.
+
+
+@node Labels Program
+@subsection Printing Mailing Labels
+
+@cindex printing, mailing labels
+@cindex mailing labels@comma{} printing
+Here is a ``real world''@footnote{``Real world'' is defined as
+``a program actually used to get something done.''}
+program. This
+script reads lists of names and
+addresses and generates mailing labels. Each page of labels has 20 labels
+on it, two across and 10 down. The addresses are guaranteed to be no more
+than five lines of data. Each address is separated from the next by a blank
+line.
+
+The basic idea is to read 20 labels worth of data. Each line of each label
+is stored in the @code{line} array. The single rule takes care of filling
+the @code{line} array and printing the page when 20 labels have been read.
+
+The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
+@command{awk} splits records at blank lines
+(@pxref{Records}).
+It sets @code{MAXLINES} to 100, because 100 is the maximum number
+of lines on the page
+@iftex
+(@math{20 @cdot 5 = 100}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+(20 * 5 = 100).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+(20 &sdot; 5 = 100). @c
+@end docbook
+
+Most of the work is done in the @code{printpage()} function.
+The label lines are stored sequentially in the @code{line} array. But they
+have to print horizontally; @code{line[1]} next to @code{line[6]},
+@code{line[2]} next to @code{line[7]}, and so on. Two loops
+accomplish this. The outer loop, controlled by @code{i}, steps through
+every 10 lines of data; this is each row of labels. The inner loop,
+controlled by @code{j}, goes through the lines within the row.
+As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}-th line in
+the row, and @samp{i+j+5} is the entry next to it. The output ends up
+looking something like this:
+
+@example
+line 1 line 6
+line 2 line 7
+line 3 line 8
+line 4 line 9
+line 5 line 10
+@dots{}
+@end example
+
+@noindent
+The @code{printf} format string @samp{%-41s} left-aligns
+the data and prints it within a fixed-width field.
+
+As a final note, an extra blank line is printed at lines 21 and 61, to keep
+the output lined up on the labels. This is dependent on the particular
+brand of labels in use when the program was written. You will also note
+that there are two blank lines at the top and two blank lines at the bottom.
+
+The @code{END} rule arranges to flush the final page of labels; there may
+not have been an even multiple of 20 labels in the data:
+
+@cindex @code{labels.awk} program
+@example
+@c file eg/prog/labels.awk
+# labels.awk --- print mailing labels
+@c endfile
+@ignore
+@c file eg/prog/labels.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# June 1992
+# December 2010, minor edits
+@c endfile
+@end ignore
+@c file eg/prog/labels.awk
+
+# Each label is 5 lines of data that may have blank lines.
+# The label sheets have 2 blank lines at the top and 2 at
+# the bottom.
+
+BEGIN @{ RS = "" ; MAXLINES = 100 @}
+
+function printpage( i, j)
+@{
+ if (Nlines <= 0)
+ return
+
+ printf "\n\n" # header
+
+ for (i = 1; i <= Nlines; i += 10) @{
+ if (i == 21 || i == 61)
+ print ""
+ for (j = 0; j < 5; j++) @{
+ if (i + j > MAXLINES)
+ break
+ printf " %-41s %s\n", line[i+j], line[i+j+5]
+ @}
+ print ""
+ @}
+
+ printf "\n\n" # footer
+
+ delete line
+@}
+
+# main rule
+@{
+ if (Count >= 20) @{
+ printpage()
+ Count = 0
+ Nlines = 0
+ @}
+ n = split($0, a, "\n")
+ for (i = 1; i <= n; i++)
+ line[++Nlines] = a[i]
+ for (; i <= 5; i++)
+ line[++Nlines] = ""
+ Count++
+@}
+
+END @{
+ printpage()
+@}
+@c endfile
+@end example
+
+@node Word Sorting
+@subsection Generating Word-Usage Counts
+
+@cindex words, usage counts@comma{} generating
+
+When working with large amounts of text, it can be interesting to know
+how often different words appear. For example, an author may overuse
+certain words, in which case he or she might wish to find synonyms to substitute
+for words that appear too often. This @value{SUBSECTION} develops a
+program for counting words and presenting the frequency information
+in a useful format.
+
+At first glance, a program like this would seem to do the job:
+
+@example
+# wordfreq-first-try.awk --- print list of word frequencies
+
+@{
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}
+@end example
+
+The program relies on @command{awk}'s default field splitting
+mechanism to break each line up into ``words,'' and uses an
+associative array named @code{freq}, indexed by each word, to count
+the number of times the word occurs. In the @code{END} rule,
+it prints the counts.
+
+This program has several problems that prevent it from being
+useful on real text files:
+
+@itemize @value{BULLET}
+@item
+The @command{awk} language considers upper- and lowercase characters to be
+distinct. Therefore, ``bartender'' and ``Bartender'' are not treated
+as the same word. This is undesirable, because words are capitalized
+if they begin sentences in normal text, and a frequency analyzer should
+not be sensitive to capitalization.
+
+@item
+Words are detected using the @command{awk} convention that fields are
+separated just by whitespace. Other characters in the input (except
+newlines) don't have any special meaning to @command{awk}. This means that
+punctuation characters count as part of words.
+
+@item
+The output does not come out in any useful order. You're more likely to be
+interested in which words occur most frequently or in having an alphabetized
+table of how frequently each word occurs.
+@end itemize
+
+@cindex @command{sort} utility
+The first problem can be solved by using @code{tolower()} to remove case
+distinctions. The second problem can be solved by using @code{gsub()}
+to remove punctuation characters. Finally, we solve the third problem
+by using the system @command{sort} utility to process the output of the
+@command{awk} script. Here is the new version of the program:
+
+@cindex @code{wordfreq.awk} program
+@example
+@c file eg/prog/wordfreq.awk
+# wordfreq.awk --- print list of word frequencies
+
+@{
+ $0 = tolower($0) # remove case distinctions
+ # remove punctuation
+ gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+@c endfile
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}
+@end example
+
+The regexp @code{/[^[:alnum:]_[:blank:]]/} might have been written
+@code{/[[:punct:]]/}, but then underscores would also be removed,
+and we want to keep them.
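+
+The following two commands show the difference; the first removes the
+underscore along with the real punctuation, whereas the second keeps it:
+
+@example
+$ @kbd{echo "a_b, c." | gawk '@{ gsub(/[[:punct:]]/, ""); print @}'}
+ab c
+$ @kbd{echo "a_b, c." | gawk '@{ gsub(/[^[:alnum:]_[:blank:]]/, ""); print @}'}
+a_b c
+@end example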
+
+Assuming we have saved this program in a file named @file{wordfreq.awk},
+and that the data is in @file{file1}, the following pipeline:
+
+@example
+awk -f wordfreq.awk file1 | sort -k 2nr
+@end example
+
+@noindent
+produces a table of the words appearing in @file{file1} in order of
+decreasing frequency.
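+
+For example, here is the pipeline run on a small sample supplied on
+standard input (the two columns in the output are separated by a tab
+character):
+
+@example
+$ @kbd{echo "so so so far far out" | gawk -f wordfreq.awk | sort -k 2nr}
+so      3
+far     2
+out     1
+@end example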
+
+The @command{awk} program suitably massages the
+data and produces a word frequency table, which is not ordered.
+The @command{awk} script's output is then sorted by the @command{sort}
+utility and printed on the screen.
+
+The options given to @command{sort}
+specify a sort that uses the second field of each input line (skipping
+one field), that the sort keys should be treated as numeric quantities
+(otherwise @samp{15} would come before @samp{5}), and that the sorting
+should be done in descending (reverse) order.
+
+The @command{sort} could even be done from within the program, by changing
+the @code{END} action to:
+
+@example
+@c file eg/prog/wordfreq.awk
+END @{
+ sort = "sort -k 2nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+@}
+@c endfile
+@end example
+
+This way of sorting must be used on systems that do not
+have true pipes at the command-line (or batch-file) level.
+See the general operating system documentation for more information on how
+to use the @command{sort} program.
+
+@node History Sorting
+@subsection Removing Duplicates from Unsorted Text
+
+@cindex lines, duplicate@comma{} removing
+The @command{uniq} program
+(@pxref{Uniq Program})
+removes duplicate lines from @emph{sorted} data.
+
+Suppose, however, you need to remove duplicate lines from a @value{DF} but
+that you want to preserve the order the lines are in. A good example of
+this might be a shell history file. The history file keeps a copy of all
+the commands you have entered, and it is not unusual to repeat a command
+several times in a row. Occasionally you might want to compact the history
+by removing duplicate entries. Yet it is desirable to maintain the order
+of the original commands.
+
+This simple program does the job. It uses two arrays. The @code{data}
+array is indexed by the text of each line.
+For each line, @code{data[$0]} is incremented.
+If a particular line has not
+been seen before, then @code{data[$0]} is zero.
+In this case, the text of the line is stored in @code{lines[count]}.
+Each element of @code{lines} is a unique command, and the indices of
+@code{lines} indicate the order in which those lines are encountered.
+The @code{END} rule simply prints out the lines, in order:
+
+@cindex Rakitzis, Byron
+@cindex @code{histsort.awk} program
+@example
+@c file eg/prog/histsort.awk
+# histsort.awk --- compact a shell history file
+# Thanks to Byron Rakitzis for the general idea
+@c endfile
+@ignore
+@c file eg/prog/histsort.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+@c endfile
+@end ignore
+@c file eg/prog/histsort.awk
+
+@group
+@{
+ if (data[$0]++ == 0)
+ lines[++count] = $0
+@}
+@end group
+
+@group
+END @{
+ for (i = 1; i <= count; i++)
+ print lines[i]
+@}
+@end group
+@c endfile
+@end example
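+
+Here is a small demonstration, using @command{printf} to supply some
+repeated ``commands'' on standard input:
+
+@example
+$ @kbd{printf "ls\ndate\nls\nmake\nls\n" | gawk -f histsort.awk}
+ls
+date
+make
+@end example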
+
+This program also provides a foundation for generating other useful
+information. For example, using the following @code{print} statement in the
+@code{END} rule indicates how often a particular command is used:
+
+@example
+print data[lines[i]], lines[i]
+@end example
+
+@noindent
+This works because @code{data[$0]} is incremented each time a line is
+seen.
+
+@node Extract Program
+@subsection Extracting Programs from Texinfo Source Files
+
+@cindex Texinfo, extracting programs from source files
+@cindex files, Texinfo@comma{} extracting programs from
+@ifnotinfo
+Both this chapter and the previous chapter
+(@ref{Library Functions})
+present a large number of @command{awk} programs.
+@end ifnotinfo
+@ifinfo
+The nodes
+@ref{Library Functions},
+and @ref{Sample Programs},
+are the top level nodes for a large number of @command{awk} programs.
+@end ifinfo
+If you want to experiment with these programs, it is tedious to type
+them in by hand. Here we present a program that can extract parts of a
+Texinfo input file into separate files.
+
+@cindex Texinfo
+This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo},
+the GNU project's document formatting language.
+A single Texinfo source file can be used to produce both
+printed documentation, with @TeX{}, and online documentation.
+@ifnotinfo
+(Texinfo is fully documented in the book
+@cite{Texinfo---The GNU Documentation Format},
+available from the Free Software Foundation,
+and also available @uref{http://www.gnu.org/software/texinfo/manual/texinfo/, online}.)
+@end ifnotinfo
+@ifinfo
+(The Texinfo language is described fully, starting with
+@inforef{Top, , Texinfo, texinfo,Texinfo---The GNU Documentation Format}.)
+@end ifinfo
+
+For our purposes, it is enough to know three things about Texinfo input
+files:
+
+@itemize @value{BULLET}
+@item
+The ``at'' symbol (@samp{@@}) is special in Texinfo, much as
+the backslash (@samp{\}) is in C
+or @command{awk}. Literal @samp{@@} symbols are represented in Texinfo source
+files as @samp{@@@@}.
+
+@item
+Comments start with either @samp{@@c} or @samp{@@comment}.
+The file-extraction program works by using special comments that start
+at the beginning of a line.
+
+@item
+Lines containing @samp{@@group} and @samp{@@end group} commands bracket
+example text that should not be split across a page boundary.
+(Unfortunately, @TeX{} isn't always smart enough to do things exactly right,
+so we have to give it some help.)
+@end itemize
+
+The following program, @file{extract.awk}, reads through a Texinfo source
+file and does two things, based on the special comments.
+Upon seeing @samp{@w{@@c system @dots{}}},
+it runs a command, by extracting the command text from the
+control line and passing it on to the @code{system()} function
+(@pxref{I/O Functions}).
+Upon seeing @samp{@@c file @var{filename}}, each subsequent line is sent to
+the file @var{filename}, until @samp{@@c endfile} is encountered.
+The rules in @file{extract.awk} match either @samp{@@c} or
+@samp{@@comment} by letting the @samp{omment} part be optional.
+Lines containing @samp{@@group} and @samp{@@end group} are simply removed.
+@file{extract.awk} uses the @code{join()} library function
+(@pxref{Join Function}).
+
+The example programs in the online Texinfo source for @cite{@value{TITLE}}
+(@file{gawktexi.in}) have all been bracketed inside @samp{file} and
+@samp{endfile} lines. The @command{gawk} distribution uses a copy of
+@file{extract.awk} to extract the sample programs and install many
+of them in a standard directory where @command{gawk} can find them.
+The Texinfo file looks something like this:
+
+@example
+@dots{}
+This program has a @@code@{BEGIN@} rule,
+that prints a nice message:
+
+@@example
+@@c file examples/messages.awk
+BEGIN @@@{ print "Don't panic!" @@@}
+@@c end file
+@@end example
+
+It also prints some final advice:
+
+@@example
+@@c file examples/messages.awk
+END @@@{ print "Always avoid bored archaeologists!" @@@}
+@@c end file
+@@end example
+@dots{}
+@end example
+
+@file{extract.awk} begins by setting @code{IGNORECASE} to one, so that
+mixed upper- and lowercase letters in the directives won't matter.
+
+The first rule handles calling @code{system()}, checking that a command is
+given (@code{NF} is at least three) and also checking that the command
+exits with a zero exit status, signifying OK:
+
+@cindex @code{extract.awk} program
+@example
+@c file eg/prog/extract.awk
+# extract.awk --- extract files and run programs from texinfo files
+@c endfile
+@ignore
+@c file eg/prog/extract.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# May 1993
+# Revised September 2000
+@c endfile
+@end ignore
+@c file eg/prog/extract.awk
+
+BEGIN @{ IGNORECASE = 1 @}
+
+/^@@c(omment)?[ \t]+system/ @{
+ if (NF < 3) @{
+ e = ("extract: " FILENAME ":" FNR)
+ e = (e ": badly formed `system' line")
+ print e > "/dev/stderr"
+ next
+ @}
+ $1 = ""
+ $2 = ""
+ stat = system($0)
+ if (stat != 0) @{
+ e = ("extract: " FILENAME ":" FNR)
+ e = (e ": warning: system returned " stat)
+ print e > "/dev/stderr"
+ @}
+@}
+@c endfile
+@end example
+
+@noindent
+The variable @code{e} is used so that the rule
+fits nicely on the @value{PAGE}.
+
+The second rule handles moving data into files. It verifies that a
+@value{FN} is given in the directive. If the file named is not the
+current file, then the current file is closed. Keeping the current file
+open until a new file is encountered allows the use of the @samp{>}
+redirection for printing the contents, keeping open file management
+simple.
+
+The @code{for} loop does the work. It reads lines using @code{getline}
+(@pxref{Getline}).
+For an unexpected end of file, it calls the @code{@w{unexpected_eof()}}
+function. If the line is an ``endfile'' line, then it breaks out of
+the loop.
+If the line is an @samp{@@group} or @samp{@@end group} line, then it
+ignores it and goes on to the next line.
+Similarly, comments within examples are also ignored.
+
+Most of the work is in the following few lines. If the line has no @samp{@@}
+symbols, the program can print it directly.
+Otherwise, each leading @samp{@@} must be stripped off.
+To remove the @samp{@@} symbols, the line is split into separate elements of
+the array @code{a}, using the @code{split()} function
+(@pxref{String Functions}).
+The @samp{@@} symbol is used as the separator character.
+Each element of @code{a} that is empty indicates two successive @samp{@@}
+symbols in the original line. For each two empty elements (@samp{@@@@} in
+the original file), we have to add a single @samp{@@} symbol back in.
+
+When the processing of the array is finished, @code{join()} is called with the
+value of @code{SUBSEP} (@pxref{Multidimensional}),
+to rejoin the pieces back into a single
+line. That line is then printed to the output file:
+
+@example
+@c file eg/prog/extract.awk
+/^@@c(omment)?[ \t]+file/ @{
+ if (NF != 3) @{
+ e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
+ print e > "/dev/stderr"
+ next
+ @}
+ if ($3 != curfile) @{
+ if (curfile != "")
+ close(curfile)
+ curfile = $3
+ @}
+
+ for (;;) @{
+ if ((getline line) <= 0)
+ unexpected_eof()
+ if (line ~ /^@@c(omment)?[ \t]+endfile/)
+ break
+ else if (line ~ /^@@(end[ \t]+)?group/)
+ continue
+ else if (line ~ /^@@c(omment+)?[ \t]+/)
+ continue
+ if (index(line, "@@") == 0) @{
+ print line > curfile
+ continue
+ @}
+ n = split(line, a, "@@")
+ # if a[1] == "", means leading @@,
+ # don't add one back in.
+ for (i = 2; i <= n; i++) @{
+ if (a[i] == "") @{ # was an @@@@
+ a[i] = "@@"
+ if (a[i+1] == "")
+ i++
+ @}
+ @}
+ print join(a, 1, n, SUBSEP) > curfile
+ @}
+@}
+@c endfile
+@end example
+
+An important thing to note is the use of the @samp{>} redirection.
+Output done with @samp{>} only opens the file once; it stays open and
+subsequent output is appended to the file
+(@pxref{Redirection}).
+This makes it easy to mix program text and explanatory prose for the same
+sample source file (as has been done here!) without any hassle. The file is
+only closed when a new @value{DF} name is encountered or at the end of the
+input file.
+
+Finally, the function @code{@w{unexpected_eof()}} prints an appropriate
+error message and then exits.
+The @code{END} rule handles the final cleanup, closing the open file:
+
+@example
+@c file eg/prog/extract.awk
+@group
+function unexpected_eof()
+@{
+ printf("extract: %s:%d: unexpected EOF or error\n",
+ FILENAME, FNR) > "/dev/stderr"
+ exit 1
+@}
+@end group
+
+END @{
+ if (curfile)
+ close(curfile)
+@}
+@c endfile
+@end example
+
+@node Simple Sed
+@subsection A Simple Stream Editor
+
+@cindex @command{sed} utility
+@cindex stream editors
+The @command{sed} utility is a stream editor, a program that reads a
+stream of data, makes changes to it, and passes it on.
+It is often used to make global changes to a large file or to a stream
+of data generated by a pipeline of commands.
+Although @command{sed} is a complicated program in its own right, its most common
+use is to perform global substitutions in the middle of a pipeline:
+
+@example
+@var{command1} < orig.data | sed 's/old/new/g' | @var{command2} > result
+@end example
+
+Here, @samp{s/old/new/g} tells @command{sed} to look for the regexp
+@samp{old} on each input line and globally replace it with the text
+@samp{new} (i.e., all the occurrences on a line). This is similar to
+@command{awk}'s @code{gsub()} function
+(@pxref{String Functions}).
+
+The following program, @file{awksed.awk}, accepts at least two command-line
+arguments: the pattern to look for and the text to replace it with. Any
+additional arguments are treated as @value{DF} names to process. If none
+are provided, the standard input is used:
+
+@cindex Brennan, Michael
+@cindex @command{awksed.awk} program
+@c @cindex simple stream editor
+@c @cindex stream editor, simple
+@example
+@c file eg/prog/awksed.awk
+# awksed.awk --- do s/foo/bar/g using just print
+# Thanks to Michael Brennan for the idea
+@c endfile
+@ignore
+@c file eg/prog/awksed.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# August 1995
+@c endfile
+@end ignore
+@c file eg/prog/awksed.awk
+
+function usage()
+@{
+ print "usage: awksed pat repl [files...]" > "/dev/stderr"
+ exit 1
+@}
+
+BEGIN @{
+ # validate arguments
+ if (ARGC < 3)
+ usage()
+
+ RS = ARGV[1]
+ ORS = ARGV[2]
+
+ # don't use arguments as files
+ ARGV[1] = ARGV[2] = ""
+@}
+
+@group
+# look ma, no hands!
+@{
+ if (RT == "")
+ printf "%s", $0
+ else
+ print
+@}
+@end group
+@c endfile
+@end example
+
+The program relies on @command{gawk}'s ability to have @code{RS} be a regexp,
+as well as on the setting of @code{RT} to the actual text that terminates the
+record (@pxref{Records}).
+
+The idea is to have @code{RS} be the pattern to look for. @command{gawk}
+automatically sets @code{$0} to the text between matches of the pattern.
+This is text that we want to keep, unmodified. Then, by setting @code{ORS}
+to the replacement text, a simple @code{print} statement outputs the
+text we want to keep, followed by the replacement text.
+
+There is one wrinkle to this scheme, which is what to do if the last record
+doesn't end with text that matches @code{RS}. Using a @code{print}
+statement unconditionally prints the replacement text, which is not correct.
+However, if the file did not end in text that matches @code{RS}, @code{RT}
+is set to the null string. In this case, we can print @code{$0} using
+@code{printf}
+(@pxref{Printf}).
+
+The @code{BEGIN} rule handles the setup, checking for the right number
+of arguments and calling @code{usage()} if there is a problem. Then it sets
+@code{RS} and @code{ORS} from the command-line arguments and sets
+@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they are
+not treated as @value{FN}s
+(@pxref{ARGC and ARGV}).
+
+The @code{usage()} function prints an error message and exits.
+Finally, the single rule handles the printing scheme outlined earlier,
+using @code{print} or @code{printf} as appropriate, depending upon the
+value of @code{RT}.
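+
+Here is the program in action, replacing each @samp{o} in the input
+with @samp{0}:
+
+@example
+$ @kbd{echo "hello world" | gawk -f awksed.awk "o" "0"}
+hell0 w0rld
+@end example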
+
+@node Igawk Program
+@subsection An Easy Way to Use Library Functions
+
+@cindex libraries of @command{awk} functions, example program for using
+@cindex functions, library, example program for using
+In @ref{Include Files}, we saw how @command{gawk} provides a built-in
+file-inclusion capability. However, this is a @command{gawk} extension.
+This @value{SECTION} provides the motivation for making file inclusion
+available for standard @command{awk}, and shows how to do it using a
+combination of shell and @command{awk} programming.
+
+Using library functions in @command{awk} can be very beneficial. It
+encourages code reuse and the writing of general functions. Programs are
+smaller and therefore clearer.
+However, using library functions is only easy when writing @command{awk}
+programs; it is painful when running them, requiring multiple @option{-f}
+options. If @command{gawk} is unavailable, then so too is the @env{AWKPATH}
+environment variable and the ability to put @command{awk} functions into a
+library directory (@pxref{Options}).
+It would be nice to be able to write programs in the following manner:
+
+@example
+# library functions
+@@include getopt.awk
+@@include join.awk
+@dots{}
+
+# main program
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1)
+ @dots{}
+ @dots{}
+@}
+@end example
+
+The following program, @file{igawk.sh}, provides this service.
+It simulates @command{gawk}'s searching of the @env{AWKPATH} variable
+and also allows @dfn{nested} includes (i.e., a file that is included
+with @code{@@include} can contain further @code{@@include} statements).
+@command{igawk} makes an effort to only include files once, so that nested
+includes don't accidentally include a library function twice.
+
+@command{igawk} should behave just like @command{gawk} externally. This
+means it should accept all of @command{gawk}'s command-line arguments,
+including the ability to have multiple source files specified via
+@option{-f}, and the ability to mix command-line and library source files.
+
+The program is written using the POSIX Shell (@command{sh}) command
+language.@footnote{Fully explaining the @command{sh} language is beyond
+the scope of this book. We provide some minimal explanations, but see
+a good shell programming book if you wish to understand things in more
+depth.} It works as follows:
+
+@enumerate
+@item
+Loop through the arguments, saving anything that doesn't represent
+@command{awk} source code for later, when the expanded program is run.
+
+@item
+For any arguments that do represent @command{awk} text, put the arguments into
+a shell variable that will be expanded. There are two cases:
+
+@enumerate a
+@item
+Literal text, provided with @option{-e} or @option{--source}. This
+text is just appended directly.
+
+@item
+Source @value{FN}s, provided with @option{-f}. We use a neat trick and
+append @samp{@@include @var{filename}} to the shell variable's contents.
+Because the file-inclusion program works the way @command{gawk} does, this
+gets the text of the file included in the program at the correct point.
+@end enumerate
+
+@item
+Run an @command{awk} program (naturally) over the shell variable's contents to expand
+@code{@@include} statements. The expanded program is placed in a second
+shell variable.
+
+@item
+Run the expanded program with @command{gawk} and any other original command-line
+arguments that the user supplied (such as the @value{DF} names).
+@end enumerate
+
+This program uses shell variables extensively: for storing command-line
+arguments, for the text of the @command{awk} program that will expand the
+user's program, for the user's original program, and for the expanded
+program. Doing so removes some
+potential problems that might arise were we to use temporary files instead,
+at the cost of making the script somewhat more complicated.
+
+The initial part of the program turns on shell tracing if the first
+argument is @samp{debug}.
+
+The next part loops through all the command-line arguments.
+There are several cases of interest:
+
+@c @asis for docbook
+@table @asis
+@item @option{--}
+This ends the arguments to @command{igawk}. Anything else should be passed on
+to the user's @command{awk} program without being evaluated.
+
+@item @option{-W}
+This indicates that the next option is specific to @command{gawk}. To make
+argument processing easier, the @option{-W} is appended to the front of the
+remaining arguments and the loop continues. (This is an @command{sh}
+programming trick. Don't worry about it if you are not familiar with
+@command{sh}.)
+
+@item @option{-v}, @option{-F}
+These are saved and passed on to @command{gawk}.
+
+@item @option{-f}, @option{--file}, @option{--file=}, @option{-Wfile=}
+The @value{FN} is appended to the shell variable @code{program} with an
+@code{@@include} statement.
+The @command{expr} utility is used to remove the leading option part of the
+argument (e.g., @samp{--file=}).
+(Typical @command{sh} usage would be to use the @command{echo} and @command{sed}
+utilities to do this work. Unfortunately, some versions of @command{echo} evaluate
+escape sequences in their arguments, possibly mangling the program text.
+Using @command{expr} avoids this problem.)
+
+@item @option{--source}, @option{--source=}, @option{-Wsource=}
+The source text is appended to @code{program}.
+
+@item @option{--version}, @option{-Wversion}
+@command{igawk} prints its version number, runs @samp{gawk --version}
+to get the @command{gawk} version information, and then exits.
+@end table
+
+If none of the @option{-f}, @option{--file}, @option{-Wfile}, @option{--source},
+or @option{-Wsource} arguments are supplied, then the first nonoption argument
+should be the @command{awk} program. If there are no command-line
+arguments left, @command{igawk} prints an error message and exits.
+Otherwise, the first argument is appended to @code{program}.
+In any case, after the arguments have been processed,
+the shell variable
+@code{program} contains the complete text of the original @command{awk}
+program.
+
+The program is as follows:
+
+@cindex @code{igawk.sh} program
+@example
+@c file eg/prog/igawk.sh
+#! /bin/sh
+# igawk --- like gawk but do @@include processing
+@c endfile
+@ignore
+@c file eg/prog/igawk.sh
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July 1993
+# December 2010, minor edits
+@c endfile
+@end ignore
+@c file eg/prog/igawk.sh
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+fi
+
+# A literal newline, so that program text is formatted correctly
+n='
+'
+
+# Initialize variables to empty
+program=
+opts=
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift
+ break ;;
+
+ -W) shift
+ # The $@{x?'message here'@} construct prints a
+ # diagnostic if $x is the null string
+ set -- -W"$@{@@?'missing operand'@}"
+ continue ;;
+
+ -[vF]) opts="$opts $1 '$@{2?'missing operand'@}'"
+ shift ;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) program="$program$n@@include $@{2?'missing operand'@}"
+ shift ;;
+
+ -f*) f=$(expr "$1" : '-f\(.*\)')
+ program="$program$n@@include $f" ;;
+
+ -[W-]file=*)
+ f=$(expr "$1" : '-.file=\(.*\)')
+ program="$program$n@@include $f" ;;
+
+ -[W-]file)
+ program="$program$n@@include $@{2?'missing operand'@}"
+ shift ;;
+
+ -[W-]source=*)
+ t=$(expr "$1" : '-.source=\(.*\)')
+ program="$program$n$t" ;;
+
+ -[W-]source)
+ program="$program$n$@{2?'missing operand'@}"
+ shift ;;
+
+ -[W-]version)
+ echo igawk: version 3.0 1>&2
+ gawk --version
+ exit 0 ;;
+
+ -[W-]*) opts="$opts '$1'" ;;
+
+ *) break ;;
+ esac
+ shift
+done
+
+if [ -z "$program" ]
+then
+ program=$@{1?'missing program'@}
+ shift
+fi
+
+# At this point, `program' has the program.
+@c endfile
+@end example
+
+The @command{awk} program to process @code{@@include} directives
+is stored in the shell variable @code{expand_prog}. Doing this keeps
+the shell script readable. The @command{awk} program
+reads through the user's program, one line at a time, using @code{getline}
+(@pxref{Getline}). The input
+@value{FN}s and @code{@@include} statements are managed using a stack.
+As each @code{@@include} is encountered, the current @value{FN} is
+``pushed'' onto the stack and the file named in the @code{@@include}
+directive becomes the current @value{FN}. As each file is finished,
+the stack is ``popped,'' and the previous input file becomes the current
+input file again. The process is started by making the original file
+the first one on the stack.
+
+The @code{pathto()} function does the work of finding the full path to
+a file. It simulates @command{gawk}'s behavior when searching the
+@env{AWKPATH} environment variable
+(@pxref{AWKPATH Variable}).
+If a @value{FN} has a @samp{/} in it, no path search is done.
+Similarly, if the @value{FN} is @code{"-"}, then that string is
+used as-is. Otherwise,
+the @value{FN} is concatenated with the name of each directory in
+the path, and an attempt is made to open the generated @value{FN}.
+The only way to test if a file can be read in @command{awk} is to go
+ahead and try to read it with @code{getline}; this is what @code{pathto()}
+does.@footnote{On some very old versions of @command{awk}, the test
+@samp{getline junk < t} can loop forever if the file exists but is empty.}
+If the file can be read, it is closed and the @value{FN}
+is returned:
+
+@ignore
+An alternative way to test for the file's existence would be to call
+@samp{system("test -r " t)}, which uses the @command{test} utility to
+see if the file exists and is readable. The disadvantage to this method
+is that it requires creating an extra process and can thus be slightly
+slower.
+@end ignore
+
+@example
+@c file eg/prog/igawk.sh
+expand_prog='
+
+function pathto(file, i, t, junk)
+@{
+ if (index(file, "/") != 0)
+ return file
+
+ if (file == "-")
+ return file
+
+ for (i = 1; i <= ndirs; i++) @{
+ t = (pathlist[i] "/" file)
+@group
+ if ((getline junk < t) > 0) @{
+ # found it
+ close(t)
+ return t
+ @}
+@end group
+ @}
+ return ""
+@}
+@c endfile
+@end example
+
+The main program is contained inside one @code{BEGIN} rule. The first thing it
+does is set up the @code{pathlist} array that @code{pathto()} uses. After
+splitting the path on @samp{:}, null elements are replaced with @code{"."},
+which represents the current directory:
+
+@example
+@c file eg/prog/igawk.sh
+BEGIN @{
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ":")
+ for (i = 1; i <= ndirs; i++) @{
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ @}
+@c endfile
+@end example
+
+The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}.
+The main loop comes next. Input lines are read in succession. Lines that
+do not start with @code{@@include} are printed verbatim.
+If the line does start with @code{@@include}, the @value{FN} is in @code{$2}.
+@code{pathto()} is called to generate the full path. If it cannot, then the program
+prints an error message and continues.
+
+The next thing to check is if the file is included already. The
+@code{processed} array is indexed by the full @value{FN} of each included
+file and it tracks this information for us. If the file is
+seen again, a warning message is printed. Otherwise, the new @value{FN} is
+pushed onto the stack and processing continues.
+
+Finally, when @code{getline} encounters the end of the input file, the file
+is closed and the stack is popped. When @code{stackptr} is less than zero,
+the program is done:
+
+@example
+@c file eg/prog/igawk.sh
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) @{
+ while ((getline < input[stackptr]) > 0) @{
+ if (tolower($1) != "@@include") @{
+ print
+ continue
+ @}
+ fpath = pathto($2)
+@group
+ if (fpath == "") @{
+ printf("igawk: %s:%d: cannot find %s\n",
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ @}
+@end group
+ if (! (fpath in processed)) @{
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath # push onto stack
+ @} else
+ print $2, "included in", input[stackptr],
+ "already included in",
+ processed[fpath] > "/dev/stderr"
+ @}
+ close(input[stackptr])
+ @}
+@}' # close quote ends `expand_prog' variable
+
+processed_program=$(gawk -- "$expand_prog" /dev/stdin << EOF
+$program
+EOF
+)
+@c endfile
+@end example
+
+The shell construct @samp{@var{command} << @var{marker}} is called
+a @dfn{here document}. Everything in the shell script up to the
+@var{marker} is fed to @var{command} as input. The shell processes
+the contents of the here document for variable and command substitution
+(and possibly other things as well, depending upon the shell).
+
+The shell construct @samp{$(@dots{})} is called @dfn{command substitution}.
+The output of the command inside the parentheses is substituted
+into the command line.
+Because the result is used in a variable assignment,
+it is saved as a single string, even if the results contain whitespace.
+
+The expanded program is saved in the variable @code{processed_program}.
+It's done in these steps:
+
+@enumerate
+@item
+Run @command{gawk} with the @code{@@include}-processing program (the
+value of the @code{expand_prog} shell variable) reading standard input.
+
+@item
+Standard input is the contents of the user's program,
+from the shell variable @code{program}.
+Feed its contents to @command{gawk} via a here document.
+
+@item
+Save the results of this processing in the shell variable
+@code{processed_program} by using command substitution.
+@end enumerate
+
+The last step is to call @command{gawk} with the expanded program,
+along with the original
+options and command-line arguments that the user supplied.
+
+@c this causes more problems than it solves, so leave it out.
+@ignore
+The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk}
+to handle an interesting case. Suppose that the user's program only has
+a @code{BEGIN} rule and there are no @value{DF}s to read.
+The program should exit without reading any @value{DF}s.
+However, suppose that an included library file defines an @code{END}
+rule of its own. In this case, @command{gawk} will hang, reading standard
+input. In order to avoid this, @file{/dev/null} is explicitly added to the
+command line. Reading from @file{/dev/null} always returns an immediate
+end of file indication.
+
+@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
+@end ignore
+
+@example
+@c file eg/prog/igawk.sh
+eval gawk $opts -- '"$processed_program"' '"$@@"'
+@c endfile
+@end example
+
+The @command{eval} command is a shell construct that reruns the shell's parsing
+process. This keeps things properly quoted.
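+
+Once @file{igawk.sh} has been installed (say, as @command{igawk}
+somewhere in your @env{PATH}, and made executable), you run it exactly
+as you would run @command{gawk} itself. For example (the @value{FN}s
+here are only placeholders):
+
+@example
+igawk -f myprog.awk datafile1 datafile2
+@end example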
+
+This version of @command{igawk} represents the fifth version of this program.
+There are four key simplifications that make the program work better:
+
+@itemize @value{BULLET}
+@item
+Using @code{@@include} even for the files named with @option{-f} makes building
+the initial collected @command{awk} program much simpler; all the
+@code{@@include} processing can be done once.
+
+@item
+Not trying to save the line read with @code{getline}
+in the @code{pathto()} function when testing for the
+file's accessibility for use with the main program simplifies things
+considerably.
+
+@item
+Using a @code{getline} loop in the @code{BEGIN} rule does it all in one
+place. It is not necessary to call out to a separate loop for processing
+nested @code{@@include} statements.
+
+@item
+Instead of saving the expanded program in a temporary file, putting it in a shell variable
+avoids some potential security problems.
+This has the disadvantage that the script relies upon more features
+of the @command{sh} language, making it harder to follow for those who
+aren't familiar with @command{sh}.
+@end itemize
+
+Also, this program illustrates that it is often worthwhile to combine
+@command{sh} and @command{awk} programming together. You can usually
+accomplish quite a lot, without having to resort to low-level programming
+in C or C++, and it is frequently easier to do certain kinds of string
+and argument manipulation using the shell than it is in @command{awk}.
+
+Finally, @command{igawk} shows that it is not always necessary to add new
+features to a program; they can often be layered on top.@footnote{@command{gawk}
+does @code{@@include} processing itself in order to support the use
+of @command{awk} programs as Web CGI scripts.}
+
+
+@node Anagram Program
+@subsection Finding Anagrams from a Dictionary
+
+@cindex anagrams, finding
+An interesting programming challenge is to
+search for @dfn{anagrams} in a
+word list (such as
+@file{/usr/share/dict/words} on many GNU/Linux systems).
+One word is an anagram of another if both words contain
+the same letters
+(e.g., ``babbling'' and ``blabbing'').
+
+Column 2, Problem C, of Jon Bentley's @cite{Programming Pearls}, Second
+Edition, presents an elegant algorithm. The idea is to give words that
+are anagrams a common signature, sort all the words together by their
+signature, and then print them. Dr.@: Bentley observes that taking the
+letters in each word and sorting them produces that common signature.
+
+The following program uses arrays of arrays to bring together
+words with the same signature and array sorting to print the words
+in sorted order:
+
+@cindex @code{anagram.awk} program
+@example
+@c file eg/prog/anagram.awk
+# anagram.awk --- An implementation of the anagram finding algorithm
+# from Jon Bentley's "Programming Pearls", 2nd edition.
+# Addison Wesley, 2000, ISBN 0-201-65788-0.
+# Column 2, Problem C, section 2.8, pp 18-20.
+@c endfile
+@ignore
+@c file eg/prog/anagram.awk
+#
+# This program requires gawk 4.0 or newer.
+# Required gawk-specific features:
+# - True multidimensional arrays
+# - split() with "" as separator splits out individual characters
+# - asort() and asorti() functions
+#
+# See http://savannah.gnu.org/projects/gawk.
+#
+# Arnold Robbins
+# arnold@@skeeve.com
+# Public Domain
+# January, 2011
+@c endfile
+@end ignore
+@c file eg/prog/anagram.awk
+
+/'s$/ @{ next @} # Skip possessives
+@c endfile
+@end example
+
+The program starts with a header, and then a rule to skip
+possessives in the dictionary file. The next rule builds
+up the data structure. The first dimension of the array
+is indexed by the signature; the second dimension is the word
+itself:
+
+@example
+@c file eg/prog/anagram.awk
+@{
+ key = word2key($1) # Build signature
+ data[key][$1] = $1 # Store word with signature
+@}
+@c endfile
+@end example
+
+The @code{word2key()} function creates the signature.
+It splits the word apart into individual letters,
+sorts the letters, and then joins them back together:
+
+@example
+@c file eg/prog/anagram.awk
+# word2key --- split word apart into letters, sort, joining back together
+
+function word2key(word, a, i, n, result)
+@{
+ n = split(word, a, "")
+ asort(a)
+
+ for (i = 1; i <= n; i++)
+ result = result a[i]
+
+ return result
+@}
+@c endfile
+@end example
+
+Finally, the @code{END} rule traverses the array
+and prints out the anagram lists. It sends the output
+to the system @command{sort} command because otherwise
+the anagrams would appear in arbitrary order:
+
+@example
+@c file eg/prog/anagram.awk
+END @{
+ sort = "sort"
+ for (key in data) @{
+ # Sort words with same key
+ nwords = asorti(data[key], words)
+ if (nwords == 1)
+ continue
+
+ # And print. Minor glitch: trailing space at end of each line
+ for (j = 1; j <= nwords; j++)
+ printf("%s ", words[j]) | sort
+ print "" | sort
+ @}
+ close(sort)
+@}
+@c endfile
+@end example
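+
+You can check the idea on just a couple of words, without a full
+dictionary file, by supplying them on standard input, one per line:
+
+@example
+$ @kbd{printf "babbling\nblabbing\n" | gawk -f anagram.awk}
+babbling blabbing
+@end example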
+
+Here is some partial output when the program is run:
+
+@example
+$ @kbd{gawk -f anagram.awk /usr/share/dict/words | grep '^b'}
+@dots{}
+babbled blabbed
+babbler blabber brabble
+babblers blabbers brabbles
+babbling blabbing
+babbly blabby
+babel bable
+babels beslab
+babery yabber
+@dots{}
+@end example
+
+
+@node Signature Program
+@subsection And Now for Something Completely Different
+
+@cindex signature program
+@cindex Brini, Davide
+The following program was written by Davide Brini
+@c (@email{dave_br@@gmx.com})
+and is published on @uref{http://backreference.org/2011/02/03/obfuscated-awk/,
+his website}.
+It serves as his signature in the Usenet group @code{comp.lang.awk}.
+He supplies the following copyright terms:
+
+@quotation
+Copyright @copyright{} 2008 Davide Brini
+
+Copying and distribution of the code published in this page, with or without
+modification, are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
+@end quotation
+
+Here is the program:
+
+@example
+awk 'BEGIN@{O="~"~"~";o="=="=="==";o+=+o;x=O""O;while(X++<=x+o+o)c=c"%c";
+printf c,(x-O)*(x-O),x*(x-o)-o,x*(x-O)+x-O-o,+x*(x-O)-x+o,X*(o*o+O)+x-O,
+X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
+O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}'
+@end example
+
+@cindex Johansen, Chris
+We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this @value{DOCUMENT}.)
+
+@ignore
+To: "Arnold Robbins" <arnold@skeeve.com>
+Date: Sat, 20 Aug 2011 13:50:46 -0400
+Subject: The GNU Awk User's Guide, Section 13.3.11
+From: "Chris Johansen" <johansen@main.nc.us>
+Message-ID: <op.v0iw6wlv7finx3@asusodin.thrudvang.lan>
+
+Arnold, you don't know me, but we have a tenuous connection. My wife is
+Barbara A. Field, FAIA, GIT '65 (B. Arch.).
+
+I have had a couple of paper copies of "Effective Awk Programming" for
+years, and now I'm going through a Kindle version of "The GNU Awk User's
+Guide" again. When I got to section 13.3.11, I reformatted and lightly
+commented Davide Brin's signature script to understand its workings.
+
+It occurs to me that this might have pedagogical value as an example
+(although imperfect) of the value of whitespace and comments, and a
+starting point for that discussion. It certainly helped _me_ understand
+what's going on. You are welcome to it, as-is or modified (subject to
+Davide's constraints, of course, which I think I have met).
+
+If I were to include it in a future edition, I would put it at some
+distance from section 13.3.11, say, as a note or an appendix, so as not to
+be a "spoiler" to the puzzle.
+
+Best regards,
+--
+Chris Johansen {johansen at main dot nc dot us}
+ . . . collapsing the probability wave function, sending ripples of
+certainty through the space-time continuum.
+
+
+#! /usr/bin/gawk -f
+
+# From "13.3.11 And Now For Something Completely Different"
+# http://www.gnu.org/software/gawk/manual/html_node/Signature-Program.html#Signature-Program
+
+# Copyright © 2008 Davide Brini
+
+# Copying and distribution of the code published in this page, with
+# or without modification, are permitted in any medium without
+# royalty provided the copyright notice and this notice are preserved.
+
+BEGIN {
+ O = "~" ~ "~"; # 1
+ o = "==" == "=="; # 1
+ o += +o; # 2
+ x = O "" O; # 11
+
+
+ while ( X++ <= x + o + o ) c = c "%c";
+
+ # O is 1
+ # o is 2
+ # x is 11
+ # X is 17
+ # c is "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c"
+
+ printf c,
+ ( x - O )*( x - O), # 100 d
+ x*( x - o ) - o, # 97 a
+ x*( x - O ) + x - O - o, # 118 v
+ +x*( x - O ) - x + o, # 101 e
+ X*( o*o + O ) + x - O, # 95 _
+ X*( X - x ) - o*o, # 98 b
+ ( x + X )*o*o + o, # 114 r
+ x*( X - x ) - O - O, # 64 @
+ x - O + ( O + o + X + x )*( o + O ), # 103 g
+ X*X - X*( x - O ) - x + O, # 109 m
+ O + X*( o*( o + O ) + O ), # 120 x
+ +x + O + X*o, # 46 .
+ x*( x - o), # 99 c
+ ( o + X + x )*o*o - ( x - O - O ), # 111 0
+ O + ( X - x )*( X + O ), # 109 m
+ x - O # 10 \n
+}
+@end ignore
+
+@node Programs Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The programs provided in this @value{CHAPTER}
+continue on the theme that reading programs is an excellent way to learn
+Good Programming.
+
+@item
+Using @samp{#!} to make @command{awk} programs directly runnable makes
+them easier to use. Otherwise, invoke the program using @samp{awk
+-f @dots{}}.
+
+@item
+Reimplementing standard POSIX programs in @command{awk} is a pleasant
+exercise; @command{awk}'s expressive power lets you write such programs
+in relatively few lines of code, yet they are functionally complete
+and usable.
+
+@item
+One of standard @command{awk}'s weaknesses is working with individual
+characters. The ability to use @code{split()} with the empty string as
+the separator can considerably simplify such tasks.
+
+@item
+The library functions from @ref{Library Functions} proved their
+usefulness for a number of real (if small) programs.
+
+@item
+Besides reinventing POSIX wheels, other programs solved a selection of
+interesting problems, such as finding duplicate words in text, printing
+mailing labels, and finding anagrams.
+
+@end itemize
+
+@c EXCLUDE START
+@node Programs Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite @file{cut.awk} (@pxref{Cut Program})
+using @code{split()} with @code{""} as the separator.
+
+@item
+In @ref{Egrep Program}, we mentioned that @samp{egrep -i} could be
+simulated in versions of @command{awk} without @code{IGNORECASE} by
+using @code{tolower()} on the line and the pattern. In a footnote there,
+we also mentioned that this solution has a bug: the translated line is
+output, and not the original one. Fix this problem.
+@c Exercise: Fix this, w/array and new line as key to original line
+
+@item
+The POSIX version of @command{id} takes options that control which
+information is printed. Modify the @command{awk} version
+(@pxref{Id Program}) to accept the same arguments and perform in the
+same way.
+
+@item
+The @code{split.awk} program (@pxref{Split Program}) assumes
+that letters are contiguous in the character set,
+which isn't true for EBCDIC systems.
+Fix this problem.
+(Hint: Consider a different way to work through the alphabet,
+without relying on @code{ord()} and @code{chr()}.)
+
+@item
+In @file{uniq.awk} (@pxref{Uniq Program}), the
+logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Rewrite the logic to follow this
+suggestion.
+
+
+@item
+Why can't the @file{wc.awk} program (@pxref{Wc Program}) just
+use the value of @code{FNR} in @code{endfile()}?
+Hint: Examine the code in @ref{Filetrans Function}.
+
+@ignore
+@command{wc} can't just use the value of @code{FNR} in
+@code{endfile()}. If you examine the code in @ref{Filetrans Function},
+you will see that @code{FNR} has already been reset by the time
+@code{endfile()} is called.
+@end ignore
+
+@item
+Manipulation of individual characters in the @command{translate} program
+(@pxref{Translate Program}) is painful using standard @command{awk}
+functions. Given that @command{gawk} can split strings into individual
+characters using @code{""} as the separator, how might you use this
+feature to simplify the program?
+
+@item
+The @file{extract.awk} program (@pxref{Extract Program}) was written
+before @command{gawk} had the @code{gensub()} function. Use it
+to simplify the code.
+
+@item
+Compare the performance of the @file{awksed.awk} program
+(@pxref{Simple Sed}) with the more straightforward:
+
+@example
+BEGIN @{
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+@}
+
+@{ gsub(pat, repl); print @}
+@end example
+
+@item
+What are the advantages and disadvantages of @file{awksed.awk} versus
+the real @command{sed} utility?
+
+@ignore
+ Advantage: egrep regexps
+ speed (?)
+ Disadvantage: no & in replacement text
+
+Others?
+@end ignore
+
+@item
+In @ref{Igawk Program}, we mentioned that the @code{pathto()} function
+tests a file's accessibility by reading a line from it with @code{getline},
+and that not trying to save that line for use by the main program
+simplifies things considerably. What problem does this engender, though?
+@c answer, reading from "-" or /dev/stdin
+
+@cindex search paths
+@cindex search paths, for source files
+@cindex source files@comma{} search path for
+@cindex files, source@comma{} search path for
+@cindex directories, searching
+@item
+As an additional example of the idea that it is not always necessary to
+add new features to a program, consider the idea of having two files in
+a directory in the search path:
+
+@table @file
+@item default.awk
+This file contains a set of default library functions, such
+as @code{getopt()} and @code{assert()}.
+
+@item site.awk
+This file contains library functions that are specific to a site or
+installation; i.e., locally developed functions.
+Having a separate file allows @file{default.awk} to change with
+new @command{gawk} releases, without requiring the system administrator to
+update it each time by adding the local functions.
+@end table
+
+One user
+@c Karl Berry, karl@ileaf.com, 10/95
+suggested that @command{gawk} be modified to automatically read these files
+upon startup. Instead, it would be very simple to modify @command{igawk}
+to do this. Since @command{igawk} can process nested @code{@@include}
+directives, @file{default.awk} could simply contain @code{@@include}
+statements for the desired library functions.
+Make this change.
+
+@item
+Modify @file{anagram.awk} (@pxref{Anagram Program}) to avoid
+the use of the external @command{sort} utility.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART3}Moving Beyond Standard @command{awk} with @command{gawk}
+@end ifnotinfo
+
+@ifdocbook
+Part III focuses on features specific to @command{gawk}.
+It contains the following chapters:
+
+@itemize @value{BULLET}
+@item
+@ref{Advanced Features}
+
+@item
+@ref{Internationalization}
+
+@item
+@ref{Debugger}
+
+@item
+@ref{Arbitrary Precision Arithmetic}
+
+@item
+@ref{Dynamic Extensions}
+@end itemize
+@end ifdocbook
+
+@node Advanced Features
+@chapter Advanced Features of @command{gawk}
+@cindex @command{gawk}, features, advanced
+@cindex advanced features, @command{gawk}
+@ignore
+Contributed by: Peter Langston <pud!psl@bellcore.bellcore.com>
+
+ Found in Steve English's "signature" line:
+
+"Write documentation as if whoever reads it is a violent psychopath
+who knows where you live."
+@end ignore
+@cindex Langston, Peter
+@cindex English, Steve
+@quotation
+@i{Write documentation as if whoever reads it is
+a violent psychopath who knows where you live.}
+@author Steve English, as quoted by Peter Langston
+@end quotation
+
+This @value{CHAPTER} discusses advanced features in @command{gawk}.
+It's a bit of a ``grab bag'' of items that are otherwise unrelated
+to each other.
+First, a command-line option allows @command{gawk} to recognize
+nondecimal numbers in input data, not just in @command{awk}
+programs.
+Then, @command{gawk}'s special features for sorting arrays are presented.
+Next, two-way I/O, discussed briefly in earlier parts of this
+@value{DOCUMENT}, is described in full detail, along with the basics
+of TCP/IP networking. Finally, @command{gawk}
+can @dfn{profile} an @command{awk} program, making it possible to tune
+it for performance.
+
+@c FULLXREF ON
+A number of advanced features require separate @value{CHAPTER}s of their
+own:
+
+@itemize @value{BULLET}
+@item
+@ref{Internationalization}, discusses how to internationalize
+your @command{awk} programs, so that they can speak multiple
+national languages.
+
+@item
+@ref{Debugger}, describes @command{gawk}'s built-in command-line
+debugger for debugging @command{awk} programs.
+
+@item
+@ref{Arbitrary Precision Arithmetic}, describes how you can use
+@command{gawk} to perform arbitrary-precision arithmetic.
+
+@item
+@ref{Dynamic Extensions},
+discusses the ability to dynamically add new built-in functions to
+@command{gawk}.
+@end itemize
+@c FULLXREF OFF
+
+@menu
+* Nondecimal Data:: Allowing nondecimal input data.
+* Array Sorting:: Facilities for controlling array traversal and
+ sorting arrays.
+* Two-way I/O:: Two-way communications with another process.
+* TCP/IP Networking:: Using @command{gawk} for network programming.
+* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
+@end menu
+
+@node Nondecimal Data
+@section Allowing Nondecimal Input Data
+@cindex @option{--non-decimal-data} option
+@cindex advanced features, nondecimal input data
+@cindex input, data@comma{} nondecimal
+@cindex constants, nondecimal
+
+If you run @command{gawk} with the @option{--non-decimal-data} option,
+you can have nondecimal values in your input data:
+
+@example
+$ @kbd{echo 0123 123 0x123 |}
+> @kbd{gawk --non-decimal-data '@{ printf "%d, %d, %d\n", $1, $2, $3 @}'}
+@print{} 83, 123, 291
+@end example
+
+For this feature to work, write your program so that
+@command{gawk} treats your data as numeric:
+
+@example
+$ @kbd{echo 0123 123 0x123 | gawk '@{ print $1, $2, $3 @}'}
+@print{} 0123 123 0x123
+@end example
+
+@noindent
+The @code{print} statement treats its expressions as strings.
+Although the fields can act as numbers when necessary,
+they are still strings, so @code{print} does not try to treat them
+numerically. You need to add zero to a field to force it to
+be treated as a number. For example:
+
+@example
+$ @kbd{echo 0123 123 0x123 | gawk --non-decimal-data '}
+> @kbd{@{ print $1, $2, $3}
+> @kbd{print $1 + 0, $2 + 0, $3 + 0 @}'}
+@print{} 0123 123 0x123
+@print{} 83 123 291
+@end example
+
+Because it is common to have decimal data with leading zeros, and because
+using this facility could lead to surprising results, the default is to leave it
+disabled. If you want it, you must explicitly request it.
+
+@cindex programming conventions, @code{--non-decimal-data} option
+@cindex @option{--non-decimal-data} option, @code{strtonum()} function and
+@cindex @code{strtonum()} function (@command{gawk}), @code{--non-decimal-data} option and
+@quotation CAUTION
+@emph{Use of this option is not recommended.}
+It can break old programs very badly.
+Instead, use the @code{strtonum()} function to convert your data
+(@pxref{String Functions}).
+This makes your programs easier to write and easier to read, and
+leads to less surprising results.
+
+This option may disappear in a future version of @command{gawk}.
+@end quotation
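+
+For example, using @code{strtonum()} on each field should produce the
+same results as the earlier example, without the command-line option.
+Here is a minimal sketch:
+
+@example
+$ @kbd{echo 0123 123 0x123 |}
+> @kbd{gawk '@{ print strtonum($1), strtonum($2), strtonum($3) @}'}
+@print{} 83 123 291
+@end example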
+
+@node Array Sorting
+@section Controlling Array Traversal and Array Sorting
+
+@command{gawk} lets you control the order in which a @samp{for (i in array)}
+loop traverses an array.
+
+In addition, two built-in functions, @code{asort()} and @code{asorti()},
+let you sort arrays based on the array values and indices, respectively.
+These two functions also provide control over the sorting criteria used
+to order the elements during sorting.
+
+@menu
+* Controlling Array Traversal:: How to use PROCINFO["sorted_in"].
+* Array Sorting Functions:: How to use @code{asort()} and @code{asorti()}.
+@end menu
+
+@node Controlling Array Traversal
+@subsection Controlling Array Traversal
+
+By default, the order in which a @samp{for (i in array)} loop
+scans an array is not defined; it is generally based upon
+the internal implementation of arrays inside @command{awk}.
+
+Often, though, it is desirable to be able to loop over the elements
+in a particular order that you, the programmer, choose. @command{gawk}
+lets you do this.
+
+@DBREF{Controlling Scanning} describes how you can assign special,
+predefined values to @code{PROCINFO["sorted_in"]} in order to
+control the order in which @command{gawk} traverses an array
+during a @code{for} loop.
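+
+For instance, a minimal sketch using one of those predefined values
+(@samp{@@ind_num_asc}: numeric index order, ascending) might look like this:
+
+@example
+PROCINFO["sorted_in"] = "@@ind_num_asc"
+for (i in array)
+    @var{do something with} array[i]
+@end example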
+
+In addition, the value of @code{PROCINFO["sorted_in"]} can be a
+function name.@footnote{This is why the predefined sorting orders
+start with an @samp{@@} character, which cannot be part of an identifier.}
+This lets you traverse an array based on any custom criterion.
+The array elements are ordered according to the return value of this
+function. The comparison function should be defined with at least
+four arguments:
+
+@example
+function comp_func(i1, v1, i2, v2)
+@{
+ @var{compare elements 1 and 2 in some fashion}
+ @var{return < 0; 0; or > 0}
+@}
+@end example
+
+Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2}
+are the corresponding values of the two elements being compared.
+Either @var{v1} or @var{v2}, or both, can be arrays if the array being
+traversed contains subarrays as values.
+(@DBXREF{Arrays of Arrays} for more information about subarrays.)
+The three possible return values are interpreted as follows:
+
+@table @code
+@item comp_func(i1, v1, i2, v2) < 0
+Index @var{i1} comes before index @var{i2} during loop traversal.
+
+@item comp_func(i1, v1, i2, v2) == 0
+Indices @var{i1} and @var{i2}
+come together but the relative order with respect to each other is undefined.
+
+@item comp_func(i1, v1, i2, v2) > 0
+Index @var{i1} comes after index @var{i2} during loop traversal.
+@end table
+
+Our first comparison function can be used to scan an array in
+numerical order of the indices:
+
+@example
+function cmp_num_idx(i1, v1, i2, v2)
+@{
+ # numerical index comparison, ascending order
+ return (i1 - i2)
+@}
+@end example
+
+Our second function traverses an array based on the string order of
+the element values rather than by indices:
+
+@example
+function cmp_str_val(i1, v1, i2, v2)
+@{
+ # string value comparison, ascending order
+ v1 = v1 ""
+ v2 = v2 ""
+ if (v1 < v2)
+ return -1
+ return (v1 != v2)
+@}
+@end example
+
+The third
+comparison function makes all numbers, and numeric strings without
+any leading or trailing spaces, come out first during loop traversal:
+
+@example
+function cmp_num_str_val(i1, v1, i2, v2, n1, n2)
+@{
+ # numbers before string value comparison, ascending order
+ n1 = v1 + 0
+ n2 = v2 + 0
+ if (n1 == v1)
+ return (n2 == v2) ? (n1 - n2) : -1
+ else if (n2 == v2)
+ return 1
+ return (v1 < v2) ? -1 : (v1 != v2)
+@}
+@end example
+
+Here is a main program to demonstrate how @command{gawk}
+behaves using each of the previous functions:
+
+@example
+BEGIN @{
+ data["one"] = 10
+ data["two"] = 20
+ data[10] = "one"
+ data[100] = 100
+ data[20] = "two"
+
+ f[1] = "cmp_num_idx"
+ f[2] = "cmp_str_val"
+ f[3] = "cmp_num_str_val"
+ for (i = 1; i <= 3; i++) @{
+ printf("Sort function: %s\n", f[i])
+ PROCINFO["sorted_in"] = f[i]
+ for (j in data)
+ printf("\tdata[%s] = %s\n", j, data[j])
+ print ""
+ @}
+@}
+@end example
+
+Here are the results when the program is run:
+
+@example
+$ @kbd{gawk -f compdemo.awk}
+@print{} Sort function: cmp_num_idx @ii{Sort by numeric index}
+@print{} data[two] = 20
+@print{} data[one] = 10 @ii{Both strings are numerically zero}
+@print{} data[10] = one
+@print{} data[20] = two
+@print{} data[100] = 100
+@print{}
+@print{} Sort function: cmp_str_val @ii{Sort by element values as strings}
+@print{} data[one] = 10
+@print{} data[100] = 100 @ii{String 100 is less than string 20}
+@print{} data[two] = 20
+@print{} data[10] = one
+@print{} data[20] = two
+@print{}
+@print{} Sort function: cmp_num_str_val @ii{Sort all numeric values before all strings}
+@print{} data[one] = 10
+@print{} data[two] = 20
+@print{} data[100] = 100
+@print{} data[10] = one
+@print{} data[20] = two
+@end example
+
+Consider sorting the entries of a GNU/Linux system password file
+according to login name. The following program sorts records
+by a specific field position and can be used for this purpose:
+
+@example
+# passwd-sort.awk --- simple program to sort by field position
+# field position is specified by the global variable POS
+
+function cmp_field(i1, v1, i2, v2)
+@{
+ # comparison by value, as string, and ascending order
+ return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS])
+@}
+
+@{
+ for (i = 1; i <= NF; i++)
+ a[NR][i] = $i
+@}
+
+END @{
+ PROCINFO["sorted_in"] = "cmp_field"
+ if (POS < 1 || POS > NF)
+ POS = 1
+ for (i in a) @{
+ for (j = 1; j <= NF; j++)
+ printf("%s%c", a[i][j], j < NF ? ":" : "")
+ print ""
+ @}
+@}
+@end example
+
+The first field in each entry of the password file is the user's login name,
+and the fields are separated by colons.
+Each record defines a subarray,
+with each field as an element in the subarray.
+Running the program produces the
+following output:
+
+@example
+$ @kbd{gawk -v POS=1 -F: -f passwd-sort.awk /etc/passwd}
+@print{} adm:x:3:4:adm:/var/adm:/sbin/nologin
+@print{} apache:x:48:48:Apache:/var/www:/sbin/nologin
+@print{} avahi:x:70:70:Avahi daemon:/:/sbin/nologin
+@dots{}
+@end example
+
+The comparison function should normally return the same value when given a
+specific pair of array elements as its arguments. If inconsistent
+results are returned, then the order is undefined. This behavior can be
+exploited to introduce random order into otherwise seemingly
+ordered data:
+
+@example
+function cmp_randomize(i1, v1, i2, v2)
+@{
+ # random order (caution: this may never terminate!)
+ return (2 - 4 * rand())
+@}
+@end example
+
+As already mentioned, the order of the indices is arbitrary if two
+elements compare equal. This is usually not a problem, but letting
+the tied elements come out in arbitrary order can be an issue, especially
+when comparing item values. The partial ordering of the equal elements
+may change the next time the array is traversed, if other elements are added or
+removed from the array. One way to resolve ties when comparing elements
+with otherwise equal values is to include the indices in the comparison
+rules. Note that doing this may make the loop traversal less efficient,
+so consider it only if necessary. The following comparison functions
+force a deterministic order, and are based on the fact that the
+(string) indices of two elements are never equal:
+
+@example
+function cmp_numeric(i1, v1, i2, v2)
+@{
+ # numerical value (and index) comparison, descending order
+ return (v1 != v2) ? (v2 - v1) : (i2 - i1)
+@}
+
+function cmp_string(i1, v1, i2, v2)
+@{
+ # string value (and index) comparison, descending order
+ v1 = v1 i1
+ v2 = v2 i2
+ return (v1 > v2) ? -1 : (v1 != v2)
+@}
+@end example
+
+@c Avoid using the term ``stable'' when describing the unpredictable behavior
+@c if two items compare equal. Usually, the goal of a "stable algorithm"
+@c is to maintain the original order of the items, which is a meaningless
+@c concept for a list constructed from a hash.
+
+A custom comparison function can often simplify ordered loop
+traversal, and the sky is really the limit when it comes to
+designing such a function.
+
+When string comparisons are made during a sort, either for element
+values where one or both aren't numbers, or for element indices
+handled as strings, the value of @code{IGNORECASE}
+(@pxref{Built-in Variables}) controls whether
+the comparisons treat corresponding upper- and lowercase letters as
+equivalent or distinct.
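+
+For example, here is a sketch of a case-insensitive traversal by string
+value, using one of the predefined orderings (the array name @code{data}
+is just for illustration):
+
+@example
+IGNORECASE = 1                          # upper- and lowercase are equivalent
+PROCINFO["sorted_in"] = "@@val_str_asc"  # element values, as strings, ascending
+for (i in data)
+    print i, data[i]
+@end example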
+
+Another point to keep in mind is that in the case of subarrays,
+the element values can themselves be arrays; a production comparison
+function should use the @code{isarray()} function
+(@pxref{Type Functions}),
+to check for this, and choose a defined sorting order for subarrays.
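+
+For instance, the following sketch (the function name and the chosen
+ordering are only illustrative) sorts all scalar values ahead of
+subarrays, comparing the scalars as strings and ordering the subarrays
+among themselves by index:
+
+@example
+function cmp_scalars_first(i1, v1, i2, v2)
+@{
+    if (isarray(v1) && isarray(v2)) @{
+        i1 = i1 ""          # order subarrays among themselves by index
+        i2 = i2 ""
+        return (i1 < i2) ? -1 : 1
+    @} else if (isarray(v1))
+        return 1            # subarrays come after all scalars
+    else if (isarray(v2))
+        return -1
+
+    v1 = v1 ""              # compare scalar values as strings
+    v2 = v2 ""
+    return (v1 < v2) ? -1 : (v1 != v2)
+@}
+@end example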
+
+All sorting based on @code{PROCINFO["sorted_in"]}
+is disabled in POSIX mode,
+because the @code{PROCINFO} array is not special in that case.
+
+As a side note, sorting the array indices before traversing
+the array has been reported to add 15% to 20% overhead to the
+execution time of @command{awk} programs. For this reason,
+sorted array traversal is not the default.
+
+@c The @command{gawk}
+@c maintainers believe that only the people who wish to use a
+@c feature should have to pay for it.
+
+@node Array Sorting Functions
+@subsection Sorting Array Values and Indices with @command{gawk}
+
+@cindex arrays, sorting
+@cindexgawkfunc{asort}
+@cindex @code{asort()} function (@command{gawk}), arrays@comma{} sorting
+@cindexgawkfunc{asorti}
+@cindex @code{asorti()} function (@command{gawk}), arrays@comma{} sorting
+@cindex sort function, arrays, sorting
+In most @command{awk} implementations, sorting an array requires writing
+a @code{sort()} function. This can be educational for exploring
+different sorting algorithms, but usually that's not the point of the program.
+@command{gawk} provides the built-in @code{asort()} and @code{asorti()}
+functions (@pxref{String Functions}) for sorting arrays. For example:
+
+@example
+@var{populate the array} data
+n = asort(data)
+for (i = 1; i <= n; i++)
+ @var{do something with} data[i]
+@end example
+
+After the call to @code{asort()}, the array @code{data} is indexed from 1
+to some number @var{n}, the total number of elements in @code{data}.
+(This count is @code{asort()}'s return value.)
+@code{data[1]} @value{LEQ} @code{data[2]} @value{LEQ} @code{data[3]}, and so on.
+The default comparison is based on the type of the elements
+(@pxref{Typing and Comparison}).
+All numeric values come before all string values,
+which in turn come before all subarrays.
+
+@cindex side effects, @code{asort()} function
+An important side effect of calling @code{asort()} is that
+@emph{the array's original indices are irrevocably lost}.
+As this isn't always desirable, @code{asort()} accepts a
+second argument:
+
+@example
+@var{populate the array} source
+n = asort(source, dest)
+for (i = 1; i <= n; i++)
+ @var{do something with} dest[i]
+@end example
+
+In this case, @command{gawk} copies the @code{source} array into the
+@code{dest} array and then sorts @code{dest}, destroying its indices.
+However, the @code{source} array is not affected.
+
+Often, what's needed is to sort on the values of the @emph{indices}
+instead of the values of the elements. To do that, use the
+@code{asorti()} function. The interface and behavior are identical to
+those of @code{asort()}, except that the index values are used for sorting,
+and become the values of the result array:
+
+@example
+@{ source[$0] = some_func($0) @}
+
+END @{
+ n = asorti(source, dest)
+ for (i = 1; i <= n; i++) @{
+ @ii{Work with sorted indices directly:}
+ @var{do something with} dest[i]
+ @dots{}
+ @ii{Access original array via sorted indices:}
+ @var{do something with} source[dest[i]]
+ @}
+@}
+@end example
+
+So far, so good. Now it starts to get interesting. Both @code{asort()}
+and @code{asorti()} accept a third string argument to control comparison
+of array elements. When we introduced @code{asort()} and @code{asorti()}
+in @ref{String Functions}, we ignored this third argument; however,
+now is the time to describe how this argument affects these two functions.
+
+Basically, the third argument specifies how the array is to be sorted.
+There are two possibilities. As with @code{PROCINFO["sorted_in"]},
+this argument may be one of the predefined names that @command{gawk}
+provides (@pxref{Controlling Scanning}), or it may be the name of a
+user-defined function (@pxref{Controlling Array Traversal}).
+
+In the latter case, @emph{the function can compare elements in any way
+it chooses}, taking into account just the indices, just the values,
+or both. This is extremely powerful.
+
+Once the array is sorted, @code{asort()} takes the @emph{values} in
+their final order, and uses them to fill in the result array, whereas
+@code{asorti()} takes the @emph{indices} in their final order, and uses
+them to fill in the result array.
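+
+As a minimal sketch (the array names here are only for illustration),
+you could sort the values of an array @code{data} using the
+@code{cmp_num_str_val} function shown earlier, or sort its indices
+using one of the predefined orderings:
+
+@example
+n = asort(data, vals, "cmp_num_str_val")   # values: numbers first, then strings
+m = asorti(data, idx, "@@ind_str_desc")     # indices, as strings, descending
+@end example
+
+@noindent
+Afterwards, @code{vals[1]} through @code{vals[n]} hold the ordered values,
+and @code{idx[1]} through @code{idx[m]} hold the ordered indices.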
+
+@cindex reference counting, sorting arrays
+@quotation NOTE
+Copying array indices and elements isn't expensive in terms of memory.
+Internally, @command{gawk} maintains @dfn{reference counts} to data.
+For example, when @code{asort()} copies the first array to the second one,
+there is only one copy of the original array elements' data, even though
+both arrays use the values.
+@end quotation
+
+@c Document It And Call It A Feature. Sigh.
+@cindex @command{gawk}, @code{IGNORECASE} variable in
+@cindex arrays, sorting, and @code{IGNORECASE} variable
+@cindex @code{IGNORECASE} variable, and array sorting functions
+Because @code{IGNORECASE} affects string comparisons, the value
+of @code{IGNORECASE} also affects sorting for both @code{asort()} and @code{asorti()}.
+Note also that the locale's sorting order does @emph{not}
+come into play; comparisons are based on character values only.@footnote{This
+is true because locale-based comparison occurs only when in
+POSIX-compatibility mode, and because @code{asort()} and @code{asorti()} are
+@command{gawk} extensions, they are not available in that case.}
+
+@node Two-way I/O
+@section Two-Way Communications with Another Process
+
+@c 8/2014. Neither Mike nor BWK saw this as relevant. Commenting it out.
+@ignore
+@cindex Brennan, Michael
+@cindex programmers, attractiveness of
+@smallexample
+@c Path: cssun.mathcs.emory.edu!gatech!newsxfer3.itd.umich.edu!news-peer.sprintlink.net!news-sea-19.sprintlink.net!news-in-west.sprintlink.net!news.sprintlink.net!Sprint!204.94.52.5!news.whidbey.com!brennan
+From: brennan@@whidbey.com (Mike Brennan)
+Newsgroups: comp.lang.awk
+Subject: Re: Learn the SECRET to Attract Women Easily
+Date: 4 Aug 1997 17:34:46 GMT
+@c Organization: WhidbeyNet
+@c Lines: 12
+Message-ID: <5s53rm$eca@@news.whidbey.com>
+@c References: <5s20dn$2e1@chronicle.concentric.net>
+@c Reply-To: brennan@whidbey.com
+@c NNTP-Posting-Host: asn202.whidbey.com
+@c X-Newsreader: slrn (0.9.4.1 UNIX)
+@c Xref: cssun.mathcs.emory.edu comp.lang.awk:5403
+
+On 3 Aug 1997 13:17:43 GMT, Want More Dates???
+<tracy78@@kilgrona.com> wrote:
+>Learn the SECRET to Attract Women Easily
+>
+>The SCENT(tm) Pheromone Sex Attractant For Men to Attract Women
+
+The scent of awk programmers is a lot more attractive to women than
+the scent of perl programmers.
+--
+Mike Brennan
+@c brennan@@whidbey.com
+@end smallexample
+@end ignore
+
+@cindex advanced features, processes@comma{} communicating with
+@cindex processes, two-way communications with
+It is often useful to be able to
+send data to a separate program for
+processing and then read the result. This can always be
+done with temporary files:
+
+@example
+# Write the data for processing
+tempfile = ("mydata." PROCINFO["pid"])
+while (@var{not done with data})
+ print @var{data} | ("subprogram > " tempfile)
+close("subprogram > " tempfile)
+
+# Read the results, remove tempfile when done
+while ((getline newdata < tempfile) > 0)
+ @var{process} newdata @var{appropriately}
+close(tempfile)
+system("rm " tempfile)
+@end example
+
+@noindent
+This works, but not elegantly. Among other things, it requires that
+the program be run in a directory that cannot be shared among users;
+for example, @file{/tmp} will not do, as another user might happen
+to be using a temporary file with the same name.@footnote{Michael
+Brennan suggests the use of @code{rand()} to generate unique
+@value{FN}s. This is a valid point; nevertheless, temporary files
+remain more difficult to use than two-way pipes.} @c 8/2014
+
+@cindex coprocesses
+@cindex input/output, two-way
+@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
+@cindex vertical bar (@code{|}), @code{|&} operator (I/O)
+@cindex @command{csh} utility, @code{|&} operator, comparison with
+However, with @command{gawk}, it is possible to
+open a @emph{two-way} pipe to another process. The second process is
+termed a @dfn{coprocess}, as it runs in parallel with @command{gawk}.
+The two-way connection is created using the @samp{|&} operator
+(borrowed from the Korn shell, @command{ksh}):@footnote{This is very
+different from the same operator in the C shell and in Bash.}
+
+@example
+do @{
+ print @var{data} |& "subprogram"
+ "subprogram" |& getline results
+@} while (@var{data left to process})
+close("subprogram")
+@end example
+
+The first time an I/O operation is executed using the @samp{|&}
+operator, @command{gawk} creates a two-way pipeline to a child process
+that runs the other program. Output created with @code{print}
+or @code{printf} is written to the program's standard input, and
+output from the program's standard output can be read by the @command{gawk}
+program using @code{getline}.
+As is the case with processes started by @samp{|}, the subprogram
+can be any program, or pipeline of programs, that can be started by
+the shell.
+
+There are some cautionary items to be aware of:
+
+@itemize @value{BULLET}
+@item
+As the code inside @command{gawk} currently stands, the coprocess's
+standard error goes to the same place that the parent @command{gawk}'s
+standard error goes. It is not possible to read the child's
+standard error separately.
+
+@cindex deadlocks
+@cindex buffering, input/output
+@cindex @code{getline} command, deadlock and
+@item
+I/O buffering may be a problem. @command{gawk} automatically
+flushes all output down the pipe to the coprocess.
+However, if the coprocess does not flush its output,
+@command{gawk} may hang when doing a @code{getline} in order to read
+the coprocess's results. This could lead to a situation
+known as @dfn{deadlock}, where each process is waiting for the
+other one to do something.
+@end itemize
+
+@cindex @code{close()} function, two-way pipes and
+It is possible to close just one end of the two-way pipe to
+a coprocess, by supplying a second argument to the @code{close()}
+function: either @code{"to"} or @code{"from"}
+(@pxref{Close Files And Pipes}).
+These strings tell @command{gawk} to close the end of the pipe
+that sends data to the coprocess or the end that reads from it,
+respectively.
+
+@cindex @command{sort} utility, coprocesses and
+This is particularly necessary in order to use
+the system @command{sort} utility as part of a coprocess;
+@command{sort} must read @emph{all} of its input
+data before it can produce any output.
+The @command{sort} program does not receive an end-of-file indication
+until @command{gawk} closes the write end of the pipe.
+
+When you have finished writing data to the @command{sort}
+utility, you can close the @code{"to"} end of the pipe, and
+then start reading sorted data via @code{getline}.
+For example:
+
+@example
+BEGIN @{
+ command = "LC_ALL=C sort"
+ n = split("abcdefghijklmnopqrstuvwxyz", a, "")
+
+ for (i = n; i > 0; i--)
+ print a[i] |& command
+ close(command, "to")
+
+ while ((command |& getline line) > 0)
+ print "got", line
+ close(command)
+@}
+@end example
+
+This program writes the letters of the alphabet in reverse order, one
+per line, down the two-way pipe to @command{sort}. It then closes the
+write end of the pipe, so that @command{sort} receives an end-of-file
+indication. This causes @command{sort} to sort the data and write the
+sorted data back to the @command{gawk} program. Once all of the data
+has been read, @command{gawk} terminates the coprocess and exits.
+
+As a side note, the assignment @samp{LC_ALL=C} in the @command{sort}
+command ensures traditional Unix (ASCII) sorting from @command{sort}.
+This is not strictly necessary here, but it's good to know how to do this.
+
+@cindex @command{gawk}, @code{PROCINFO} array in
+@cindex @code{PROCINFO} array, and communications via ptys
+You may also use pseudo-ttys (ptys) for
+two-way communication instead of pipes, if your system supports them.
+This is done on a per-command basis, by setting a special element
+in the @code{PROCINFO} array
+(@pxref{Auto-set}),
+like so:
+
+@example
+command = "sort -nr" # command, save in convenience variable
+PROCINFO[command, "pty"] = 1 # update PROCINFO
+print @dots{} |& command # start two-way pipe
+@dots{}
+@end example
+
+@noindent
+Using ptys usually avoids the buffer deadlock issues described earlier, at some
+loss in performance. If your system does not have ptys, or if all the
+system's ptys are in use, @command{gawk} automatically falls back to
+using regular pipes.
+
+@node TCP/IP Networking
+@section Using @command{gawk} for Network Programming
+@cindex advanced features, network programming
+@cindex networks, programming
+@cindex TCP/IP
+@cindex @code{/inet/@dots{}} special files (@command{gawk})
+@cindex files, @code{/inet/@dots{}} (@command{gawk})
+@cindex @code{/inet4/@dots{}} special files (@command{gawk})
+@cindex files, @code{/inet4/@dots{}} (@command{gawk})
+@cindex @code{/inet6/@dots{}} special files (@command{gawk})
+@cindex files, @code{/inet6/@dots{}} (@command{gawk})
+@cindex @code{EMISTERED}
+@ifnotdocbook
+@quotation
+@code{EMISTERED}:@*
+@ @ @ @ @i{A host is a host from coast to coast,@*
+@ @ @ @ and nobody talks to a host that's close,@*
+@ @ @ @ unless the host that isn't close@*
+@ @ @ @ is busy, hung, or dead.}
+@author Mike O'Brien (aka Mr.@: Protocol)
+@end quotation
+@end ifnotdocbook
+
+@docbook
+<blockquote>
+<attribution>Mike O'Brien (aka Mr.&nbsp;Protocol)</attribution>
+<literallayout class="normal"><literal>EMISTERED</literal>:
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to a host that's close,</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>is busy, hung, or dead.</emphasis></literallayout>
+</blockquote>
+@end docbook
+
+In addition to being able to open a two-way pipeline to a coprocess
+on the same system
+(@pxref{Two-way I/O}),
+it is possible to make a two-way connection to
+another process on another system across an IP network connection.
+
+You can think of this as just a @emph{very long} two-way pipeline to
+a coprocess.
+The way @command{gawk} decides that you want to use TCP/IP networking is
+by recognizing special @value{FN}s that begin with one of @samp{/inet/},
+@samp{/inet4/}, or @samp{/inet6/}.
+
+The full syntax of the special @value{FN} is
+@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
+The components are:
+
+@table @var
+@item net-type
+Specifies the kind of Internet connection to make.
+Use @samp{/inet4/} to force IPv4, and
+@samp{/inet6/} to force IPv6.
+Plain @samp{/inet/} (which used to be the only option) uses
+the system default, most likely IPv4.
+
+@item protocol
+The protocol to use over IP. This must be either @samp{tcp} or
+@samp{udp}, for a TCP or UDP IP connection,
+respectively. TCP should be used for most applications.
+
+@item local-port
+@cindex @code{getaddrinfo()} function (C library)
+The local TCP or UDP port number to use. Use a port number of @samp{0}
+when you want the system to pick a port. This is what you should do
+when writing a TCP or UDP client.
+You may also use a well-known service name, such as @samp{smtp}
+or @samp{http}, in which case @command{gawk} attempts to determine
+the predefined port number using the C @code{getaddrinfo()} function.
+
+@item remote-host
+The IP address or fully qualified domain name of the Internet
+host to which you want to connect.
+
+@item remote-port
+The TCP or UDP port number to use on the given @var{remote-host}.
+Again, use @samp{0} if you don't care, or else a well-known
+service name.
+@end table
+
+@cindex @command{gawk}, @code{ERRNO} variable in
+@cindex @code{ERRNO} variable
+@quotation NOTE
+Failure in opening a two-way socket will result in a non-fatal error
+being returned to the calling code. The value of @code{ERRNO} indicates
+the error (@pxref{Auto-set}).
+@end quotation
+
+Consider the following very simple example:
+
+@example
+BEGIN @{
+ Service = "/inet/tcp/0/localhost/daytime"
+ Service |& getline
+ print $0
+ close(Service)
+@}
+@end example
+
+This program reads the current date and time from the local system's
+TCP @samp{daytime} server.
+It then prints the results and closes the connection.
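+
+Because a failure to open the socket is not fatal, a slightly more
+defensive version of the same program might check whether the
+@code{getline} succeeded, and report the value of @code{ERRNO}
+(@pxref{Auto-set}) if it did not. This is only a sketch:
+
+@example
+BEGIN @{
+    Service = "/inet/tcp/0/localhost/daytime"
+    if ((Service |& getline line) > 0)
+        print line
+    else
+        print "daytime connection failed:", ERRNO > "/dev/stderr"
+    close(Service)
+@}
+@end example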
+
+Because this topic is extensive, the use of @command{gawk} for
+TCP/IP programming is documented separately.
+@ifinfo
+See
+@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}},
+@end ifinfo
+@ifnotinfo
+See
+@uref{http://www.gnu.org/software/gawk/manual/gawkinet/,
+@cite{TCP/IP Internetworking with @command{gawk}}},
+which comes as part of the @command{gawk} distribution,
+@end ifnotinfo
+for a much more complete introduction and discussion, as well as
+extensive examples.
+
+
+@node Profiling
+@section Profiling Your @command{awk} Programs
+@cindex @command{awk} programs, profiling
+@cindex profiling @command{awk} programs
+@cindex @code{awkprof.out} file
+@cindex files, @code{awkprof.out}
+
+You may produce execution traces of your @command{awk} programs.
+This is done by passing the option @option{--profile} to @command{gawk}.
+When @command{gawk} has finished running, it creates a profile of your program in a file
+named @file{awkprof.out}. Because it is profiling, it also executes up to 45% slower than
+@command{gawk} normally does.
+
+@cindex @option{--profile} option
+As shown in the following example,
+the @option{--profile} option can be used to change the name of the file
+where @command{gawk} will write the profile:
+
+@example
+gawk --profile=myprog.prof -f myprog.awk data1 data2
+@end example
+
+@noindent
+In the preceding example, @command{gawk} places the profile in
+@file{myprog.prof} instead of in @file{awkprof.out}.
+
+Here is a sample session showing a simple @command{awk} program,
+its input data, and the results from running @command{gawk} with the
+@option{--profile} option. First, the @command{awk} program:
+
+@example
+BEGIN @{ print "First BEGIN rule" @}
+
+END @{ print "First END rule" @}
+
+/foo/ @{
+ print "matched /foo/, gosh"
+ for (i = 1; i <= 3; i++)
+ sing()
+@}
+
+@{
+ if (/foo/)
+ print "if is true"
+ else
+ print "else is true"
+@}
+
+BEGIN @{ print "Second BEGIN rule" @}
+
+END @{ print "Second END rule" @}
+
+function sing( dummy)
+@{
+ print "I gotta be me!"
+@}
+@end example
+
+Following is the input data:
+
+@example
+foo
+bar
+baz
+foo
+junk
+@end example
+
+Here is the @file{awkprof.out} that results from running the
+@command{gawk} profiler on this program and data. (This example also
+illustrates that @command{awk} programmers sometimes get up very early
+in the morning to work.)
+
+@cindex @code{BEGIN} pattern, and profiling
+@cindex @code{END} pattern, and profiling
+@example
+ # gawk profile, created Mon Sep 29 05:16:21 2014
+
+ # BEGIN rule(s)
+
+ BEGIN @{
+ 1 print "First BEGIN rule"
+ @}
+
+ BEGIN @{
+ 1 print "Second BEGIN rule"
+ @}
+
+ # Rule(s)
+
+ 5 /foo/ @{ # 2
+ 2 print "matched /foo/, gosh"
+ 6 for (i = 1; i <= 3; i++) @{
+ 6 sing()
+ @}
+ @}
+
+ 5 @{
+ 5 if (/foo/) @{ # 2
+ 2 print "if is true"
+ 3 @} else @{
+ 3 print "else is true"
+ @}
+ @}
+
+ # END rule(s)
+
+ END @{
+ 1 print "First END rule"
+ @}
+
+ END @{
+ 1 print "Second END rule"
+ @}
+
+
+ # Functions, listed alphabetically
+
+ 6 function sing(dummy)
+ @{
+ 6 print "I gotta be me!"
+ @}
+@end example
+
+This example illustrates many of the basic features of profiling output.
+They are as follows:
+
+@itemize @value{BULLET}
+@item
+The program is printed in the order @code{BEGIN} rules,
+@code{BEGINFILE} rules,
+pattern/action rules,
+@code{ENDFILE} rules, @code{END} rules and functions, listed
+alphabetically.
+Multiple @code{BEGIN} and @code{END} rules retain their
+separate identities, as do
+multiple @code{BEGINFILE} and @code{ENDFILE} rules.
+
+@cindex patterns, counts, in a profile
+@item
+Pattern-action rules have two counts.
+The first count, to the left of the rule, shows how many times
+the rule's pattern was @emph{tested}.
+The second count, to the right of the rule's opening left brace
+in a comment,
+shows how many times the rule's action was @emph{executed}.
+The difference between the two indicates how many times the rule's
+pattern evaluated to false.
+
+@item
+Similarly,
+the count for an @code{if}-@code{else} statement shows how many times
+the condition was tested.
+To the right of the opening left brace for the @code{if}'s body
+is a count showing how many times the condition was true.
+The count for the @code{else}
+indicates how many times the test failed.
+
+@cindex loops, count for header, in a profile
+@item
+The count for a loop header (such as @code{for}
+or @code{while}) shows how many times the loop test was executed.
+(Because of this, you can't just look at the count on the first
+statement in a rule to determine how many times the rule was executed.
+If the first statement is a loop, the count is misleading.)
+
+@cindex functions, user-defined, counts, in a profile
+@cindex user-defined, functions, counts, in a profile
+@item
+For user-defined functions, the count next to the @code{function}
+keyword indicates how many times the function was called.
+The counts next to the statements in the body show how many times
+those statements were executed.
+
+@cindex @code{@{@}} (braces)
+@cindex braces (@code{@{@}})
+@item
+The layout uses ``K&R'' style with TABs.
+Braces are used everywhere, even when
+the body of an @code{if}, @code{else}, or loop is only a single statement.
+
+@cindex @code{()} (parentheses), in a profile
+@cindex parentheses @code{()}, in a profile
+@item
+Parentheses are used only where needed, as indicated by the structure
+of the program and the precedence rules.
+For example, @samp{(3 + 5) * 4} means add three and five, then multiply
+the total by four. However, @samp{3 + 5 * 4} has no parentheses, and
+means @samp{3 + (5 * 4)}.
+
+@ignore
+@item
+All string concatenations are parenthesized too.
+(This could be made a bit smarter.)
+@end ignore
+
+@item
+Parentheses are used around the arguments to @code{print}
+and @code{printf} only when
+the @code{print} or @code{printf} statement is followed by a redirection.
+Similarly, if
+the target of a redirection isn't a scalar, it gets parenthesized.
+
+@item
+@command{gawk} supplies leading comments in
+front of the @code{BEGIN} and @code{END} rules,
+the @code{BEGINFILE} and @code{ENDFILE} rules,
+the pattern/action rules, and the functions.
+
+@end itemize
+
+The profiled version of your program may not look exactly like what you
+typed when you wrote it. This is because @command{gawk} creates the
+profiled version by ``pretty printing'' its internal representation of
+the program. The advantage to this is that @command{gawk} can produce
+a standard representation.
+Also, things such as:
+
+@example
+/foo/
+@end example
+
+@noindent
+come out as:
+
+@example
+/foo/ @{
+ print $0
+@}
+@end example
+
+@noindent
+which is correct, but possibly unexpected.
+
+@cindex profiling @command{awk} programs, dynamically
+@cindex @command{gawk} program, dynamic profiling
+@cindex dynamic profiling
+Besides creating profiles when a program has completed,
+@command{gawk} can produce a profile while it is running.
+This is useful if your @command{awk} program goes into an
+infinite loop and you want to see what has been executed.
+To use this feature, run @command{gawk} with the @option{--profile}
+option in the background:
+
+@example
+$ @kbd{gawk --profile -f myprog &}
+[1] 13992
+@end example
+
+@cindex @command{kill} command@comma{} dynamic profiling
+@cindex @code{USR1} signal, for dynamic profiling
+@cindex @code{SIGUSR1} signal, for dynamic profiling
+@cindex signals, @code{USR1}/@code{SIGUSR1}, for profiling
+@noindent
+The shell prints a job number and process ID number; in this case, 13992.
+Use the @command{kill} command to send the @code{USR1} signal
+to @command{gawk}:
+
+@example
+$ @kbd{kill -USR1 13992}
+@end example
+
+@noindent
+As usual, the profiled version of the program is written to
+@file{awkprof.out}, or to a different file if one was specified with
+the @option{--profile} option.
+
+Along with the regular profile, as shown earlier, the profile file
+includes a trace of any active functions:
+
+@example
+# Function Call Stack:
+
+# 3. baz
+# 2. bar
+# 1. foo
+# -- main --
+@end example
+
+You may send @command{gawk} the @code{USR1} signal as many times as you like.
+Each time, the profile and function call trace are appended to the output
+profile file.
+
+@cindex @code{HUP} signal, for dynamic profiling
+@cindex @code{SIGHUP} signal, for dynamic profiling
+@cindex signals, @code{HUP}/@code{SIGHUP}, for profiling
+If you use the @code{HUP} signal instead of the @code{USR1} signal,
+@command{gawk} produces the profile and the function call trace and then exits.
+
+@cindex @code{INT} signal (MS-Windows)
+@cindex @code{SIGINT} signal (MS-Windows)
+@cindex signals, @code{INT}/@code{SIGINT} (MS-Windows)
+@cindex @code{QUIT} signal (MS-Windows)
+@cindex @code{SIGQUIT} signal (MS-Windows)
+@cindex signals, @code{QUIT}/@code{SIGQUIT} (MS-Windows)
+When @command{gawk} runs on MS-Windows systems, it uses the
+@code{INT} and @code{QUIT} signals for producing the profile and, in
+the case of the @code{INT} signal, @command{gawk} exits. This is
+because these systems don't support the @command{kill} command, so the
+only signals you can deliver to a program are those generated by the
+keyboard. The @code{INT} signal is generated by the
+@kbd{Ctrl-@key{C}} or @kbd{Ctrl-@key{BREAK}} key, while the
+@code{QUIT} signal is generated by the @kbd{Ctrl-@key{\}} key.
+
+Finally, @command{gawk} also accepts another option, @option{--pretty-print}.
+When called this way, @command{gawk} ``pretty prints'' the program into
+@file{awkprof.out}, without any execution counts.
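+
+For example, assuming a program file named @file{myprog.awk} (the name
+is just for illustration), the following writes a pretty-printed copy
+of the program to @file{awkprof.out}:
+
+@example
+gawk --pretty-print -f myprog.awk
+@end example
+
+@noindent
+As with @option{--profile}, you may supply an optional @value{FN}
+argument (e.g., @samp{--pretty-print=myprog.pretty}) to send the output
+somewhere other than @file{awkprof.out}.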
+
+@quotation NOTE
+Once upon a time, the @option{--pretty-print} option would also run
+your program. This is no longer the case.
+@end quotation
+
+There is a significant difference between the output created when
+profiling, and that created when pretty-printing. Pretty-printed output
+preserves the original comments that were in the program, although their
+placement may not correspond exactly to their original locations in the
+source code.
+
+However, as a deliberate design decision, profiling output @emph{omits}
+the original program's comments. This allows you to focus on the
+execution count data and helps you avoid the temptation to use the
+profiler for pretty-printing.
+
+Additionally, pretty-printed output does not have the leading indentation
+that the profiling output does. This makes it easy to pretty-print your
+code once development is completed, and then use the result as the final
+version of your program.
+
+@node Advanced Features Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @option{--non-decimal-data} option causes @command{gawk} to treat
+octal- and hexadecimal-looking input data as octal and hexadecimal.
+This option should be used with caution or not at all; use of @code{strtonum()}
+is preferable.
+Note that this option may disappear in a future version of @command{gawk}.
+
+@item
+You can take over complete control of sorting in @samp{for (@var{indx} in @var{array})}
+array traversal by setting @code{PROCINFO["sorted_in"]} to the name of a user-defined
+function that does the comparison of array elements based on index and value.
+
+@item
+Similarly, you can supply the name of a user-defined comparison function as the
+third argument to either @code{asort()} or @code{asorti()} to control how
+those functions sort arrays. Or you may provide one of the predefined control
+strings that work for @code{PROCINFO["sorted_in"]}.
+
+@item
+You can use the @samp{|&} operator to create a two-way pipe to a coprocess.
+You read from the coprocess with @code{getline} and write to it with @code{print}
+or @code{printf}. Use @code{close()} to close off the coprocess completely, or
+optionally, close off one side of the two-way communications.
+
+@item
+By using special @value{FN}s with the @samp{|&} operator, you can open a
+TCP/IP (or UDP/IP) connection to remote hosts in the Internet. @command{gawk}
+supports both IPv4 and IPv6.
+
+@item
+You can generate statement count profiles of your program. This can help you
+determine which parts of your program may be taking the most time and let
+you tune them more easily. Sending the @code{USR1} signal while profiling causes
+@command{gawk} to dump the profile and keep going, including a function call stack.
+
+@item
+You can also just ``pretty print'' the program. Unlike in earlier versions
+of @command{gawk}, this no longer runs the program.
+
+@end itemize
+
+
+@node Internationalization
+@chapter Internationalization with @command{gawk}
+
+Once upon a time, computer makers
+wrote software that worked only in English.
+Eventually, hardware and software vendors noticed that if their
+systems worked in the native languages of non-English-speaking
+countries, they were able to sell more systems.
+As a result, internationalization and localization
+of programs and software systems became a common practice.
+
+@cindex internationalization, localization
+@cindex @command{gawk}, internationalization and, See internationalization
+@cindex internationalization, localization, @command{gawk} and
+For many years, the ability to provide internationalization
+was largely restricted to programs written in C and C++.
+This @value{CHAPTER} describes the underlying library @command{gawk}
+uses for internationalization, as well as how
+@command{gawk} makes internationalization
+features available at the @command{awk} program level.
+Having internationalization available at the @command{awk} level
+gives software developers additional flexibility---they are no
+longer forced to write in C or C++ when internationalization is
+a requirement.
+
+@menu
+* I18N and L10N:: Internationalization and Localization.
+* Explaining gettext:: How GNU @command{gettext} works.
+* Programmer i18n:: Features for the programmer.
+* Translator i18n:: Features for the translator.
+* I18N Example:: A simple i18n example.
+* Gawk I18N:: @command{gawk} is also internationalized.
+* I18N Summary:: Summary of I18N stuff.
+@end menu
+
+@node I18N and L10N
+@section Internationalization and Localization
+
+@cindex internationalization
+@cindex localization, See internationalization@comma{} localization
+@cindex localization
+@dfn{Internationalization} means writing (or modifying) a program once,
+in such a way that it can use multiple languages without requiring
+further source-code changes.
+@dfn{Localization} means providing the data necessary for an
+internationalized program to work in a particular language.
+Most typically, these terms refer to features such as the language
+used for printing error messages, the language used to read
+responses, and information related to how numerical and
+monetary values are printed and read.
+
+@node Explaining gettext
+@section GNU @command{gettext}
+
+@cindex internationalizing a program
+@cindex @command{gettext} library
+@command{gawk} uses GNU @command{gettext} to provide its internationalization
+features.
+The facilities in GNU @command{gettext} focus on messages; strings printed
+by a program, either directly or via formatting with @code{printf} or
+@code{sprintf()}.@footnote{For some operating systems, the @command{gawk}
+port doesn't support GNU @command{gettext}.
+Therefore, these features are not available
+if you are using one of those operating systems. Sorry.}
+
+@cindex portability, @command{gettext} library and
+When using GNU @command{gettext}, each application has its own
+@dfn{text domain}. This is a unique name, such as @samp{kpilot} or @samp{gawk},
+that identifies the application.
+A complete application may have multiple components---programs written
+in C or C++, as well as scripts written in @command{sh} or @command{awk}.
+All of the components use the same text domain.
+
+To make the discussion concrete, assume we're writing an application
+named @command{guide}. Internationalization consists of the
+following steps, in this order:
+
+@enumerate
+@item
+The programmer reviews the source for all of @command{guide}'s components
+and marks each string that is a candidate for translation.
+For example, @code{"`-F': option required"} is a good candidate for translation.
+A table with strings of option names is not (e.g., @command{gawk}'s
+@option{--profile} option should remain the same, no matter what the local
+language).
+
+@cindex @code{textdomain()} function (C library)
+@item
+The programmer indicates the application's text domain
+(@command{"guide"}) to the @command{gettext} library,
+by calling the @code{textdomain()} function.
+
+@cindex @code{.pot} files
+@cindex files, @code{.pot}
+@cindex portable object template files
+@cindex files, portable object template
+@item
+Messages from the application are extracted from the source code and
+collected into a portable object template file (@file{guide.pot}),
+which lists the strings and their translations.
+The translations are initially empty.
+The original (usually English) messages serve as the key for
+lookup of the translations.
+
+@cindex @code{.po} files
+@cindex files, @code{.po}
+@cindex portable object files
+@cindex files, portable object
+@item
+For each language with a translator, @file{guide.pot}
+is copied to a portable object file (@code{.po})
+and translations are created and shipped with the application.
+For example, there might be a @file{fr.po} for a French translation.
+
+@cindex @code{.gmo} files
+@cindex files, @code{.gmo}
+@cindex message object files
+@cindex files, message object
+@item
+Each language's @file{.po} file is converted into a binary
+message object (@file{.gmo}) file.
+A message object file contains the original messages and their
+translations in a binary format that allows fast lookup of translations
+at runtime.
+
+@item
+When @command{guide} is built and installed, the binary translation files
+are installed in a standard place.
+
+@cindex @code{bindtextdomain()} function (C library)
+@item
+For testing and development, it is possible to tell @command{gettext}
+to use @file{.gmo} files in a different directory than the standard
+one by using the @code{bindtextdomain()} function.
+
+@cindex @code{.gmo} files, specifying directory of
+@cindex files, @code{.gmo}, specifying directory of
+@cindex message object files, specifying directory of
+@cindex files, message object, specifying directory of
+@item
+At runtime, @command{guide} looks up each string via a call
+to @code{gettext()}. The returned string is the translated string
+if available, or the original string if not.
+
+@item
+If necessary, it is possible to access messages from a different
+text domain than the one belonging to the application, without
+having to switch the application's default text domain back
+and forth.
+@end enumerate
+
+@cindex @code{gettext()} function (C library)
+In C (or C++), the string marking and dynamic translation lookup
+are accomplished by wrapping each string in a call to @code{gettext()}:
+
+@example
+printf("%s", gettext("Don't Panic!\n"));
+@end example
+
+The tools that extract messages from source code pull out all
+strings enclosed in calls to @code{gettext()}.
+
+@cindex @code{_} (underscore), C macro
+@cindex underscore (@code{_}), C macro
+The GNU @command{gettext} developers, recognizing that typing
+@samp{gettext(@dots{})} over and over again is both painful and ugly to look
+at, use the macro @samp{_} (an underscore) to make things easier:
+
+@example
+/* In the standard header file: */
+#define _(str) gettext(str)
+
+/* In the program text: */
+printf("%s", _("Don't Panic!\n"));
+@end example
+
+@cindex internationalization, localization, locale categories
+@cindex @command{gettext} library, locale categories
+@cindex locale categories
+@noindent
+This reduces the typing overhead to just three extra characters per string
+and is considerably easier to read as well.
+
+There are locale @dfn{categories}
+for different types of locale-related information.
+The defined locale categories that @command{gettext} knows about are:
+
+@table @code
+@cindex @code{LC_MESSAGES} locale category
+@item LC_MESSAGES
+Text messages. This is the default category for @command{gettext}
+operations, but it is possible to supply a different one explicitly,
+if necessary. (It is almost never necessary to supply a different category.)
+
+@cindex sorting characters in different languages
+@cindex @code{LC_COLLATE} locale category
+@item LC_COLLATE
+Text-collation information (i.e., how different characters
+and/or groups of characters sort in a given language).
+
+@cindex @code{LC_CTYPE} locale category
+@item LC_CTYPE
+Character-type information (alphabetic, digit, upper- or lowercase, and
+so on) as well as character encoding.
+@ignore
+In June 2001 Bruno Haible wrote:
+- Description of LC_CTYPE: It determines both
+ 1. character encoding,
+ 2. character type information.
+ (For example, in both KOI8-R and ISO-8859-5 the character type information
+ is the same - cyrillic letters could as 'alpha' - but the encoding is
+ different.)
+@end ignore
+This information is accessed via the
+POSIX character classes in regular expressions,
+such as @code{/[[:alnum:]]/}
+(@pxref{Bracket Expressions}).
+
+@cindex monetary information, localization
+@cindex currency symbols, localization
+@cindex @code{LC_MONETARY} locale category
+@item LC_MONETARY
+Monetary information, such as the currency symbol, and whether the
+symbol goes before or after a number.
+
+@cindex @code{LC_NUMERIC} locale category
+@item LC_NUMERIC
+Numeric information, such as which characters to use for the decimal
+point and the thousands separator.@footnote{Americans
+use a comma every three digits and a period for the decimal
+point, while many Europeans do exactly the opposite:
+1,234.56 versus 1.234,56.}
+
+@cindex time, localization and
+@cindex dates, information related to@comma{} localization
+@cindex @code{LC_TIME} locale category
+@item LC_TIME
+Time- and date-related information, such as 12- or 24-hour clock, month printed
+before or after the day in a date, local month abbreviations, and so on.
+
+@cindex @code{LC_ALL} locale category
+@item LC_ALL
+All of the above. (Not too useful in the context of @command{gettext}.)
+@end table
+
+@node Programmer i18n
+@section Internationalizing @command{awk} Programs
+@cindex @command{awk} programs, internationalizing
+
+@command{gawk} provides the following variables and functions for
+internationalization:
+
+@table @code
+@cindex @code{TEXTDOMAIN} variable
+@item TEXTDOMAIN
+This variable indicates the application's text domain.
+For compatibility with GNU @command{gettext}, the default
+value is @code{"messages"}.
+
+@cindex internationalization, localization, marked strings
+@cindex strings, for localization
+@item _"your message here"
+String constants marked with a leading underscore
+are candidates for translation at runtime.
+String constants without a leading underscore are not translated.
+
+@cindexgawkfunc{dcgettext}
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
+Return the translation of @var{string} in
+text domain @var{domain} for locale category @var{category}.
+The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
+The default value for @var{category} is @code{"LC_MESSAGES"}.
+
+If you supply a value for @var{category}, it must be a string equal to
+one of the known locale categories described in
+@ifnotinfo
+the previous @value{SECTION}.
+@end ifnotinfo
+@ifinfo
+@ref{Explaining gettext}.
+@end ifinfo
+You must also supply a text domain. Use @code{TEXTDOMAIN} if
+you want to use the current domain.
+
+@quotation CAUTION
+The order of arguments to the @command{awk} version
+of the @code{dcgettext()} function is purposely different from the order for
+the C version. The @command{awk} version's order was
+chosen to be simple and to allow for reasonable @command{awk}-style
+default arguments.
+@end quotation
+
+@cindexgawkfunc{dcngettext}
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
+Return the plural form used for @var{number} of the
+translation of @var{string1} and @var{string2} in text domain
+@var{domain} for locale category @var{category}. @var{string1} is the
+English singular variant of a message, and @var{string2} is the English plural
+variant of the same message.
+The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
+The default value for @var{category} is @code{"LC_MESSAGES"}.
+
+The same remarks about argument order as for the @code{dcgettext()} function apply.
+
+@cindex @code{.gmo} files, specifying directory of
+@cindex files, @code{.gmo}, specifying directory of
+@cindex message object files, specifying directory of
+@cindex files, message object, specifying directory of
+@cindexgawkfunc{bindtextdomain}
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain} ]@code{)}
+Change the directory in which
+@command{gettext} looks for @file{.gmo} files, in case they
+will not or cannot be placed in the standard locations
+(e.g., during testing).
+Return the directory in which @var{domain} is ``bound.''
+
+The default @var{domain} is the value of @code{TEXTDOMAIN}.
+If @var{directory} is the null string (@code{""}), then
+@code{bindtextdomain()} returns the current binding for the
+given @var{domain}.
+@end table
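+
+As a rough sketch of how these arguments fit together, the following
+@code{BEGIN} rule spells out values that would normally be left to
+default.  It uses the @code{"guide"} text domain from the examples in
+this @value{CHAPTER}; @code{nwarnings} stands in for some count computed
+by the program:
+
+@example
+BEGIN @{
+    TEXTDOMAIN = "guide"
+
+    # Same as dcgettext("Don't Panic"): the domain defaults to
+    # TEXTDOMAIN and the category to "LC_MESSAGES"
+    msg = dcgettext("Don't Panic", TEXTDOMAIN, "LC_MESSAGES")
+
+    # Singular/plural lookup in an explicit domain and category
+    warn = dcngettext("%d warning", "%d warnings", nwarnings,
+                      "guide", "LC_MESSAGES")
+
+    # Query the current binding of the "guide" domain
+    where = bindtextdomain("", "guide")
+@}
+@end example
+
+@noindent
+In practice, you will almost always omit the @var{category} argument and
+let it default to @code{"LC_MESSAGES"}.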
+
+To use these facilities in your @command{awk} program, follow the steps
+outlined in
+@ifnotinfo
+the previous @value{SECTION},
+@end ifnotinfo
+@ifinfo
+@ref{Explaining gettext},
+@end ifinfo
+like so:
+
+@enumerate
+@cindex @code{BEGIN} pattern, @code{TEXTDOMAIN} variable and
+@cindex @code{TEXTDOMAIN} variable, @code{BEGIN} pattern and
+@item
+Set the variable @code{TEXTDOMAIN} to the text domain of
+your program. This is best done in a @code{BEGIN} rule
+(@pxref{BEGIN/END}),
+or it can also be done via the @option{-v} command-line
+option (@pxref{Options}):
+
+@example
+BEGIN @{
+ TEXTDOMAIN = "guide"
+ @dots{}
+@}
+@end example
+
+@cindex @code{_} (underscore), translatable string
+@cindex underscore (@code{_}), translatable string
+@item
+Mark all translatable strings with a leading underscore (@samp{_})
+character. It @emph{must} be adjacent to the opening
+quote of the string. For example:
+
+@example
+print _"hello, world"
+x = _"you goofed"
+printf(_"Number of users is %d\n", nusers)
+@end example
+
+@item
+If you are creating strings dynamically, you can
+still translate them, using the @code{dcgettext()}
+built-in function:@footnote{Thanks to Bruno Haible for this
+example.}
+
+@example
+if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
+@end example
+
+Here, the call to @code{dcgettext()} supplies a different
+text domain (@code{"adminprog"}) in which to find the
+message, but it uses the default @code{"LC_MESSAGES"} category.
+
+The previous example only works correctly if @code{ncustomers} is not
+equal to one.
+This example would be better done with @code{dcngettext()}:
+
+@example
+if (groggy)
+ message = dcngettext("%d customer disturbing me\n",
+ "%d customers disturbing me\n", "adminprog")
+else
+ message = dcngettext("enjoying %d customer\n",
+ "enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
+@end example
+
+
+@cindex @code{LC_MESSAGES} locale category, @code{bindtextdomain()} function (@command{gawk})
+@item
+During development, you might want to put the @file{.gmo}
+file in a private directory for testing. This is done
+with the @code{bindtextdomain()} built-in function:
+
+@example
+BEGIN @{
+ TEXTDOMAIN = "guide" # our text domain
+ if (Testing) @{
+ # where to find our files
+ bindtextdomain("testdir")
+ # joe is in charge of adminprog
+ bindtextdomain("../joe/testdir", "adminprog")
+ @}
+ @dots{}
+@}
+@end example
+
+@end enumerate
+
+@DBXREF{I18N Example}
+for an example program showing the steps to create
+and use translations from @command{awk}.
+
+@node Translator i18n
+@section Translating @command{awk} Programs
+
+@cindex @code{.po} files
+@cindex files, @code{.po}
+@cindex portable object files
+@cindex files, portable object
+Once a program's translatable strings have been marked, they must
+be extracted to create the initial @file{.pot} file.
+As part of translation, it is often helpful to rearrange the order
+in which arguments to @code{printf} are output.
+
+@command{gawk}'s @option{--gen-pot} command-line option extracts
+the messages and is discussed next.
+After that, @code{printf}'s ability to
+rearrange the order of its arguments at runtime
+is covered.
+
+@menu
+* String Extraction:: Extracting marked strings.
+* Printf Ordering:: Rearranging @code{printf} arguments.
+* I18N Portability:: @command{awk}-level portability issues.
+@end menu
+
+@node String Extraction
+@subsection Extracting Marked Strings
+@cindex strings, extracting
+@cindex marked strings@comma{} extracting
+@cindex @option{--gen-pot} option
+@cindex command-line options, string extraction
+@cindex string extraction (internationalization)
+@cindex marked string extraction (internationalization)
+@cindex extraction, of marked strings (internationalization)
+
+@cindex @option{--gen-pot} option
+Once your @command{awk} program is working, and all the strings have
+been marked and you've set (and perhaps bound) the text domain,
+it is time to produce translations.
+First, use the @option{--gen-pot} command-line option to create
+the initial @file{.pot} file:
+
+@example
+gawk --gen-pot -f guide.awk > guide.pot
+@end example
+
+@cindex @code{xgettext} utility
+When run with @option{--gen-pot}, @command{gawk} does not execute your
+program. Instead, it parses it as usual and prints all marked strings
+to standard output in the format of a GNU @command{gettext} Portable Object
+file. Also included in the output are any constant strings that
+appear as the first argument to @code{dcgettext()} or as the first and
+second argument to @code{dcngettext()}.@footnote{The
+@command{xgettext} utility that comes with GNU
+@command{gettext} can handle @file{.awk} files.}
+You should distribute the generated @file{.pot} file with
+your @command{awk} program; translators will eventually use it
+to provide you with translations that you can then distribute as well.
+@DBXREF{I18N Example}
+for the full list of steps to go through to create and test
+translations for @command{guide}.
+
+@node Printf Ordering
+@subsection Rearranging @code{printf} Arguments
+
+@cindex @code{printf} statement, positional specifiers
+@cindex positional specifiers, @code{printf} statement
+Format strings for @code{printf} and @code{sprintf()}
+(@pxref{Printf})
+present a special problem for translation.
+Consider the following:@footnote{This example is borrowed
+from the GNU @command{gettext} manual.}
+
+@example
+printf(_"String `%s' has %d characters\n",
+          string, length(string))
+@end example
+
+A possible German translation for this might be:
+
+@example
+"%d Zeichen lang ist die Zeichenkette `%s'\n"
+@end example
+
+The problem should be obvious: the order of the format
+specifications is different from the original!
+Even though @code{gettext()} can return the translated string
+at runtime,
+it cannot change the argument order in the call to @code{printf}.
+
+To solve this problem, @code{printf} format specifiers may have
+an additional optional element, which we call a @dfn{positional specifier}.
+For example:
+
+@example
+"%2$d Zeichen lang ist die Zeichenkette `%1$s'\n"
+@end example
+
+Here, the positional specifier consists of an integer count, which indicates which
+argument to use, and a @samp{$}. Counts are one-based, and the
+format string itself is @emph{not} included. Thus, in the following
+example, @samp{string} is the first argument and @samp{length(string)} is the second:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{string = "Don\47t Panic"}
+> @kbd{printf "%2$d characters live in \"%1$s\"\n",}
+> @kbd{string, length(string)}
+> @kbd{@}'}
+@print{} 11 characters live in "Don't Panic"
+@end example
+
+If present, positional specifiers come first in the format specification,
+before the flags, the field width, and/or the precision.
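+
+For instance, a positional specifier may be combined with the usual
+flags and field width; the position comes first, and the rest of the
+format specification follows as usual.  A small sketch:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%2$05d <%1$-7s>\n", "abc", 42 @}'}
+@print{} 00042 <abc    >
+@end example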
+
+Positional specifiers can be used with the dynamic field width and
+precision capability:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{printf("%*.*s\n", 10, 20, "hello")}
+> @kbd{printf("%3$*2$.*1$s\n", 20, 10, "hello")}
+> @kbd{@}'}
+@print{} hello
+@print{} hello
+@end example
+
+@quotation NOTE
+When using @samp{*} with a positional specifier, the @samp{*}
+comes first, then the integer position, and then the @samp{$}.
+This is somewhat counterintuitive.
+@end quotation
+
+@cindex @code{printf} statement, positional specifiers, mixing with regular formats
+@cindex positional specifiers, @code{printf} statement, mixing with regular formats
+@cindex format specifiers, mixing regular with positional specifiers
+@command{gawk} does not allow you to mix regular format specifiers
+and those with positional specifiers in the same string:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%d %3$s\n", 1, 2, "hi" @}'}
+@error{} gawk: cmd. line:1: fatal: must use `count$' on all formats or none
+@end example
+
+@quotation NOTE
+There are some pathological cases that @command{gawk} may fail to
+diagnose. In such cases, the output may not be what you expect.
+It's still a bad idea to try mixing them, even if @command{gawk}
+doesn't detect it.
+@end quotation
+
+Although positional specifiers can be used directly in @command{awk} programs,
+their primary purpose is to help in producing correct translations of
+format strings into languages different from the one in which the program
+is first written.
+
+@node I18N Portability
+@subsection @command{awk} Portability Issues
+
+@cindex portability, internationalization and
+@cindex internationalization, localization, portability and
+@command{gawk}'s internationalization features were purposely chosen to
+have as little impact as possible on the portability of @command{awk}
+programs that use them to other versions of @command{awk}.
+Consider this program:
+
+@example
+BEGIN @{
+ TEXTDOMAIN = "guide"
+ if (Test_Guide) # set with -v
+ bindtextdomain("/test/guide/messages")
+ print _"don't panic!"
+@}
+@end example
+
+@noindent
+As written, it won't work on other versions of @command{awk}.
+However, it is actually almost portable, requiring very little
+change:
+
+@itemize @value{BULLET}
+@cindex @code{TEXTDOMAIN} variable, portability and
+@item
+Assignments to @code{TEXTDOMAIN} won't have any effect,
+because @code{TEXTDOMAIN} is not special in other @command{awk} implementations.
+
+@item
+Non-GNU versions of @command{awk} treat marked strings
+as the concatenation of a variable named @code{_} with the string
+following it.@footnote{This is good fodder for an ``Obfuscated
+@command{awk}'' contest.} Typically, the variable @code{_} has
+the null string (@code{""}) as its value, leaving the original string constant as
+the result, as the sketch after this list illustrates.
+
+@item
+By defining ``dummy'' functions to replace @code{dcgettext()}, @code{dcngettext()}
+and @code{bindtextdomain()}, the @command{awk} program can be made to run, but
+all the messages are output in the original language.
+For example:
+
+@cindex @code{bindtextdomain()} function (@command{gawk}), portability and
+@cindex @code{dcgettext()} function (@command{gawk}), portability and
+@cindex @code{dcngettext()} function (@command{gawk}), portability and
+@example
+@c file eg/lib/libintl.awk
+function bindtextdomain(dir, domain)
+@{
+ return dir
+@}
+
+function dcgettext(string, domain, category)
+@{
+ return string
+@}
+
+function dcngettext(string1, string2, number, domain, category)
+@{
+ return (number == 1 ? string1 : string2)
+@}
+@c endfile
+@end example
+
+@item
+The use of positional specifications in @code{printf} or
+@code{sprintf()} is @emph{not} portable.
+To support @code{gettext()} at the C level, many systems' C versions of
+@code{sprintf()} do support positional specifiers. But it works only if
+enough arguments are supplied in the function call. Many versions of
+@command{awk} pass @code{printf} formats and arguments unchanged to the
+underlying C library version of @code{sprintf()}, but only one format and
+argument at a time. What happens if a positional specification is
+used is anybody's guess.
+However, because the positional specifications are primarily for use in
+@emph{translated} format strings, and because non-GNU @command{awk}s never
+retrieve the translated string, this should not be a problem in practice.
+@end itemize
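+
+To make the second point above concrete, here is a minimal sketch of
+what a traditional @command{awk} does with a marked string when the
+program is run unchanged:
+
+@example
+# In a non-GNU awk, _ is just an ordinary, uninitialized variable,
+# so _"don't panic!" is the concatenation of "" with the string:
+BEGIN @{ print _"don't panic!" @}    # prints: don't panic!
+@end example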
+
+@node I18N Example
+@section A Simple Internationalization Example
+
+Now let's look at a step-by-step example of how to internationalize and
+localize a simple @command{awk} program, using @file{guide.awk} as our
+original source:
+
+@example
+@c file eg/prog/guide.awk
+BEGIN @{
+ TEXTDOMAIN = "guide"
+ bindtextdomain(".") # for testing
+ print _"Don't Panic"
+ print _"The Answer Is", 42
+ print "Pardon me, Zaphod who?"
+@}
+@c endfile
+@end example
+
+@noindent
+Run @samp{gawk --gen-pot} to create the @file{.pot} file:
+
+@example
+$ @kbd{gawk --gen-pot -f guide.awk > guide.pot}
+@end example
+
+@noindent
+This produces:
+
+@example
+@c file eg/data/guide.po
+#: guide.awk:4
+msgid "Don't Panic"
+msgstr ""
+
+#: guide.awk:5
+msgid "The Answer Is"
+msgstr ""
+
+@c endfile
+@end example
+
+This original portable object template file is saved and reused for each language
+into which the application is translated. The @code{msgid}
+is the original string and the @code{msgstr} is the translation.
+
+@quotation NOTE
+Strings not marked with a leading underscore do not
+appear in the @file{guide.pot} file.
+@end quotation
+
+Next, the messages must be translated.
+Here is a translation to a hypothetical dialect of English,
+called ``Mellow'':@footnote{Perhaps it would be better if it were
+called ``Hippy.'' Ah, well.}
+
+@example
+@group
+$ @kbd{cp guide.pot guide-mellow.po}
+@var{Add translations to} guide-mellow.po @dots{}
+@end group
+@end example
+
+@noindent
+Following are the translations:
+
+@example
+@c file eg/data/guide-mellow.po
+#: guide.awk:4
+msgid "Don't Panic"
+msgstr "Hey man, relax!"
+
+#: guide.awk:5
+msgid "The Answer Is"
+msgstr "Like, the scoop is"
+
+@c endfile
+@end example
+
+@cindex Linux
+@cindex GNU/Linux
+The next step is to make the directory to hold the binary message object
+file and then to create the @file{guide.mo} file.
+We pretend that our file is to be used in the @code{en_US.UTF-8} locale,
+because we have to use a locale name known to the C @command{gettext} routines.
+The directory layout shown here is standard for GNU @command{gettext} on
+GNU/Linux systems. Other versions of @command{gettext} may use a different
+layout:
+
+@example
+$ @kbd{mkdir en_US.UTF-8 en_US.UTF-8/LC_MESSAGES}
+@end example
+
+@cindex @code{.po} files, converting to @code{.mo}
+@cindex files, @code{.po}, converting to @code{.mo}
+@cindex @code{.mo} files, converting from @code{.po}
+@cindex files, @code{.mo}, converting from @code{.po}
+@cindex portable object files, converting to message object files
+@cindex files, portable object, converting to message object files
+@cindex message object files, converting from portable object files
+@cindex files, message object, converting from portable object files
+@cindex @command{msgfmt} utility
+The @command{msgfmt} utility does the conversion from human-readable
+@file{.po} file to machine-readable @file{.mo} file.
+By default, @command{msgfmt} creates a file named @file{messages}.
+This file must be renamed and placed in the proper directory (using
+the @option{-o} option) so that @command{gawk} can find it:
+
+@example
+$ @kbd{msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo}
+@end example
+
+Finally, we run the program to test it:
+
+@example
+$ @kbd{gawk -f guide.awk}
+@print{} Hey man, relax!
+@print{} Like, the scoop is 42
+@print{} Pardon me, Zaphod who?
+@end example
+
+If the three replacement functions for @code{dcgettext()}, @code{dcngettext()},
+and @code{bindtextdomain()}
+(@pxref{I18N Portability})
+are in a file named @file{libintl.awk},
+then we can run @file{guide.awk} unchanged as follows:
+
+@example
+$ @kbd{gawk --posix -f guide.awk -f libintl.awk}
+@print{} Don't Panic
+@print{} The Answer Is 42
+@print{} Pardon me, Zaphod who?
+@end example
+
+@node Gawk I18N
+@section @command{gawk} Can Speak Your Language
+
+@command{gawk} itself has been internationalized
+using the GNU @command{gettext} package.
+(GNU @command{gettext} is described in
+complete detail in
+@ifinfo
+@inforef{Top, , GNU @command{gettext} utilities, gettext, GNU gettext tools}.)
+@end ifinfo
+@ifnotinfo
+@uref{http://www.gnu.org/software/gettext/manual/,
+@cite{GNU gettext tools}}.)
+@end ifnotinfo
+As of this writing, the latest version of GNU @command{gettext} is
+@uref{ftp://ftp.gnu.org/gnu/gettext/gettext-0.19.4.tar.gz,
+@value{PVERSION} 0.19.4}.
+
+If a translation of @command{gawk}'s messages exists,
+then @command{gawk} produces usage messages, warnings,
+and fatal errors in the local language.
+
+@node I18N Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Internationalization means writing a program such that it can use multiple
+languages without requiring source-code changes. Localization means
+providing the data necessary for an internationalized program to work
+in a particular language.
+
+@item
+@command{gawk} uses GNU @command{gettext} to let you internationalize
+and localize @command{awk} programs. A program's text domain identifies
+the program for grouping all messages and other data together.
+
+@item
+You mark a program's strings for translation by preceding them with
+an underscore. Once that is done, the strings are extracted into a
+@file{.pot} file. This file is copied for each language into a @file{.po}
+file, and the @file{.po} files are compiled into @file{.gmo} files for
+use at runtime.
+
+@item
+You can use positional specifiers with @code{sprintf()} and
+@code{printf} to rearrange the placement of argument values in formatted
+strings and output. This is useful for the translations of format
+control strings.
+
+@item
+The internationalization features have been designed so that they
+can be easily worked around in a standard @command{awk}.
+
+@item
+@command{gawk} itself has been internationalized and ships with
+a number of translations for its messages.
+
+@end itemize
+
+
+@node Debugger
+@chapter Debugging @command{awk} Programs
+@cindex debugging @command{awk} programs
+
+@c The original text for this chapter was contributed by Efraim Yawitz.
+@c FIXME: Add more indexing.
+
+It would be nice if computer programs worked perfectly the first time they
+were run, but in real life, this rarely happens for programs of
+any complexity. Thus, most programming languages have facilities available
+for ``debugging'' programs, and now @command{awk} is no exception.
+
+The @command{gawk} debugger is purposely modeled after
+@uref{http://www.gnu.org/software/gdb/, the GNU Debugger (GDB)}
+command-line debugger. If you are familiar with GDB, learning
+how to use @command{gawk} for debugging your program is easy.
+
+@menu
+* Debugging:: Introduction to @command{gawk} debugger.
+* Sample Debugging Session:: Sample debugging session.
+* List of Debugger Commands:: Main debugger commands.
+* Readline Support:: Readline support.
+* Limitations:: Limitations and future plans.
+* Debugging Summary:: Debugging summary.
+@end menu
+
+@node Debugging
+@section Introduction to the @command{gawk} Debugger
+
+This @value{SECTION} introduces debugging in general and begins
+the discussion of debugging in @command{gawk}.
+
+@menu
+* Debugging Concepts:: Debugging in General.
+* Debugging Terms:: Additional Debugging Concepts.
+* Awk Debugging:: Awk Debugging.
+@end menu
+
+@node Debugging Concepts
+@subsection Debugging in General
+
+(If you have used debuggers in other languages, you may want to skip
+ahead to the next section on the specific features of the @command{gawk}
+debugger.)
+
+Of course, a debugging program cannot remove bugs for you, because it has
+no way of knowing what you or your users consider a ``bug'' versus a
+``feature.'' (Sometimes, we humans have a hard time with this ourselves.)
+In that case, what can you expect from such a tool? The answer to that
+depends on the language being debugged, but in general, you can expect at
+least the following:
+
+@itemize @value{BULLET}
+@item
+The ability to watch a program execute its instructions one by one,
+giving you, the programmer, the opportunity to think about what is happening
+on a time scale of seconds, minutes, or hours, rather than the nanosecond
+time scale at which the code usually runs.
+
+@item
+The opportunity to not only passively observe the operation of your
+program, but to control it and try different paths of execution, without
+having to change your source files.
+
+@item
+The chance to see the values of data in the program at any point in
+execution, and also to change that data on the fly, to see how that
+affects what happens afterward. (This often includes the ability
+to look at internal data structures besides the variables you actually
+defined in your code.)
+
+@item
+The ability to obtain additional information about your program's state
+or even its internal structure.
+@end itemize
+
+All of these tools provide a great amount of help in using your own
+skills and understanding of the goals of your program to find where it
+is going wrong (or, for that matter, to better comprehend a perfectly
+functional program that you or someone else wrote).
+
+@node Debugging Terms
+@subsection Debugging Concepts
+
+Before diving in to the details, we need to introduce several
+important concepts that apply to just about all debuggers.
+The following list defines terms used throughout the rest of
+this @value{CHAPTER}:
+
+@table @dfn
+@cindex stack frame
+@item Stack frame
+Programs generally call functions during the course of their execution.
+One function can call another, or a function can call itself (recursion).
+You can view the chain of called functions (main program calls A, which
+calls B, which calls C), as a stack of executing functions: the currently
+running function is the topmost one on the stack, and when it finishes
+(returns), the next one down then becomes the active function.
+Such a stack is termed a @dfn{call stack}.
+
+For each function on the call stack, the system maintains a data area
+that contains the function's parameters, local variables, and return value,
+as well as any other ``bookkeeping'' information needed to manage the
+call stack. This data area is termed a @dfn{stack frame}.
+
+@command{gawk} also follows this model, and gives you
+access to the call stack and to each stack frame. You can see the
+call stack, as well as from where each function on the stack was
+invoked. Commands that print the call stack print information about
+each stack frame (as detailed later on).
+
+@item Breakpoint
+@cindex breakpoint
+During debugging, you often wish to let the program run until it
+reaches a certain point, and then continue execution from there one
+statement (or instruction) at a time. The way to do this is to set
+a @dfn{breakpoint} within the program. A breakpoint is where the
+execution of the program should break off (stop), so that you can
+take over control of the program's execution. You can add and remove
+as many breakpoints as you like.
+
+@item Watchpoint
+@cindex watchpoint
+A watchpoint is similar to a breakpoint. The difference is that
+breakpoints are oriented around the code: stop when a certain point in the
+code is reached. A watchpoint, however, specifies that program execution
+should stop when a @emph{data value} is changed. This is useful, as
+sometimes it happens that a variable receives an erroneous value, and it's
+hard to track down where this happens just by looking at the code.
+By using a watchpoint, you can stop whenever a variable is assigned to,
+and usually find the errant code quite quickly.
+@end table
+
+@node Awk Debugging
+@subsection Awk Debugging
+
+Debugging an @command{awk} program has some specific aspects that are
+not shared with other programming languages.
+
+First of all, the fact that @command{awk} programs usually take input
+line by line from a file or files and operate on those lines using specific
+rules makes it especially useful to organize viewing the execution of
+the program in terms of these rules. As we will see, each @command{awk}
+rule is treated almost like a function call, with its own specific block
+of instructions.
+
+In addition, because @command{awk} is by design a very concise language,
+it is easy to lose sight of everything that is going on ``inside''
+each line of @command{awk} code. The debugger provides the opportunity
+to look at the individual primitive instructions carried out
+by the higher-level @command{awk} commands.
+
+@node Sample Debugging Session
+@section Sample Debugging Session
+@cindex sample debugging session
+
+In order to illustrate the use of @command{gawk} as a debugger, let's look at a sample
+debugging session. We will use the @command{awk} implementation of the
+POSIX @command{uniq} command described earlier (@pxref{Uniq Program})
+as our example.
+
+@menu
+* Debugger Invocation:: How to Start the Debugger.
+* Finding The Bug:: Finding the Bug.
+@end menu
+
+@node Debugger Invocation
+@subsection How to Start the Debugger
+@cindex starting the debugger
+@cindex debugger, how to start
+
+Starting the debugger is almost exactly like running @command{gawk} normally,
+except you have to pass an additional option @option{--debug}, or the
+corresponding short option @option{-D}. The file(s) containing the
+program and any supporting code are given on the command line as arguments
+to one or more @option{-f} options. (@command{gawk} is not designed
+to debug command-line programs, only programs contained in files.)
+In our case, we invoke the debugger like this:
+
+@example
+$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile}
+@end example
+
+@noindent
+where both @file{getopt.awk} and @file{join.awk} are in @env{$AWKPATH}.
+(Experienced users of GDB or similar debuggers should note that
+this syntax is slightly different from what they are used to.
+With the @command{gawk} debugger, you give the arguments for running the program
+in the command line to the debugger rather than as part of the @code{run}
+command at the debugger prompt.)
+The @option{-1} is an option to @file{uniq.awk}.
+
+Instead of immediately running the program on @file{inputfile}, as
+@command{gawk} would ordinarily do, the debugger merely loads all
+the program source files, compiles them internally, and then gives
+us a prompt:
+
+@example
+gawk>
+@end example
+
+@noindent
+from which we can issue commands to the debugger. At this point, no
+code has been executed.
+
+@node Finding The Bug
+@subsection Finding the Bug
+
+Let's say that we are having a problem using (a faulty version of)
+@file{uniq.awk} in the ``field-skipping'' mode, and it doesn't seem to be
+catching lines which should be identical when skipping the first field,
+such as:
+
+@example
+awk is a wonderful program!
+gawk is a wonderful program!
+@end example
+
+This could happen if we were thinking (C-like) of the fields in a record
+as being numbered in a zero-based fashion, so instead of the lines:
+
+@example
+clast = join(alast, fcount+1, n)
+cline = join(aline, fcount+1, m)
+@end example
+
+@noindent
+we wrote:
+
+@example
+clast = join(alast, fcount, n)
+cline = join(aline, fcount, m)
+@end example
+
+The first thing we usually want to do when trying to investigate a
+problem like this is to put a breakpoint in the program so that we can
+watch it at work and catch what it is doing wrong. A reasonable spot for
+a breakpoint in @file{uniq.awk} is at the beginning of the function
+@code{are_equal()}, which compares the current line with the previous one. To set
+the breakpoint, use the @code{b} (breakpoint) command:
+
+@example
+gawk> @kbd{b are_equal}
+@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
+@end example
+
+The debugger tells us the file and line number where the breakpoint is.
+Now type @samp{r} or @samp{run} and the program runs until it hits
+the breakpoint for the first time:
+
+@example
+gawk> @kbd{r}
+@print{} Starting program:
+@print{} Stopping in Rule ...
+@print{} Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
+ at `awklib/eg/prog/uniq.awk':63
+@print{} 63 if (fcount == 0 && charcount == 0)
+gawk>
+@end example
+
+Now we can look at what's going on inside our program. First of all,
+let's see how we got to where we are. At the prompt, we type @samp{bt}
+(short for ``backtrace''), and the debugger responds with a
+listing of the current stack frames:
+
+@example
+gawk> @kbd{bt}
+@print{} #0 are_equal(n, m, clast, cline, alast, aline)
+ at `awklib/eg/prog/uniq.awk':68
+@print{} #1 in main() at `awklib/eg/prog/uniq.awk':88
+@end example
+
+This tells us that @code{are_equal()} was called by the main program at
+line 88 of @file{uniq.awk}. (This is not a big surprise, because this
+is the only call to @code{are_equal()} in the program, but in more complex
+programs, knowing who called a function and with what parameters can be
+the key to finding the source of the problem.)
+
+Now that we're in @code{are_equal()}, we can start looking at the values
+of some variables. Let's say we type @samp{p n}
+(@code{p} is short for ``print''). We would expect to see the value of
+@code{n}, a parameter to @code{are_equal()}. Actually, the debugger
+gives us:
+
+@example
+gawk> @kbd{p n}
+@print{} n = untyped variable
+@end example
+
+@noindent
+In this case, @code{n} is an uninitialized local variable, because the
+function was called without arguments (@pxref{Function Calls}).
+
+A more useful variable to display might be the current record:
+
+@example
+gawk> @kbd{p $0}
+@print{} $0 = "gawk is a wonderful program!"
+@end example
+
+@noindent
+This might be a bit puzzling at first, as this is the second line of
+our test input. Let's look at @code{NR}:
+
+@example
+gawk> @kbd{p NR}
+@print{} NR = 2
+@end example
+
+@noindent
+So we can see that @code{are_equal()} was only called for the second record
+of the file. Of course, this is because our program contains a rule for
+@samp{NR == 1}:
+
+@example
+NR == 1 @{
+ last = $0
+ next
+@}
+@end example
+
+OK, let's just check that that rule worked correctly:
+
+@example
+gawk> @kbd{p last}
+@print{} last = "awk is a wonderful program!"
+@end example
+
+Everything we have done so far has verified that the program has worked as
+planned, up to and including the call to @code{are_equal()}, so the problem must
+be inside this function. To investigate further, we must begin
+``stepping through'' the lines of @code{are_equal()}. We start by typing
+@samp{n} (for ``next''):
+
+@example
+gawk> @kbd{n}
+@print{} 66 if (fcount > 0) @{
+@end example
+
+This tells us that @command{gawk} is now ready to execute line 66, which
+decides whether to give the lines the special ``field skipping'' treatment
+indicated by the @option{-1} command-line option. (Notice that we skipped
+from where we were before at line 63 to here, because the condition in line 63
+@samp{if (fcount == 0 && charcount == 0)} was false.)
+
+Continuing to step, we now get to the splitting of the current and
+last records:
+
+@example
+gawk> @kbd{n}
+@print{} 67 n = split(last, alast)
+gawk> @kbd{n}
+@print{} 68 m = split($0, aline)
+@end example
+
+At this point, we should be curious to see what our records were split
+into, so we try to look:
+
+@example
+gawk> @kbd{p n m alast aline}
+@print{} n = 5
+@print{} m = untyped variable
+@print{} alast = array, 5 elements
+@print{} aline = untyped variable
+@end example
+
+@noindent
+(The @code{p} command can take more than one argument, similar to
+@command{awk}'s @code{print} statement.)
+
+This is kind of disappointing, though. All we found out is that there
+are five elements in @code{alast}; @code{m} and @code{aline} don't have
+values because we are at line 68 but haven't executed it yet.
+This information is useful enough (we now know that
+none of the words were accidentally left out), but what if we want to see
+inside the array?
+
+The first choice would be to use subscripts:
+
+@example
+gawk> @kbd{p alast[0]}
+@print{} "0" not in array `alast'
+@end example
+
+@noindent
+Oops!
+
+@example
+gawk> @kbd{p alast[1]}
+@print{} alast["1"] = "awk"
+@end example
+
+This would be kind of slow for a 100-member array, though, so
+@command{gawk} provides a shortcut (reminiscent of another language
+not to be mentioned):
+
+@example
+gawk> @kbd{p @@alast}
+@print{} alast["1"] = "awk"
+@print{} alast["2"] = "is"
+@print{} alast["3"] = "a"
+@print{} alast["4"] = "wonderful"
+@print{} alast["5"] = "program!"
+@end example
+
+It looks like we got this far OK. Let's take another step
+or two:
+
+@example
+gawk> @kbd{n}
+@print{} 69 clast = join(alast, fcount, n)
+gawk> @kbd{n}
+@print{} 70 cline = join(aline, fcount, m)
+@end example
+
+Well, here we are at our error (sorry to spoil the suspense). What we
+had in mind was to join the fields starting from the second one to make
+the virtual record to compare, and if the first field was numbered zero,
+this would work. Let's look at what we've got:
+
+@example
+gawk> @kbd{p cline clast}
+@print{} cline = "gawk is a wonderful program!"
+@print{} clast = "awk is a wonderful program!"
+@end example
+
+Hey, those look pretty familiar! They're just our original, unaltered,
+input records. A little thinking (the human brain is still the best
+debugging tool), and we realize that we were off by one!
+
+We get out of the debugger:
+
+@example
+gawk> @kbd{q}
+@print{} The program is running. Exit anyway (y/n)? @kbd{y}
+@end example
+
+@noindent
+Then we get into an editor:
+
+@example
+clast = join(alast, fcount+1, n)
+cline = join(aline, fcount+1, m)
+@end example
+
+@noindent
+and problem solved!
+
+@node List of Debugger Commands
+@section Main Debugger Commands
+
+The @command{gawk} debugger command set can be divided into the
+following categories:
+
+@itemize @value{BULLET}
+
+@item
+Breakpoint control
+
+@item
+Execution control
+
+@item
+Viewing and changing data
+
+@item
+Working with the stack
+
+@item
+Getting information
+
+@item
+Miscellaneous
+@end itemize
+
+Each of these is discussed in the following subsections.
+In the following descriptions, commands which may be abbreviated
+show the abbreviation on a second description line.
+A debugger command name may also be truncated if that partial
+name is unambiguous. The debugger has the built-in capability to
+automatically repeat the previous command just by hitting @key{Enter}.
+This works for the commands @code{list}, @code{next}, @code{nexti},
+@code{step}, @code{stepi}, and @code{continue} executed without any
+argument.
+
+@menu
+* Breakpoint Control:: Control of Breakpoints.
+* Debugger Execution Control:: Control of Execution.
+* Viewing And Changing Data:: Viewing and Changing Data.
+* Execution Stack:: Dealing with the Stack.
+* Debugger Info:: Obtaining Information about the Program and
+ the Debugger State.
+* Miscellaneous Debugger Commands:: Miscellaneous Commands.
+@end menu
+
+@node Breakpoint Control
+@subsection Control of Breakpoints
+
+As we saw earlier, the first thing you probably want to do in a debugging
+session is to get your breakpoints set up, because your program
+will otherwise just run as if it was not under the debugger. The commands for
+controlling breakpoints are:
+
+@table @asis
+@cindex debugger commands, @code{b} (@code{break})
+@cindex debugger commands, @code{break}
+@cindex @code{break} debugger command
+@cindex @code{b} debugger command (alias for @code{break})
+@cindex set breakpoint
+@cindex breakpoint, setting
+@item @code{break} [[@var{filename}@code{:}]@var{n} | @var{function}] [@code{"@var{expression}"}]
+@itemx @code{b} [[@var{filename}@code{:}]@var{n} | @var{function}] [@code{"@var{expression}"}]
+Without any argument, set a breakpoint at the next instruction
+to be executed in the selected stack frame.
+Arguments can be one of the following:
+
+@c @asis for docbook
+@c nested table
+@table @asis
+@item @var{n}
+Set a breakpoint at line number @var{n} in the current source file.
+
+@item @var{filename}@code{:}@var{n}
+Set a breakpoint at line number @var{n} in source file @var{filename}.
+
+@item @var{function}
+Set a breakpoint at entry to (the first instruction of)
+function @var{function}.
+@end table
+
+Each breakpoint is assigned a number which can be used to delete it from
+the breakpoint list using the @code{delete} command.
+
+With a breakpoint, you may also supply a condition. This is an
+@command{awk} expression (enclosed in double quotes) that the debugger
+evaluates whenever the breakpoint is reached. If the condition is true,
+then the debugger stops execution and prompts for a command. Otherwise,
+it continues executing the program.
+
+@cindex debugger commands, @code{clear}
+@cindex @code{clear} debugger command
+@cindex delete breakpoint at location
+@cindex breakpoint at location, how to delete
+@item @code{clear} [[@var{filename}@code{:}]@var{n} | @var{function}]
+Without any argument, delete any breakpoint at the next instruction
+to be executed in the selected stack frame. If the program stops at
+a breakpoint, this deletes that breakpoint so that the program
+does not stop at that location again. Arguments can be one of the following:
+
+@c nested table
+@table @asis
+@item @var{n}
+Delete breakpoint(s) set at line number @var{n} in the current source file.
+
+@item @var{filename}@code{:}@var{n}
+Delete breakpoint(s) set at line number @var{n} in source file @var{filename}.
+
+@item @var{function}
+Delete breakpoint(s) set at entry to function @var{function}.
+@end table
+
+@cindex debugger commands, @code{condition}
+@cindex @code{condition} debugger command
+@cindex breakpoint condition
+@item @code{condition} @var{n} @code{"@var{expression}"}
+Add a condition to existing breakpoint or watchpoint @var{n}. The
+condition is an @command{awk} expression @emph{enclosed in double quotes}
+that the debugger evaluates
+whenever the breakpoint or watchpoint is reached. If the condition is true, then
+the debugger stops execution and prompts for a command. Otherwise,
+the debugger continues executing the program. If the condition expression is
+not specified, any existing condition is removed (i.e., the breakpoint or
+watchpoint is made unconditional).
+
+@cindex debugger commands, @code{d} (@code{delete})
+@cindex debugger commands, @code{delete}
+@cindex @code{delete} debugger command
+@cindex @code{d} debugger command (alias for @code{delete})
+@cindex delete breakpoint by number
+@cindex breakpoint, delete by number
+@item @code{delete} [@var{n1 n2} @dots{}] [@var{n}--@var{m}]
+@itemx @code{d} [@var{n1 n2} @dots{}] [@var{n}--@var{m}]
+Delete specified breakpoints or a range of breakpoints. Deletes
+all defined breakpoints if no argument is supplied.
+
+@cindex debugger commands, @code{disable}
+@cindex @code{disable} debugger command
+@cindex disable breakpoint
+@cindex breakpoint, how to disable or enable
+@item @code{disable} [@var{n1 n2} @dots{} | @var{n}--@var{m}]
+Disable specified breakpoints or a range of breakpoints. Without
+any argument, disables all breakpoints.
+
+@cindex debugger commands, @code{e} (@code{enable})
+@cindex debugger commands, @code{enable}
+@cindex @code{enable} debugger command
+@cindex @code{e} debugger command (alias for @code{enable})
+@cindex enable breakpoint
+@item @code{enable} [@code{del} | @code{once}] [@var{n1 n2} @dots{}] [@var{n}--@var{m}]
+@itemx @code{e} [@code{del} | @code{once}] [@var{n1 n2} @dots{}] [@var{n}--@var{m}]
+Enable specified breakpoints or a range of breakpoints. Without
+any argument, enables all breakpoints.
+Optionally, you can specify how to enable the breakpoint:
+
+@c nested table
+@table @code
+@item del
+Enable the breakpoint(s) temporarily, then delete it when
+the program stops at the breakpoint.
+
+@item once
+Enable the breakpoint(s) temporarily, then disable it when
+the program stops at the breakpoint.
+@end table
+
+@cindex debugger commands, @code{ignore}
+@cindex @code{ignore} debugger command
+@cindex ignore breakpoint
+@item @code{ignore} @var{n} @var{count}
+Ignore breakpoint number @var{n} the next @var{count} times it is
+hit.
+
+@cindex debugger commands, @code{t} (@code{tbreak})
+@cindex debugger commands, @code{tbreak}
+@cindex @code{tbreak} debugger command
+@cindex @code{t} debugger command (alias for @code{tbreak})
+@cindex temporary breakpoint
+@item @code{tbreak} [[@var{filename}@code{:}]@var{n} | @var{function}]
+@itemx @code{t} [[@var{filename}@code{:}]@var{n} | @var{function}]
+Set a temporary breakpoint (enabled for only one stop).
+The arguments are the same as for @code{break}.
+@end table
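+
+For example, returning to the @file{uniq.awk} session shown earlier, a
+sequence of breakpoint commands might look like the following sketch
+(the conditions are arbitrary and purely for illustration):
+
+@example
+gawk> @kbd{break are_equal "NR > 10"}
+gawk> @kbd{tbreak 63}
+gawk> @kbd{ignore 1 5}
+gawk> @kbd{condition 1}
+@end example
+
+@noindent
+The first command sets a breakpoint in @code{are_equal()} that stops only
+when @code{NR} is greater than 10, the second sets a one-time breakpoint
+at line 63, the third skips the next five hits of breakpoint 1, and the
+last removes the condition from breakpoint 1, making it unconditional.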
+
+@node Debugger Execution Control
+@subsection Control of Execution
+
+Now that your breakpoints are ready, you can start running the program
+and observing its behavior. There are more commands for controlling
+execution of the program than we saw in our earlier example:
+
+@table @asis
+@cindex debugger commands, @code{commands}
+@cindex @code{commands} debugger command
+@cindex debugger commands, @code{silent}
+@cindex @code{silent} debugger command
+@cindex debugger commands, @code{end}
+@cindex @code{end} debugger command
+@cindex breakpoint commands
+@cindex commands to execute at breakpoint
+@item @code{commands} [@var{n}]
+@itemx @code{silent}
+@itemx @dots{}
+@itemx @code{end}
+Set a list of commands to be executed upon stopping at
+a breakpoint or watchpoint. @var{n} is the breakpoint or watchpoint number.
+Without a number, the last one set is used. The actual commands follow,
+starting on the next line, and terminated by the @code{end} command.
+If the command @code{silent} is in the list, the usual messages about
+stopping at a breakpoint and the source line are not printed. Any command
+in the list that resumes execution (e.g., @code{continue}) terminates the list
+(an implicit @code{end}), and subsequent commands are ignored.
+For example:
+
+@example
+gawk> @kbd{commands}
+> @kbd{silent}
+> @kbd{printf "A silent breakpoint; i = %d\n", i}
+> @kbd{info locals}
+> @kbd{set i = 10}
+> @kbd{continue}
+> @kbd{end}
+gawk>
+@end example
+
+@cindex debugger commands, @code{c} (@code{continue})
+@cindex debugger commands, @code{continue}
+@cindex continue program, in debugger
+@item @code{continue} [@var{count}]
+@itemx @code{c} [@var{count}]
+Resume program execution. If continued from a breakpoint and @var{count} is
+specified, ignores the breakpoint at that location the next @var{count} times
+before stopping.
+
+@cindex debugger commands, @code{finish}
+@cindex @code{finish} debugger command
+@item @code{finish}
+Execute until the selected stack frame returns.
+Print the returned value.
+
+@cindex debugger commands, @code{n} (@code{next})
+@cindex debugger commands, @code{next}
+@cindex @code{next} debugger command
+@cindex @code{n} debugger command (alias for @code{next})
+@cindex single-step execution, in the debugger
+@item @code{next} [@var{count}]
+@itemx @code{n} [@var{count}]
+Continue execution to the next source line, stepping over function calls.
+The argument @var{count} controls how many times to repeat the action, as
+in @code{step}.
+
+@cindex debugger commands, @code{ni} (@code{nexti})
+@cindex debugger commands, @code{nexti}
+@cindex @code{nexti} debugger command
+@cindex @code{ni} debugger command (alias for @code{nexti})
+@item @code{nexti} [@var{count}]
+@itemx @code{ni} [@var{count}]
+Execute one (or @var{count}) instruction(s), stepping over function calls.
+
+@cindex debugger commands, @code{return}
+@cindex @code{return} debugger command
+@item @code{return} [@var{value}]
+Cancel execution of a function call. If @var{value} (either a string or a
+number) is specified, it is used as the function's return value. If used in a
+frame other than the innermost one (the currently executing function; i.e.,
+frame number 0), discard all inner frames in addition to the selected one,
+and the caller of that frame becomes the innermost frame.
+
+@cindex debugger commands, @code{r} (@code{run})
+@cindex debugger commands, @code{run}
+@cindex @code{run} debugger command
+@cindex @code{r} debugger command (alias for @code{run})
+@item @code{run}
+@itemx @code{r}
+Start/restart execution of the program. When restarting, the debugger
+retains the current breakpoints, watchpoints, command history,
+automatic display variables, and debugger options.
+
+@cindex debugger commands, @code{s} (@code{step})
+@cindex debugger commands, @code{step}
+@cindex @code{step} debugger command
+@cindex @code{s} debugger command (alias for @code{step})
+@item @code{step} [@var{count}]
+@itemx @code{s} [@var{count}]
+Continue execution until control reaches a different source line in the
+current stack frame. @code{step} steps inside any function called within
+the line. If the argument @var{count} is supplied, steps that many times before
+stopping, unless it encounters a breakpoint or watchpoint.
+
+@cindex debugger commands, @code{si} (@code{stepi})
+@cindex debugger commands, @code{stepi}
+@cindex @code{stepi} debugger command
+@cindex @code{si} debugger command (alias for @code{stepi})
+@item @code{stepi} [@var{count}]
+@itemx @code{si} [@var{count}]
+Execute one (or @var{count}) instruction(s), stepping inside function calls.
+(For illustration of what is meant by an ``instruction'' in @command{gawk},
+see the output shown under @code{dump} in @ref{Miscellaneous Debugger Commands}.)
+
+@cindex debugger commands, @code{u} (@code{until})
+@cindex debugger commands, @code{until}
+@cindex @code{until} debugger command
+@cindex @code{u} debugger command (alias for @code{until})
+@item @code{until} [[@var{filename}@code{:}]@var{n} | @var{function}]
+@itemx @code{u} [[@var{filename}@code{:}]@var{n} | @var{function}]
+Without any argument, continue execution until a line past the current
+line in the current stack frame is reached. With an argument,
+continue execution until the specified location is reached, or the current
+stack frame returns.
+@end table
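+
+Again using the @file{uniq.awk} session as context, a short sketch of
+these commands in action might look like this:
+
+@example
+gawk> @kbd{next 3}
+gawk> @kbd{until 70}
+gawk> @kbd{finish}
+@end example
+
+@noindent
+This steps over the next three source lines, then runs until line 70 of
+the current source file is reached, and finally lets the current function
+call complete, printing its return value.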
+
+@node Viewing And Changing Data
+@subsection Viewing and Changing Data
+
+The commands for viewing and changing variables inside of @command{gawk} are:
+
+@table @asis
+@cindex debugger commands, @code{display}
+@cindex @code{display} debugger command
+@item @code{display} [@var{var} | @code{$}@var{n}]
+Add variable @var{var} (or field @code{$@var{n}}) to the display list.
+The value of the variable or field is displayed each time the program stops.
+Each variable added to the list is identified by a unique number:
+
+@example
+gawk> @kbd{display x}
+@print{} 10: x = 1
+@end example
+
+@noindent
+This displays the assigned item number, the variable name, and its current value.
+If the display variable refers to a function parameter, it is silently
+deleted from the list as soon as the execution reaches a context where
+no such variable of the given name exists.
+Without argument, @code{display} displays the current values of
+items on the list.
+
+@cindex debugger commands, @code{eval}
+@cindex @code{eval} debugger command
+@cindex evaluate expressions, in debugger
+@item @code{eval "@var{awk statements}"}
+Evaluate @var{awk statements} in the context of the running program.
+You can do anything that an @command{awk} program would do: assign
+values to variables, call functions, and so on.
+
+@item @code{eval} @var{param}, @dots{}
+@itemx @var{awk statements}
+@itemx @code{end}
+This form of @code{eval} is similar, but it allows you to define
+``local variables'' that exist in the context of the
+@var{awk statements}, instead of using variables or function
+parameters defined by the program.
+
+@cindex debugger commands, @code{p} (@code{print})
+@cindex debugger commands, @code{print}
+@cindex @code{print} debugger command
+@cindex @code{p} debugger command (alias for @code{print})
+@cindex print variables, in debugger
+@item @code{print} @var{var1}[@code{,} @var{var2} @dots{}]
+@itemx @code{p} @var{var1}[@code{,} @var{var2} @dots{}]
+Print the value of a @command{gawk} variable or field.
+Fields must be referenced by constants:
+
+@example
+gawk> @kbd{print $3}
+@end example
+
+@noindent
+This prints the third field in the input record (if the specified field does not
+exist, it prints @samp{Null field}). A variable can be an array element, with
+the subscripts being constant string values. To print the contents of an array,
+prefix the name of the array with the @samp{@@} symbol:
+
+@example
+gawk> @kbd{print @@a}
+@end example
+
+@noindent
+This prints the indices and the corresponding values for all elements in
+the array @code{a}.
+
+@cindex debugger commands, @code{printf}
+@cindex @code{printf} debugger command
+@item @code{printf} @var{format} [@code{,} @var{arg} @dots{}]
+Print formatted text. The @var{format} may include escape sequences,
+such as @samp{\n}
+(@pxref{Escape Sequences}).
+No newline is printed unless one is specified.
+
+@cindex debugger commands, @code{set}
+@cindex @code{set} debugger command
+@cindex assign values to variables, in debugger
+@item @code{set} @var{var}@code{=}@var{value}
+Assign a constant (number or string) value to an @command{awk} variable
+or field.
+String values must be enclosed between double quotes (@code{"}@dots{}@code{"}).
+
+You can also set special @command{awk} variables, such as @code{FS},
+@code{NF}, @code{NR}, and so on.
+
+@cindex debugger commands, @code{w} (@code{watch})
+@cindex debugger commands, @code{watch}
+@cindex @code{watch} debugger command
+@cindex @code{w} debugger command (alias for @code{watch})
+@cindex set watchpoint
+@item @code{watch} @var{var} | @code{$}@var{n} [@code{"@var{expression}"}]
+@itemx @code{w} @var{var} | @code{$}@var{n} [@code{"@var{expression}"}]
+Add variable @var{var} (or field @code{$@var{n}}) to the watch list.
+The debugger then stops whenever
+the value of the variable or field changes. Each watched item is assigned a
+number which can be used to delete it from the watch list using the
+@code{unwatch} command.
+
+With a watchpoint, you may also supply a condition. This is an
+@command{awk} expression (enclosed in double quotes) that the debugger
+evaluates whenever the watchpoint is reached. If the condition is true,
+then the debugger stops execution and prompts for a command. Otherwise,
+@command{gawk} continues executing the program.
+
+@cindex debugger commands, @code{undisplay}
+@cindex @code{undisplay} debugger command
+@cindex stop automatic display, in debugger
+@item @code{undisplay} [@var{n}]
+Remove item number @var{n} (or all items, if no argument) from the
+automatic display list.
+
+@cindex debugger commands, @code{unwatch}
+@cindex @code{unwatch} debugger command
+@cindex delete watchpoint
+@item @code{unwatch} [@var{n}]
+Remove item number @var{n} (or all items, if no argument) from the
+watch list.
+
+@end table
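+
+For instance, continuing with the @file{uniq.awk} example, you might
+examine and adjust data like this (a sketch only; @code{last} and
+@code{n} are variables from that program):
+
+@example
+gawk> @kbd{watch last "NR > 3"}
+gawk> @kbd{print $2, NR}
+gawk> @kbd{set n = 4}
+gawk> @kbd{eval "print toupper(last)"}
+@end example
+
+@noindent
+The watchpoint stops execution whenever @code{last} changes, but only
+while @samp{NR > 3} is true; @code{print} and @code{set} examine and
+modify individual variables and fields; and @code{eval} runs an arbitrary
+@command{awk} statement in the context of the running program.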
+
+@node Execution Stack
+@subsection Working with the Stack
+
+Whenever you run a program which contains any function calls,
+@command{gawk} maintains a stack of all of the function calls leading up
+to where the program is right now. You can see how you got to where you are,
+and also move around in the stack to see what the state of things was in the
+functions which called the one you are in. The commands for doing this are:
+
+@table @asis
+@cindex debugger commands, @code{bt} (@code{backtrace})
+@cindex debugger commands, @code{backtrace}
+@cindex debugger commands, @code{where} (@code{backtrace})
+@cindex @code{backtrace} debugger command
+@cindex @code{bt} debugger command (alias for @code{backtrace})
+@cindex @code{where} debugger command
+@cindex @code{where} debugger command (alias for @code{backtrace})
+@cindex call stack, display in debugger
+@cindex traceback, display in debugger
+@item @code{backtrace} [@var{count}]
+@itemx @code{bt} [@var{count}]
+@itemx @code{where} [@var{count}]
+Print a backtrace of all function calls (stack frames), or innermost @var{count}
+frames if @var{count} > 0. Print the outermost @var{count} frames if
+@var{count} < 0. The backtrace displays the name and arguments to each
+function, the source @value{FN}, and the line number.
+The alias @code{where} for @code{backtrace} is provided for longtime
+GDB users who may be used to that command.
+
+@cindex debugger commands, @code{down}
+@cindex @code{down} debugger command
+@item @code{down} [@var{count}]
+Move @var{count} (default 1) frames down the stack toward the innermost frame.
+Then select and print the frame.
+
+@cindex debugger commands, @code{f} (@code{frame})
+@cindex debugger commands, @code{frame}
+@cindex @code{frame} debugger command
+@cindex @code{f} debugger command (alias for @code{frame})
+@item @code{frame} [@var{n}]
+@itemx @code{f} [@var{n}]
+Select and print stack frame @var{n}. Frame 0 is the currently executing,
+or @dfn{innermost}, frame (function call), frame 1 is the frame that
+called the innermost one. The highest numbered frame is the one for the
+main program. The printed information consists of the frame number,
+function and argument names, source file, and the source line.
+
+@cindex debugger commands, @code{up}
+@cindex @code{up} debugger command
+@item @code{up} [@var{count}]
+Move @var{count} (default 1) frames up the stack toward the outermost frame.
+Then select and print the frame.
+@end table
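+
+For example, after stopping inside @code{are_equal()} in the earlier
+session, you might move around the call stack like this:
+
+@example
+gawk> @kbd{bt}
+gawk> @kbd{frame 1}
+gawk> @kbd{down}
+@end example
+
+@noindent
+Here, @code{frame 1} selects the frame of the caller (in this case the
+main program), and @code{down} moves back toward the innermost frame.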
+
+@node Debugger Info
+@subsection Obtaining Information About the Program and the Debugger State
+
+Besides looking at the values of variables, there is often a need to get
+other sorts of information about the state of your program and of the
+debugging environment itself. The @command{gawk} debugger has one command which
+provides this information, appropriately called @code{info}. @code{info}
+is used with one of a number of arguments that tell it exactly what
+you want to know:
+
+@table @asis
+@cindex debugger commands, @code{i} (@code{info})
+@cindex debugger commands, @code{info}
+@cindex @code{info} debugger command
+@cindex @code{i} debugger command (alias for @code{info})
+@item @code{info} @var{what}
+@itemx @code{i} @var{what}
+The value for @var{what} should be one of the following:
+
+@c nested table
+@table @code
+@item args
+@cindex show function arguments, in debugger
+List arguments of the selected frame.
+
+@item break
+@cindex show breakpoints
+List all currently set breakpoints.
+
+@item display
+@cindex automatic displays, in debugger
+List all items in the automatic display list.
+
+@item frame
+@cindex describe call stack frame, in debugger
+Give a description of the selected stack frame.
+
+@item functions
+@cindex list function definitions, in debugger
+List all function definitions including source @value{FN}s and
+line numbers.
+
+@item locals
+@cindex show local variables, in debugger
+List local variables of the selected frame.
+
+@item source
+@cindex show name of current source file, in debugger
+Print the name of the current source file. Each time the program stops, the
+current source file is the file containing the current instruction.
+When the debugger first starts, the current source file is the first file
+included via the @option{-f} option. The
+@samp{list @var{filename}:@var{lineno}} command can
+be used at any time to change the current source.
+
+@item sources
+@cindex show all source files, in debugger
+List all program sources.
+
+@item variables
+@cindex list all global variables, in debugger
+List all global variables.
+
+@item watch
+@cindex show watchpoints
+List all items in the watch list.
+@end table
+@end table
+
+Additional commands give you control over the debugger, the ability to
+save the debugger's state, and the ability to run debugger commands
+from a file. The commands are:
+
+@table @asis
+@cindex debugger commands, @code{o} (@code{option})
+@cindex debugger commands, @code{option}
+@cindex @code{option} debugger command
+@cindex @code{o} debugger command (alias for @code{option})
+@cindex display debugger options
+@cindex debugger options
+@item @code{option} [@var{name}[@code{=}@var{value}]]
+@itemx @code{o} [@var{name}[@code{=}@var{value}]]
+Without an argument, display the available debugger options
+and their current values. @samp{option @var{name}} shows the current
+value of the named option. @samp{option @var{name}=@var{value}} assigns
+a new value to the named option.
+The available options are:
+
+@c nested table
+@c asis for docbook
+@table @asis
+@item @code{history_size}
+@cindex debugger history size
+The maximum number of lines to keep in the history file @file{./.gawk_history}.
+The default is 100.
+
+@item @code{listsize}
+@cindex debugger default list amount
+The number of lines that @code{list} prints. The default is 15.
+
+@item @code{outfile}
+@cindex redirect @command{gawk} output, in debugger
+Send @command{gawk} output to a file; debugger output still goes
+to standard output. An empty string (@code{""}) resets output to
+standard output.
+
+@item @code{prompt}
+@cindex debugger prompt
+The debugger prompt. The default is @samp{@w{gawk> }}.
+
+@item @code{save_history} [@code{on} | @code{off}]
+@cindex debugger history file
+Save command history to file @file{./.gawk_history}.
+The default is @code{on}.
+
+@item @code{save_options} [@code{on} | @code{off}]
+@cindex save debugger options
+Save current options to file @file{./.gawkrc} upon exit.
+The default is @code{on}.
+Options are read back in to the next session upon startup.
+
+@item @code{trace} [@code{on} | @code{off}]
+@cindex instruction tracing, in debugger
+Turn instruction tracing on or off. The default is @code{off}.
+@end table
+
+@item @code{save} @var{filename}
+Save the commands from the current session to the given @value{FN},
+so that they can be replayed using the @command{source} command.
+
+@item @code{source} @var{filename}
+@cindex debugger, read commands from a file
+Run command(s) from a file; an error in any command does not
+terminate execution of subsequent commands. Comments (lines starting
+with @samp{#}) are allowed in a command file.
+Empty lines are ignored; they do @emph{not}
+repeat the last command.
+You can't restart the program by having more than one @code{run}
+command in the file. Also, the list of commands may include additional
+@code{source} commands; however, the @command{gawk} debugger will not source the
+same file more than once in order to avoid infinite recursion.
+
+In addition to, or instead of the @code{source} command, you can use
+the @option{-D @var{file}} or @option{--debug=@var{file}} command-line
+options to execute commands from a file non-interactively
+(@pxref{Options}).
+@end table
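+
+As a simple illustration, you might keep a small file of setup commands
+and replay it at the start of each session. The @value{FN} and the
+breakpoint location here are purely hypothetical:
+
+@example
+$ @kbd{cat debug-setup}
+@print{} # hypothetical setup commands
+@print{} break 1
+@print{} run
+$ @kbd{gawk --debug=debug-setup -f program.awk}
+@end example
+
+@noindent
+The same file could also be loaded from inside the debugger with
+@samp{source debug-setup}.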
+
+@node Miscellaneous Debugger Commands
+@subsection Miscellaneous Commands
+
+There are a few more commands that do not fit into the
+previous categories, as follows:
+
+@table @asis
+@cindex debugger commands, @code{dump}
+@cindex @code{dump} debugger command
+@item @code{dump} [@var{filename}]
+Dump bytecode of the program to standard output or to the file
+named in @var{filename}. This prints a representation of the internal
+instructions which @command{gawk} executes to implement the @command{awk}
+commands in a program. This can be very enlightening, as the following
+partial dump of Davide Brini's obfuscated code
+(@pxref{Signature Program}) demonstrates:
+
+@c FIXME: This will need updating if num-handler branch is ever merged in.
+@smallexample
+gawk> @kbd{dump}
+@print{} # BEGIN
+@print{}
+@print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
+@print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
+@print{} [ 1:0xfcc2a0] Op_push_i : "~" [MALLOC|STRING|STRCUR]
+@print{} [ 1:0xfcc280] Op_match :
+@print{} [ 1:0xfcc1e0] Op_store_var : O
+@print{} [ 1:0xfcc2e0] Op_push_i : "==" [MALLOC|STRING|STRCUR]
+@print{} [ 1:0xfcc340] Op_push_i : "==" [MALLOC|STRING|STRCUR]
+@print{} [ 1:0xfcc320] Op_equal :
+@print{} [ 1:0xfcc200] Op_store_var : o
+@print{} [ 1:0xfcc380] Op_push : o
+@print{} [ 1:0xfcc360] Op_plus_i : 0 [MALLOC|NUMCUR|NUMBER]
+@print{} [ 1:0xfcc220] Op_push_lhs : o [do_reference = true]
+@print{} [ 1:0xfcc300] Op_assign_plus :
+@print{} [ :0xfcc2c0] Op_pop :
+@print{} [ 1:0xfcc400] Op_push : O
+@print{} [ 1:0xfcc420] Op_push_i : "" [MALLOC|STRING|STRCUR]
+@print{} [ :0xfcc4a0] Op_no_op :
+@print{} [ 1:0xfcc480] Op_push : O
+@print{} [ :0xfcc4c0] Op_concat : [expr_count = 3] [concat_flag = 0]
+@print{} [ 1:0xfcc3c0] Op_store_var : x
+@print{} [ 1:0xfcc440] Op_push_lhs : X [do_reference = true]
+@print{} [ 1:0xfcc3a0] Op_postincrement :
+@print{} [ 1:0xfcc4e0] Op_push : x
+@print{} [ 1:0xfcc540] Op_push : o
+@print{} [ 1:0xfcc500] Op_plus :
+@print{} [ 1:0xfcc580] Op_push : o
+@print{} [ 1:0xfcc560] Op_plus :
+@print{} [ 1:0xfcc460] Op_leq :
+@print{} [ :0xfcc5c0] Op_jmp_false : [target_jmp = 0xfcc5e0]
+@print{} [ 1:0xfcc600] Op_push_i : "%c" [MALLOC|STRING|STRCUR]
+@print{} [ :0xfcc660] Op_no_op :
+@print{} [ 1:0xfcc520] Op_assign_concat : c
+@print{} [ :0xfcc620] Op_jmp : [target_jmp = 0xfcc440]
+@print{}
+@dots{}
+@print{}
+@print{} [ 2:0xfcc5a0] Op_K_printf : [expr_count = 17] [redir_type = ""]
+@print{} [ :0xfcc140] Op_no_op :
+@print{} [ :0xfcc1c0] Op_atexit :
+@print{} [ :0xfcc640] Op_stop :
+@print{} [ :0xfcc180] Op_no_op :
+@print{} [ :0xfcd150] Op_after_beginfile :
+@print{} [ :0xfcc160] Op_no_op :
+@print{} [ :0xfcc1a0] Op_after_endfile :
+gawk>
+@end smallexample
+
+@cindex debugger commands, @code{h} (@code{help})
+@cindex debugger commands, @code{help}
+@cindex @code{help} debugger command
+@cindex @code{h} debugger command (alias for @code{help})
+@item @code{help}
+@itemx @code{h}
+Print a list of all of the @command{gawk} debugger commands with a short
+summary of their usage. @samp{help @var{command}} prints the information
+about the command @var{command}.
+
+@cindex debugger commands, @code{l} (@code{list})
+@cindex debugger commands, @code{list}
+@cindex @code{list} debugger command
+@cindex @code{l} debugger command (alias for @code{list})
+@item @code{list} [@code{-} | @code{+} | @var{n} | @var{filename}@code{:}@var{n} | @var{n}--@var{m} | @var{function}]
+@itemx @code{l} [@code{-} | @code{+} | @var{n} | @var{filename}@code{:}@var{n} | @var{n}--@var{m} | @var{function}]
+Print the specified lines (default 15) from the current source file
+or the file named @var{filename}. The possible arguments to @code{list}
+are as follows:
+
+@c nested table
+@table @asis
+@item @code{-} (Minus)
+Print lines before the lines last printed.
+
+@item @code{+}
+Print lines after the lines last printed.
+@code{list} without any argument does the same thing.
+
+@item @var{n}
+Print lines centered around line number @var{n}.
+
+@item @var{n}--@var{m}
+Print lines from @var{n} to @var{m}.
+
+@item @var{filename}@code{:}@var{n}
+Print lines centered around line number @var{n} in
+source file @var{filename}. This command may change the current source file.
+
+@item @var{function}
+Print lines centered around beginning of the
+function @var{function}. This command may change the current source file.
+@end table
+
+@cindex debugger commands, @code{q} (@code{quit})
+@cindex debugger commands, @code{quit}
+@cindex @code{quit} debugger command
+@cindex @code{q} debugger command (alias for @code{quit})
+@cindex exit the debugger
+@item @code{quit}
+@itemx @code{q}
+Exit the debugger. Debugging is great fun, but sometimes we all have
+to tend to other obligations in life, and sometimes we find the bug,
+and are free to go on to the next one! As we saw earlier, if you are
+running a program, the debugger warns you if you accidentally type
+@samp{q} or @samp{quit}, to make sure you really want to quit.
+
+@cindex debugger commands, @code{trace}
+@cindex @code{trace} debugger command
+@item @code{trace} [@code{on} | @code{off}]
+Turn on or off continuous printing of the instructions that are about to
+be executed, along with the @command{awk} line they
+implement. The default is @code{off}.
+
+It is to be hoped that most of the ``opcodes'' in these instructions are
+fairly self-explanatory, and using @code{stepi} and @code{nexti} while
+@code{trace} is on will make them into familiar friends.
+
+@end table
+
+@node Readline Support
+@section Readline Support
+@cindex command completion, in debugger
+@cindex history expansion, in debugger
+
+If @command{gawk} is compiled with
+@uref{http://cnswww.cns.cwru.edu/php/chet/readline/readline.html,
+the @code{readline} library}, you can take advantage of that library's
+command completion and history expansion features. The following types
+of completion are available:
+
+@table @asis
+@item Command completion
+Command names.
+
+@item Source @value{FN} completion
+Source @value{FN}s. Relevant commands are
+@code{break},
+@code{clear},
+@code{list},
+@code{tbreak},
+and
+@code{until}.
+
+@item Argument completion
+Non-numeric arguments to a command.
+Relevant commands are @code{enable} and @code{info}.
+
+@item Variable name completion
+Global variable names, and function arguments in the current context
+if the program is running. Relevant commands are
+@code{display},
+@code{print},
+@code{set},
+and
+@code{watch}.
+
+@end table
+
+@node Limitations
+@section Limitations
+
+We hope you find the @command{gawk} debugger useful and enjoyable to work with,
+but as with any program, especially in its early releases, it still has
+some limitations. A few which are worth being aware of are:
+
+@itemize @value{BULLET}
+@item
+At this point, the debugger does not give a detailed explanation of
+what you did wrong when you type in something it doesn't like. Rather, it just
+responds @samp{syntax error}. When you do figure out what your mistake was,
+though, you'll feel like a real guru.
+
+@item
+@c NOTE: no comma after the ref{} on purpose, due to following
+@c parenthetical remark.
+If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}
+(or if you are already familiar with @command{gawk} internals),
+you will realize that much of the internal manipulation of data
+in @command{gawk}, as in many interpreters, is done on a stack.
+@code{Op_push}, @code{Op_pop}, and the like, are the ``bread and butter'' of
+most @command{gawk} code.
+
+Unfortunately, as of now, the @command{gawk}
+debugger does not allow you to examine the stack's contents.
+That is, the intermediate results of expression evaluation are on the
+stack, but cannot be printed. Rather, only variables which are defined
+in the program can be printed. Of course, a workaround for
+this is to use more explicit variables at the debugging stage and then
+change back to obscure, perhaps more optimal code later.
+
+@item
+There is no way to look ``inside'' the process of compiling
+regular expressions to see if you got it right. As an @command{awk}
+programmer, you are expected to know the meaning of
+@code{/[^[:alnum:][:blank:]]/}.
+
+@item
+The @command{gawk} debugger is designed to be used by running a program (with all its
+parameters) on the command line, as described in @ref{Debugger Invocation}.
+There is no way (as of now) to attach or ``break in'' to a running program.
+This seems reasonable for a language that is used mainly for
+short, quickly executing programs.
+
+@item
+The @command{gawk} debugger only accepts source supplied with the @option{-f} option.
+@end itemize
+
+@ignore
+Look forward to a future release when these and other missing features may
+be added, and of course feel free to try to add them yourself!
+@end ignore
+
+@node Debugging Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Programs rarely work correctly the first time. Finding bugs
+is @dfn{debugging} and a program that helps you find bugs is a
+@dfn{debugger}. @command{gawk} has a built-in debugger that works very
+similarly to the GNU Debugger, GDB.
+
+@item
+Debuggers let you step through your program one statement at a time,
+examine and change variable and array values, and do a number of other
+things that let you understand what your program is actually doing (as
+opposed to what it is supposed to do).
+
+@item
+Like most debuggers, the @command{gawk} debugger works in terms of stack
+frames, and lets you set both breakpoints (stop at a point in the code)
+and watchpoints (stop when a data value changes).
+
+@item
+The debugger command set is fairly complete, providing control over
+breakpoints, execution, viewing and changing data, working with the stack,
+getting information, and other tasks.
+
+@item
+If the @code{readline} library is available when @command{gawk} is
+compiled, it is used by the debugger to provide command-line history
+and editing.
+
+@end itemize
+
+@node Arbitrary Precision Arithmetic
+@chapter Arithmetic and Arbitrary-Precision Arithmetic with @command{gawk}
+@cindex arbitrary precision
+@cindex multiple precision
+@cindex infinite precision
+@cindex floating-point, numbers@comma{} arbitrary precision
+
+This @value{CHAPTER} introduces some basic concepts relating to
+how computers do arithmetic and defines some important terms.
+It then proceeds to describe floating-point arithmetic,
+which is what @command{awk} uses for all its computations, including a
+discussion of arbitrary-precision floating-point arithmetic, which is
+a feature available only in @command{gawk}. It continues on to present
+arbitrary-precision integers, and concludes with a description of some
+points where @command{gawk} and the POSIX standard are not quite in
+agreement.
+
+@quotation NOTE
+Most users of @command{gawk} can safely skip this chapter.
+But if you want to do scientific calculations with @command{gawk},
+this is the place to be.
+@end quotation
+
+@menu
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
+ @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
+@end menu
+
+@node Computer Arithmetic
+@section A General Description of Computer Arithmetic
+
+Until now, we have worked with data as either numbers or
+strings. Ultimately, however, computers represent everything in terms
+of @dfn{binary digits}, or @dfn{bits}. A decimal digit can take on any
+of 10 values: zero through nine. A binary digit can take on either of two
+values: zero or one. Using binary, computers (and computer software)
+can represent and manipulate numerical and character data. In general,
+the more bits you can use to represent a particular thing, the greater
+the range of possible values it can take on.
+
+Modern computers support at least two, and often more, ways to do
+arithmetic. Each kind of arithmetic uses a different representation
+(organization of the bits) for the numbers. The kinds of arithmetic
+that interest us are:
+
+@table @asis
+@item Decimal arithmetic
+This is the kind of arithmetic you learned in elementary school, using
+paper and pencil (and/or a calculator). In theory, numbers can have an
+arbitrary number of digits on either side (or both sides) of the decimal
+point, and the results of a computation are always exact.
+
+Some modern systems can do decimal arithmetic in hardware, but usually you
+need a special software library to provide access to these instructions.
+There are also libraries that do decimal arithmetic entirely in software.
+
+Despite the fact that some users expect @command{gawk} to be performing
+decimal arithmetic,@footnote{We don't know why they expect this, but
+they do.} it does not do so.
+
+@item Integer arithmetic
+In school, integer values were referred to as ``whole'' numbers---that
+is, numbers without any fractional part, such as 1, 42, or @minus{}17.
+The advantage to integer numbers is that they represent values exactly.
+The disadvantage is that their range is limited.
+
+@cindex unsigned integers
+@cindex integers, unsigned
+In computers, integer values come in two flavors: @dfn{signed} and
+@dfn{unsigned}. Signed values may be negative or positive, whereas
+unsigned values are always positive (i.e., greater than or equal
+to zero).
+
+In computer systems, integer arithmetic is exact, but the possible
+range of values is limited. Integer arithmetic is generally faster than
+floating-point arithmetic.
+
+@item Floating-point arithmetic
+Floating-point numbers represent what were called in school ``real''
+numbers (i.e., those that have a fractional part, such as 3.1415927).
+The advantage to floating-point numbers is that they can represent a
+much larger range of values than can integers. The disadvantage is that
+there are numbers that they cannot represent exactly.
+
+Modern systems support floating-point arithmetic in hardware, with a
+limited range of values. There are software libraries that allow
+the use of arbitrary-precision floating-point calculations.
+
+POSIX @command{awk} uses @dfn{double-precision} floating-point numbers, which
+can hold more digits than @dfn{single-precision} floating-point numbers.
+@command{gawk} has facilities for performing arbitrary-precision
+floating-point arithmetic, which we describe in more detail shortly.
+@end table
+
+Computers work with integer and floating-point values of different
+ranges. Integer values are usually either 32 or 64 bits in size.
+Single-precision floating-point values occupy 32 bits, whereas double-precision
+floating-point values occupy 64 bits. Floating-point values are always
+signed. The possible ranges of values are shown in @ref{table-numeric-ranges}.
+
+@float Table,table-numeric-ranges
+@caption{Value ranges for different numeric representations}
+@multitable @columnfractions .34 .33 .33
+@headitem Numeric representation @tab Minimum value @tab Maximum value
+@item 32-bit signed integer @tab @minus{}2,147,483,648 @tab 2,147,483,647
+@item 32-bit unsigned integer @tab 0 @tab 4,294,967,295
+@item 64-bit signed integer @tab @minus{}9,223,372,036,854,775,808 @tab 9,223,372,036,854,775,807
+@item 64-bit unsigned integer @tab 0 @tab 18,446,744,073,709,551,615
+@item Single-precision floating point (approximate) @tab @code{1.175494e-38} @tab @code{3.402823e+38}
+@item Double-precision floating point (approximate) @tab @code{2.225074e-308} @tab @code{1.797693e+308}
+@end multitable
+@end float
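+
+The limited precision of double-precision floating-point values is easy
+to demonstrate with @command{gawk} itself: above 2^53 (about 9.0e15),
+not every integer can be represented exactly, so adding one may not
+change a value. (This is just a quick illustration; the exact printed
+form of large numbers can vary from system to system.)
+
+@example
+$ @kbd{gawk 'BEGIN @{ x = 2 ^ 53; print x, x + 1 @}'}
+@print{} 9007199254740992 9007199254740992
+@end example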
+
+@node Math Definitions
+@section Other Stuff to Know
+
+The rest of this @value{CHAPTER} uses a number of terms. Here are some
+informal definitions that should help you work your way through the material
+here.
+
+@table @dfn
+@item Accuracy
+A floating-point calculation's accuracy is how close it comes
+to the real (paper and pencil) value.
+
+@item Error
+The difference between what the result of a computation ``should be''
+and what it actually is. It is best to minimize error as much
+as possible.
+
+@item Exponent
+The order of magnitude of a value;
+some number of bits in a floating-point value store the exponent.
+
+@item Inf
+A special value representing infinity. Operations involving another
+number and infinity produce infinity.
+
+@item NaN
+``Not A Number.''@footnote{Thanks to Michael Brennan for this description,
+which we have paraphrased, and for the examples.} A special value that
+results from attempting a calculation that has no answer as a real number.
+In such a case, programs can either receive a floating-point exception,
+or get @code{NaN} back as the result. The IEEE 754 standard recommends
+that systems return @code{NaN}. Some examples:
+
+@table @code
+@item sqrt(-1)
+This makes sense in the range of complex numbers, but not in the
+range of real numbers, so the result is @code{NaN}.
+
+@item log(-8)
+@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}.
+@end table
+
+@item Normalized
+How the significand (see later in this list) is usually stored. The
+value is adjusted so that the first bit is one, and then that leading
+one is assumed instead of physically stored. This provides one
+extra bit of precision.
+
+@item Precision
+The number of bits used to represent a floating-point number.
+The more bits, the more digits you can represent.
+Binary and decimal precisions are related approximately, according to the
+formula:
+
+@display
+@iftex
+@math{prec = 3.322 @cdot dps}
+@end iftex
+@ifnottex
+@ifnotdocbook
+@var{prec} = 3.322 * @var{dps}
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis> @c
+@end docbook
+@end display
+
+@noindent
+Here, @var{prec} denotes the binary precision
+(measured in bits) and @var{dps} (short for decimal places)
+is the number of decimal digits. (A short example of applying this
+formula appears just after this list.)
+
+@item Rounding mode
+How numbers are rounded up or down when necessary.
+More details are provided later.
+
+@item Significand
+A floating-point value consists of the significand multiplied by 10
+to the power of the exponent. For example, in @code{1.2345e67},
+the significand is @code{1.2345}.
+
+@item Stability
+From @uref{http://en.wikipedia.org/wiki/Numerical_stability,
+the Wikipedia article on numerical stability}:
+``Calculations that can be proven not to magnify approximation errors
+are called @dfn{numerically stable}.''
+@end table
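+
+For example, the formula lets you estimate how many decimal digits a
+given binary precision is good for. Applying it to the 53 bits of a
+hardware double gives a rough estimate:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf("%.2f\n", 53 / 3.322) @}'}
+@print{} 15.95
+@end example
+
+@noindent
+This is why roughly 15 decimal digits is usually quoted as the reliable
+precision of double-precision arithmetic.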
+
+See @uref{http://en.wikipedia.org/wiki/Accuracy_and_precision,
+the Wikipedia article on accuracy and precision} for more information
+on some of those terms.
+
+On modern systems, floating-point hardware uses the representation and
+operations defined by the IEEE 754 standard.
+Three of the standard IEEE 754 types are 32-bit single precision,
+64-bit double precision, and 128-bit quadruple precision.
+The standard also specifies extended precision formats
+to allow greater precisions and larger exponent ranges.
+(@command{awk} uses only the 64-bit double-precision format.)
+
+@ref{table-ieee-formats} lists the precision and exponent
+field values for the basic IEEE 754 binary formats:
+
+@float Table,table-ieee-formats
+@caption{Basic IEEE format values}
+@multitable @columnfractions .20 .20 .20 .20 .20
+@headitem Name @tab Total bits @tab Precision @tab Minimum exponent @tab Maximum exponent
+@item Single @tab 32 @tab 24 @tab @minus{}126 @tab +127
+@item Double @tab 64 @tab 53 @tab @minus{}1022 @tab +1023
+@item Quadruple @tab 128 @tab 113 @tab @minus{}16382 @tab +16383
+@end multitable
+@end float
+
+@quotation NOTE
+The precision numbers include the implied leading one that gives them
+one extra bit of significand.
+@end quotation
+
+@node MPFR features
+@section Arbitrary-Precision Arithmetic Features in @command{gawk}
+
+By default, @command{gawk} uses the double-precision floating-point values
+supplied by the hardware of the system it runs on. However, if it was
+compiled to do so, @command{gawk} uses the @uref{http://www.mpfr.org,
+GNU MPFR} and @uref{http://gmplib.org, GNU MP} (GMP) libraries for
+arbitrary-precision arithmetic on numbers. You can see if MPFR support
+is available like so:
+
+@example
+$ @kbd{gawk --version}
+@print{} GNU Awk 4.1.2, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
+@print{} Copyright (C) 1989, 1991-2015 Free Software Foundation.
+@dots{}
+@end example
+
+@noindent
+(You may see different version numbers than what's shown here. That's OK;
+what's important is to see that GNU MPFR and GNU MP are listed in
+the output.)
+
+Additionally, there are a few elements available in the @code{PROCINFO}
+array to provide information about the MPFR and GMP libraries
+(@pxref{Auto-set}).
+
+The MPFR library provides precise control over precisions and rounding
+modes, and gives correctly rounded, reproducible, platform-independent
+results. With the @option{-M} command-line option,
+all floating-point arithmetic operators and numeric functions
+can yield results to any desired precision level supported by MPFR.
+
+Two predefined variables, @code{PREC} and @code{ROUNDMODE},
+provide control over the working precision and the rounding mode.
+The precision and the rounding mode are set globally for every operation
+to follow.
+@DBXREF{Setting precision} and @DBREF{Setting the rounding mode}
+for more information.
+
+@node FP Math Caution
+@section Floating-Point Arithmetic: Caveat Emptor!
+
+@quotation
+@i{Math class is tough!}
+@author Teen Talk Barbie, July 1992
+@end quotation
+
+This @value{SECTION} provides a high-level overview of the issues
+involved when doing lots of floating-point arithmetic.@footnote{There
+is a very nice @uref{http://www.validlab.com/goldberg/paper.pdf,
+paper on floating-point arithmetic} by David Goldberg, ``What Every
+Computer Scientist Should Know About Floating-point Arithmetic,''
+@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48. This is
+worth reading if you are interested in the details, but it does require
+a background in computer science.}
+The discussion applies to both hardware and arbitrary-precision
+floating-point arithmetic.
+
+@quotation CAUTION
+The material here is purposely general. If you need to do serious
+computer arithmetic, you should do some research first, and not
+rely just on what we tell you.
+@end quotation
+
+@menu
+* Inexactness of computations:: Floating point math is not exact.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
+@end menu
+
+@node Inexactness of computations
+@subsection Floating-Point Arithmetic Is Not Exact
+
+Binary floating-point representations and arithmetic are inexact.
+Simple values like 0.1 cannot be precisely represented using
+binary floating-point numbers, and the limited precision of
+floating-point numbers means that slight changes in
+the order of operations or the precision of intermediate storage
+can change the result. To make matters worse, with arbitrary-precision
+floating-point arithmetic, you can set the precision before starting a
+computation, but then you cannot be sure of the number of significant
+decimal places in the final result.
+
+@menu
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+@end menu
+
+@node Inexact representation
+@subsubsection Many Numbers Cannot Be Represented Exactly
+
+So, before you start to write any code, you should think
+about what you really want and what's really happening. Consider the
+two numbers in the following example:
+
+@example
+x = 0.875 # 1/2 + 1/4 + 1/8
+y = 0.425
+@end example
+
+Unlike the number in @code{y}, the number stored in @code{x}
+is exactly representable
+in binary because it can be written as a finite sum of one or
+more fractions whose denominators are all powers of two.
+When @command{gawk} reads a floating-point number from
+program source, it automatically rounds that number to whatever
+precision your machine supports. If you try to print the numeric
+content of a variable using an output format string of @code{"%.17g"},
+it may not produce the same number as you assigned to it:
+
+@example
+$ @kbd{gawk 'BEGIN @{ x = 0.875; y = 0.425}
+> @kbd{ printf("%0.17g, %0.17g\n", x, y) @}'}
+@print{} 0.875, 0.42499999999999999
+@end example
+
+Often the error is so small you do not even notice it, and if you do,
+you can always specify how much precision you would like in your output.
+Usually this is a format string like @code{"%.15g"}, which, when
+used in the previous example, produces output identical to the input.
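+
+For instance, with @code{"%.15g"} the values display exactly as they
+were written:
+
+@example
+$ @kbd{gawk 'BEGIN @{ x = 0.875; y = 0.425}
+> @kbd{              printf("%0.15g, %0.15g\n", x, y) @}'}
+@print{} 0.875, 0.425
+@end example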
+
+@node Comparing FP Values
+@subsubsection Be Careful Comparing Values
+
+Because the underlying representation can be a little bit off from the exact value,
+comparing floating-point values to see if they are exactly equal is generally a bad idea.
+Here is an example where it does not work like you would expect:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 0
+@end example
+
+The general wisdom when comparing floating-point values is to see if
+they are within some small range of each other (called a @dfn{delta},
+or @dfn{tolerance}).
+You have to decide how small a delta is important to you. Code to do
+this looks something like the following:
+
+@example
+delta = 0.00001 # for example
+difference = abs(a - b)      # absolute value of the difference
+if (difference < delta)
+ # all ok
+else
+ # not ok
+@end example
+
+@noindent
+(We assume that you have a simple absolute value function named
+@code{abs()} defined elsewhere in your program.)
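+
+Putting the pieces together, a complete (if simplistic) version of this
+test might look like the following; the one-line @code{abs()} function
+and the chosen delta are just for illustration:
+
+@example
+function abs(v)
+@{
+    return v < 0 ? -v : v
+@}
+
+BEGIN @{
+    delta = 0.00001
+    if (abs((0.1 + 12.2) - 12.3) < delta)
+        print "effectively equal"
+    else
+        print "significantly different"
+@}
+@end example
+
+@noindent
+This prints @samp{effectively equal}, even though a direct comparison
+with @samp{==} says the two values differ.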
+
+@node Errors accumulate
+@subsubsection Errors Accumulate
+
+The loss of accuracy during a single computation with floating-point
+numbers usually isn't enough to worry about. However, if you compute a
+value which is the result of a sequence of floating-point operations,
+the error can accumulate and greatly affect the computation itself.
+Here is an attempt to compute the value of @value{PI} using one of its
+many series representations:
+
+@example
+BEGIN @{
+ x = 1.0 / sqrt(3.0)
+ n = 6
+ for (i = 1; i < 30; i++) @{
+ n = n * 2.0
+ x = (sqrt(x * x + 1) - 1) / x
+ printf("%.15f\n", n * x)
+ @}
+@}
+@end example
+
+When run, the early errors propagate through the later computations,
+causing the program to terminate prematurely when it attempts to divide by zero:
+
+@example
+$ @kbd{gawk -f pi.awk}
+@print{} 3.215390309173475
+@print{} 3.159659942097510
+@print{} 3.146086215131467
+@print{} 3.142714599645573
+@dots{}
+@print{} 3.224515243534819
+@print{} 2.791117213058638
+@print{} 0.000000000000000
+@error{} gawk: pi.awk:6: fatal: division by zero attempted
+@end example
+
+Here is an additional example where the inaccuracies in internal representations
+yield an unexpected result:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
+> @kbd{i++}
+> @kbd{print i}
+> @kbd{@}'}
+@print{} 4
+@end example
+
+@node Getting Accuracy
+@subsection Getting the Accuracy You Need
+
+Can arbitrary-precision arithmetic give exact results? There are
+no easy answers. The standard rules of algebra often do not apply
+when using floating-point arithmetic.
+Among other things, the distributive and associative laws
+do not hold completely, and order of operation may be important
+for your computation. Rounding error, cumulative precision loss
+and underflow are often troublesome.
+
+When @command{gawk} tests the expressions @samp{0.1 + 12.2} and
+@samp{12.3} for equality using the machine double-precision arithmetic,
+it decides that they are not equal! (@xref{Comparing FP Values}.)
+You can get the result you want by increasing the precision; 56 bits in
+this case does the job:
+
+@example
+$ @kbd{gawk -M -v PREC=56 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 1
+@end example
+
+If adding more bits is good, perhaps adding even more bits of
+precision is better?
+Here is what happens if we use an even larger value of @code{PREC}:
+
+@example
+$ @kbd{gawk -M -v PREC=201 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 0
+@end example
+
+This is not a bug in @command{gawk} or in the MPFR library.
+It is easy to forget that the finite number of bits used to store the value
+is often just an approximation after proper rounding.
+The test for equality succeeds if and only if @emph{all} bits in the two operands
+are exactly the same. Because this is not necessarily true after floating-point
+computations with a particular precision and effective rounding mode,
+a straight test for equality may not work. Instead, compare the
+two numbers to see if they are within the desirable delta of each other.
+
+In applications where 15 or fewer decimal places suffice,
+hardware double-precision arithmetic can be adequate, and is usually much faster.
+But you need to keep in mind that every floating-point operation
+can suffer a new rounding error with catastrophic consequences, as illustrated
+by our earlier attempt to compute the value of @value{PI}.
+Extra precision can greatly enhance the stability and the accuracy
+of your computation in such cases.
+
+Repeated addition is not necessarily equivalent to multiplication
+in floating-point arithmetic. In the example in
+@ref{Errors accumulate}:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
+> @kbd{i++}
+> @kbd{print i}
+> @kbd{@}'}
+@print{} 4
+@end example
+
+@noindent
+you may or may not succeed in getting the correct result by choosing
+an arbitrarily large value for @code{PREC}. Reformulation of
+the problem at hand is often the correct approach in such situations.
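+
+For the loop just shown, one such reformulation is to count with exact
+integers and derive the floating-point value inside the loop, rather than
+accumulating it step by step (a sketch of the idea):
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{for (j = 11; j <= 15; j++) @{   # loop five times, exactly}
+> @kbd{    d = j / 10}
+> @kbd{    i++}
+> @kbd{@}}
+> @kbd{print i}
+> @kbd{@}'}
+@print{} 5
+@end example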
+
+@node Try To Round
+@subsection Try a Few Extra Bits of Precision and Rounding
+
+Instead of arbitrary-precision floating-point arithmetic,
+often all you need is an adjustment of your logic
+or a different order for the operations in your calculation.
+The stability and the accuracy of the computation of @value{PI}
+in the earlier example can be enhanced by using the following
+simple algebraic transformation:
+
+@example
+(sqrt(x * x + 1) - 1) / x @equiv{} x / (sqrt(x * x + 1) + 1)
+@end example
+
+@noindent
+After making this change, the program converges to
+@value{PI} in under 30 iterations:
+
+@example
+$ @kbd{gawk -f pi2.awk}
+@print{} 3.215390309173473
+@print{} 3.159659942097501
+@print{} 3.146086215131436
+@print{} 3.142714599645370
+@print{} 3.141873049979825
+@dots{}
+@print{} 3.141592653589797
+@print{} 3.141592653589797
+@end example
+
+@node Setting precision
+@subsection Setting the Precision
+
+@command{gawk} uses a global working precision; it does not keep track of
+the precision or accuracy of individual numbers. Performing an arithmetic
+operation or calling a built-in function rounds the result to the current
+working precision. The default working precision is 53 bits, which you can
+modify using the predefined variable @code{PREC}. You can also set the
+value to one of the predefined case-insensitive strings
+shown in @ref{table-predefined-precision-strings},
+to emulate an IEEE 754 binary format.
+
+@float Table,table-predefined-precision-strings
+@caption{Predefined precision strings for @code{PREC}}
+@multitable {@code{"double"}} {12345678901234567890123456789012345}
+@headitem @code{PREC} @tab IEEE 754 Binary Format
+@item @code{"half"} @tab 16-bit half-precision
+@item @code{"single"} @tab Basic 32-bit single precision
+@item @code{"double"} @tab Basic 64-bit double precision
+@item @code{"quad"} @tab Basic 128-bit quadruple precision
+@item @code{"oct"} @tab 256-bit octuple precision
+@end multitable
+@end float
+
+The following example illustrates the effects of changing precision
+on arithmetic operations:
+
+@example
+$ @kbd{gawk -M -v PREC=100 'BEGIN @{ x = 1.0e-400; print x + 0}
+> @kbd{PREC = "double"; print x + 0 @}'}
+@print{} 1e-400
+@print{} 0
+@end example
+
+@quotation CAUTION
+Be wary of floating-point constants! When reading a floating-point
+constant from program source code, @command{gawk} stores it internally
+as an MPFR number using the default precision (that of a C
+@code{double}), unless that is overridden by an assignment to the
+special variable @code{PREC} on the command line.
+Changing the precision using @code{PREC}
+in the program text does @emph{not} change the precision of a constant.
+
+If you need to represent a floating-point constant at a higher precision
+than the default and cannot use a command-line assignment to @code{PREC},
+you should either specify the constant as a string, or as a rational
+number, whenever possible. The following example illustrates the
+differences among various ways to print a floating-point constant:
+@end quotation
+
+@example
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000055511151
+$ @kbd{gawk -M -v PREC=113 'BEGIN @{ printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", "0.1") @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 1/10) @}'}
+@print{} 0.1000000000000000000000000
+@end example
+
+@node Setting the rounding mode
+@subsection Setting the Rounding Mode
+
+The @code{ROUNDMODE} variable provides
+program level control over the rounding mode.
+The correspondence between @code{ROUNDMODE} and the IEEE
+rounding modes is shown in @ref{table-gawk-rounding-modes}.
+
+@float Table,table-gawk-rounding-modes
+@caption{@command{gawk} rounding modes}
+@multitable @columnfractions .45 .30 .25
+@headitem Rounding Mode @tab IEEE Name @tab @code{ROUNDMODE}
+@item Round to nearest, ties to even @tab @code{roundTiesToEven} @tab @code{"N"} or @code{"n"}
+@item Round toward plus Infinity @tab @code{roundTowardPositive} @tab @code{"U"} or @code{"u"}
+@item Round toward negative Infinity @tab @code{roundTowardNegative} @tab @code{"D"} or @code{"d"}
+@item Round toward zero @tab @code{roundTowardZero} @tab @code{"Z"} or @code{"z"}
+@item Round to nearest, ties away from zero @tab @code{roundTiesToAway} @tab @code{"A"} or @code{"a"}
+@end multitable
+@end float
+
+@code{ROUNDMODE} has the default value @code{"N"}, which
+selects the IEEE 754 rounding mode @code{roundTiesToEven}.
+In @ref{table-gawk-rounding-modes}, the value @code{"A"} selects
+@code{roundTiesToAway}. This is only available if your version of the
+MPFR library supports it; otherwise, setting @code{ROUNDMODE} to @code{"A"}
+has no effect.
+
+The default mode, @code{roundTiesToEven}, is the most commonly preferred,
+but also the least intuitive. This method does the obvious thing for most values,
+by rounding them up or down to the nearest digit.
+For example, rounding 1.132 to two digits yields 1.13,
+and rounding 1.157 yields 1.16.
+
+However, when it comes to rounding a value that is exactly halfway between,
+things do not work the way you probably learned in school.
+In this case, the number is rounded to the nearest even digit.
+So rounding 0.125 to two digits rounds down to 0.12,
+but rounding 0.6875 to three digits rounds up to 0.688.
+You probably have already encountered this rounding mode when
+using @code{printf} to format floating-point numbers.
+For example:
+
+@example
+BEGIN @{
+ x = -4.5
+ for (i = 1; i < 10; i++) @{
+ x += 1.0
+ printf("%4.1f => %2.0f\n", x, x)
+ @}
+@}
+@end example
+
+@noindent
+produces the following output when run on the author's system:@footnote{It
+is possible for the output to be completely different if the
+C library in your system does not use the IEEE 754 even-rounding
+rule to round halfway cases for @code{printf}.}
+
+@example
+-3.5 => -4
+-2.5 => -2
+-1.5 => -2
+-0.5 => 0
+ 0.5 => 0
+ 1.5 => 2
+ 2.5 => 2
+ 3.5 => 4
+ 4.5 => 4
+@end example
+
+The theory behind @code{roundTiesToEven} is that it more or less evenly
+distributes upward and downward rounds of exact halves, which might
+cause any accumulating round-off error to cancel itself out. This is the
+default rounding mode for IEEE 754 computing functions and operators.
+
+The other rounding modes are rarely used. Round toward positive infinity
+(@code{roundTowardPositive}) and round toward negative infinity
+(@code{roundTowardNegative}) are often used to implement interval
+arithmetic, where you adjust the rounding mode to calculate upper and
+lower bounds for the range of output. The @code{roundTowardZero} mode can
+be used for converting floating-point numbers to integers. The rounding
+mode @code{roundTiesToAway} rounds the result to the nearest number and
+selects the number with the larger magnitude if a tie occurs.
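+
+As a sketch of the interval-arithmetic idea, you can evaluate the same
+expression twice, once rounding down and once rounding up, to get a lower
+and an upper bound on the true result. This is only an illustration, not
+a complete interval-arithmetic package:
+
+@example
+$ @kbd{gawk -M 'BEGIN @{}
+> @kbd{ROUNDMODE = "D"; lo = 1 / 3    # round toward negative infinity}
+> @kbd{ROUNDMODE = "U"; hi = 1 / 3    # round toward positive infinity}
+> @kbd{print (lo < hi)}
+> @kbd{@}'}
+@print{} 1
+@end example
+
+@noindent
+The two results really are different numbers: @code{lo} is the largest
+representable value not above 1/3, @code{hi} is the smallest not below
+it, and the mathematically exact 1/3 lies between them.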
+
+Some numerical analysts will tell you that your choice of rounding
+style has tremendous impact on the final outcome, and advise you to
+wait until final output for any rounding. Instead, you can often avoid
+round-off error problems by setting the precision initially to some
+value sufficiently larger than the final desired precision, so that
+the accumulation of round-off error does not influence the outcome.
+If you suspect that results from your computation are sensitive to
+accumulation of round-off error, look for a significant difference in
+output when you change the rounding mode to be sure.
+
+@node Arbitrary Precision Integers
+@section Arbitrary-Precision Integer Arithmetic with @command{gawk}
+@cindex integers, arbitrary precision
+@cindex arbitrary precision integers
+
+When given the @option{-M} option,
+@command{gawk} performs all integer arithmetic using GMP arbitrary-precision
+integers. Any number that looks like an integer in a source
+or @value{DF} is stored as an arbitrary-precision integer. The size
+of the integer is limited only by the available memory. For example,
+the following computes
+@iftex
+@math{5^{4^{3^{2}}}},
+@end iftex
+@ifnottex
+@ifnotdocbook
+5^4^3^2,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+5<superscript>4<superscript>3<superscript>2</superscript></superscript></superscript>, @c
+@end docbook
+the result of which is beyond the
+limits of ordinary hardware double-precision floating-point values:
+
+@example
+$ @kbd{gawk -M 'BEGIN @{}
+> @kbd{x = 5^4^3^2}
+> @kbd{print "number of digits =", length(x)}
+> @kbd{print substr(x, 1, 20), "...", substr(x, length(x) - 19, 20)}
+> @kbd{@}'}
+@print{} number of digits = 183231
+@print{} 62060698786608744707 ... 92256259918212890625
+@end example
+
+If instead you were to compute the same value using arbitrary-precision
+floating-point values, the precision needed for correct output (using
+the formula
+@iftex
+@math{prec = 3.322 @cdot dps}),
+would be @math{3.322 @cdot 183231},
+@end iftex
+@ifnottex
+@ifnotdocbook
+@samp{prec = 3.322 * dps}),
+would be 3.322 x 183231,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis>),
+would be
+<emphasis>prec</emphasis> = 3.322 &sdot; 183231, @c
+@end docbook
+or 608693.
+
+The result from an arithmetic operation with an integer and a floating-point value
+is a floating-point value with a precision equal to the working precision.
+The following program calculates the eighth term in
+Sylvester's sequence@footnote{Weisstein, Eric W.
+@cite{Sylvester's Sequence}. From MathWorld---A Wolfram Web Resource
+@w{(@url{http://mathworld.wolfram.com/SylvestersSequence.html}).}}
+using a recurrence:
+
+@example
+$ @kbd{gawk -M 'BEGIN @{}
+> @kbd{s = 2.0}
+> @kbd{for (i = 1; i <= 7; i++)}
+> @kbd{s = s * (s - 1) + 1}
+> @kbd{print s}
+> @kbd{@}'}
+@print{} 113423713055421845118910464
+@end example
+
+The output differs from the actual number, 113,423,713,055,421,844,361,000,443,
+because the default precision of 53 bits is not enough to represent the
+floating-point results exactly. You can either increase the precision
+(100 bits is enough in this case), or replace the floating-point constant
+@samp{2.0} with an integer, to perform all computations using integer
+arithmetic to get the correct output.
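+
+For example, simply starting the recurrence with the integer constant
+@samp{2} keeps every step in exact integer arithmetic, and the correct
+value appears:
+
+@example
+$ @kbd{gawk -M 'BEGIN @{}
+> @kbd{s = 2}
+> @kbd{for (i = 1; i <= 7; i++)}
+> @kbd{s = s * (s - 1) + 1}
+> @kbd{print s}
+> @kbd{@}'}
+@print{} 113423713055421844361000443
+@end example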
+
+Sometimes @command{gawk} must implicitly convert an arbitrary-precision
+integer into an arbitrary-precision floating-point value. This is
+primarily because the MPFR library does not always provide the relevant
+interface to process arbitrary-precision integers or mixed-mode numbers
+as needed by an operation or function. In such a case, the precision is
+set to the minimum value necessary for exact conversion, and the working
+precision is not used for this purpose. If this is not what you need or
+want, you can employ a subterfuge, and convert the integer to floating
+point first, like this:
+
+@example
+gawk -M 'BEGIN @{ n = 13; print (n + 0.0) % 2.0 @}'
+@end example
+
+You can avoid this issue altogether by specifying the number as a floating-point value
+to begin with:
+
+@example
+gawk -M 'BEGIN @{ n = 13.0; print n % 2.0 @}'
+@end example
+
+Note that for this particular example, it is likely best
+to just use the following:
+
+@example
+gawk -M 'BEGIN @{ n = 13; print n % 2 @}'
+@end example
+
+When dividing two arbitrary-precision integers with either
+@samp{/} or @samp{%}, the result is typically an arbitrary-precision
+floating-point value (unless the denominator evenly
+divides into the numerator). In order to do integer division
+or remainder with arbitrary-precision integers, use the built-in
+@code{div()} function (@pxref{Numeric Functions}).
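+
+For example (the subscripts of the result array are the same ones used
+by the simulated version shown below):
+
+@example
+$ @kbd{gawk -M 'BEGIN @{}
+> @kbd{div(17, 5, result)}
+> @kbd{print result["quotient"], result["remainder"]}
+> @kbd{@}'}
+@print{} 3 2
+@end example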
+
+You can simulate the @code{div()} function in standard @command{awk}
+using this user-defined function:
+
+@example
+@c file eg/lib/div.awk
+# div --- do integer division
+
+@c endfile
+@ignore
+@c file eg/lib/div.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+
+@c endfile
+
+@end ignore
+@c file eg/lib/div.awk
+function div(numerator, denominator, result)
+@{
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+@}
+@c endfile
+@end example
+
+The following example program, contributed by Katie Wasserman,
+uses @code{div()} to
+compute the digits of @value{PI} to as many places as you
+choose to set:
+
+@example
+@c file eg/prog/pi.awk
+# pi.awk --- compute the digits of pi
+@c endfile
+@c endfile
+@ignore
+@c file eg/prog/pi.awk
+#
+# Katie Wasserman, katie@@wass.net
+# August 2014
+@c endfile
+@end ignore
+@c file eg/prog/pi.awk
+
+BEGIN @{
+ digits = 100000
+ two = 2 * 10 ^ digits
+ pi = two
+ for (m = digits * 4; m > 0; --m) @{
+ d = m * 2 + 1
+ x = pi * m
+ div(x, d, result)
+ pi = result["quotient"]
+ pi = pi + two
+ @}
+ print pi
+@}
+@c endfile
+@end example
+
+@ignore
+Date: Wed, 20 Aug 2014 10:19:11 -0400
+To: arnold@skeeve.com
+From: Katherine Wasserman <katie@wass.net>
+Subject: Re: computation of digits of pi?
+
+Arnold,
+
+>The program that you sent to compute the digits of pi using div(). Is
+>that some standard algorithm that every math student knows? If so,
+>what's it called?
+
+It's not that well known but it's not that obscure either
+
+It's Euler's modification to Newton's method for calculating pi.
+
+Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm
+
+The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes.
+
+http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899
+
+-Katie
+@end ignore
+
+When asked about the algorithm used, Katie replied:
+
+@quotation
+It's not that well known but it's not that obscure either.
+It's Euler's modification to Newton's method for calculating pi.
+Take a look at lines (23) - (25) here: @uref{http://mathworld.wolfram.com/PiFormulas.htm}.
+
+The algorithm I wrote simply expands the multiply by 2 and works from
+the innermost expression outwards. I used this to program HP calculators
+because it's quite easy to modify for tiny memory devices with smallish
+word sizes. See
+@uref{http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899}.
+@end quotation
+
+@node POSIX Floating Point Problems
+@section Standards Versus Existing Practice
+
+Historically, @command{awk} has converted any non-numeric looking string
+to the numeric value zero, when required. Furthermore, the original
+definition of the language and the original POSIX standards specified that
+@command{awk} only understands decimal numbers (base 10), and not octal
+(base 8) or hexadecimal numbers (base 16).
+
+Changes in the language of the
+2001 and 2004 POSIX standards can be interpreted to imply that @command{awk}
+should support additional features. These features are:
+
+@itemize @value{BULLET}
+@item
+Interpretation of floating-point data values specified in hexadecimal
+notation (e.g., @code{0xDEADBEEF}). (Note: data values, @emph{not}
+source code constants.)
+
+@item
+Support for the special IEEE 754 floating-point values ``Not A Number''
+(NaN), positive Infinity (``inf''), and negative Infinity (``@minus{}inf'').
+In particular, the format for these values is as specified by the ISO 1999
+C standard, which ignores case, allows implementation-dependent additional
+characters after @samp{nan}, and accepts either @samp{inf} or @samp{infinity}.
+@end itemize
+
+The first problem is that both of these are clear changes to historical
+practice:
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} maintainer feels that supporting hexadecimal
+floating-point values, in particular, is ugly, and was never intended by the
+original designers to be part of the language.
+
+@item
+Allowing completely alphabetic strings to have valid numeric
+values is also a very severe departure from historical practice.
+@end itemize
+
+The second problem is that the @command{gawk} maintainer feels that this
+interpretation of the standard, which requires a certain amount of
+``language lawyering'' to arrive at in the first place, was not even
+intended by the standard developers. In other words, ``we see how you
+got where you are, but we don't think that that's where you want to be.''
+
+Recognizing these issues, but attempting to provide compatibility
+with the earlier versions of the standard,
+the 2008 POSIX standard added explicit wording to allow, but not require,
+that @command{awk} support hexadecimal floating-point values and
+special values for ``Not A Number'' and infinity.
+
+Although the @command{gawk} maintainer continues to feel that
+providing those features is inadvisable,
+nevertheless, on systems that support IEEE floating point, it seems
+reasonable to provide @emph{some} way to support NaN and Infinity values.
+The solution implemented in @command{gawk} is as follows:
+
+@itemize @value{BULLET}
+@item
+With the @option{--posix} command-line option, @command{gawk} becomes
+``hands off.'' String values are passed directly to the system library's
+@code{strtod()} function, and if it successfully returns a numeric value,
+that is what's used.@footnote{You asked for it, you got it.}
+By definition, the results are not portable across
+different systems. They are also a little surprising:
+
+@example
+$ @kbd{echo nanny | gawk --posix '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk --posix '@{ print $1 + 0 @}'}
+@print{} 3735928559
+@end example
+
+@item
+Without @option{--posix}, @command{gawk} interprets the four strings
+@samp{+inf},
+@samp{-inf},
+@samp{+nan},
+and
+@samp{-nan}
+specially, producing the corresponding special numeric values.
+The leading sign acts as a signal to @command{gawk} (and the user)
+that the value is really numeric. Hexadecimal floating point is
+not supported (unless you also use @option{--non-decimal-data},
+which is @emph{not} recommended). For example:
+
+@example
+$ @kbd{echo nanny | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+$ @kbd{echo +nan | gawk '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+@end example
+
+@command{gawk} ignores case in the four special values.
+Thus @samp{+nan} and @samp{+NaN} are the same.
+@end itemize
+
+@node Floating point summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Most computer arithmetic is done using either integers or floating-point
+values. Standard @command{awk} uses double-precision
+floating-point values.
+
+@item
+In the early 1990s, Barbie mistakenly said ``Math class is tough!''
+Although math isn't tough, floating-point arithmetic isn't the same
+as pencil and paper math, and care must be taken:
+
+@c nested list
+@itemize @value{MINUS}
+@item
+Not all numbers can be represented exactly.
+
+@item
+Comparing values should use a delta, instead of being done directly
+with @samp{==} and @samp{!=}.
+
+@item
+Errors accumulate.
+
+@item
+Operations are not always truly associative or distributive.
+@end itemize
+
+@item
+Increasing the accuracy can help, but it is not a panacea.
+
+@item
+Often, increasing the accuracy and then rounding to the desired
+number of digits produces reasonable results.
+
+@item
+Use @option{-M} (or @option{--bignum}) to enable MPFR
+arithmetic. Use @code{PREC} to set the precision in bits, and
+@code{ROUNDMODE} to set the IEEE 754 rounding mode.
+
+@item
+With @option{-M}, @command{gawk} performs
+arbitrary-precision integer arithmetic using the GMP library.
+This is faster and more space-efficient than using MPFR for
+the same calculations.
+
+@item
+There are several ``dark corners'' with respect to floating-point
+numbers where @command{gawk} disagrees with the POSIX standard.
+It pays to be aware of them.
+
+@item
+Overall, there is no need to be unduly suspicious about the results from
+floating-point arithmetic. The lesson to remember is that floating-point
+arithmetic is always more complex than arithmetic using pencil and
+paper. In order to take advantage of the power of computer floating point,
+you need to know its limitations and work within them. For most casual
+use of floating-point arithmetic, you will often get the expected result
+if you simply round the display of your final results to the correct number
+of significant decimal digits.
+
+@item
+As general advice, avoid presenting numerical data in a manner that
+implies better precision than is actually the case.
+
+@end itemize
+
+@node Dynamic Extensions
+@chapter Writing Extensions for @command{gawk}
+@cindex dynamically loaded extensions
+
+It is possible to add new functions written in C or C++ to @command{gawk} using
+dynamically loaded libraries. This facility is available on systems
+that support the C @code{dlopen()} and @code{dlsym()}
+functions. This @value{CHAPTER} describes how to create extensions
+using code written in C or C++.
+
+If you don't know anything about C programming, you can safely skip this
+@value{CHAPTER}, although you may wish to review the documentation on the
+extensions that come with @command{gawk} (@pxref{Extension Samples}),
+and the information on the @code{gawkextlib} project (@pxref{gawkextlib}).
+The sample extensions are automatically built and installed when
+@command{gawk} is.
+
+@quotation NOTE
+When @option{--sandbox} is specified, extensions are disabled
+(@pxref{Options}).
+@end quotation
+
+@menu
+* Extension Intro:: What is an extension.
+* Plugin License:: A note about licensing.
+* Extension Mechanism Outline:: An outline of how it works.
+* Extension API Description:: A full description of the API.
+* Finding Extensions:: How @command{gawk} finds compiled extensions.
+* Extension Example:: Example C code for an extension.
+* Extension Samples:: The sample extensions that ship with
+ @code{gawk}.
+* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
+@end menu
+
+@node Extension Intro
+@section Introduction
+
+@cindex plug-in
+An @dfn{extension} (sometimes called a @dfn{plug-in}) is a piece of
+external compiled code that @command{gawk} can load at runtime to
+provide additional functionality, over and above the built-in capabilities
+described in the rest of this @value{DOCUMENT}.
+
+Extensions are useful because they allow you (of course) to extend
+@command{gawk}'s functionality. For example, they can provide access to
+system calls (such as @code{chdir()} to change directory) and to other
+C library routines that could be of use. As with most software,
+``the sky is the limit''; if you can imagine something that you might
+want to do and can write in C or C++, you can write an extension to do it!
+
+Extensions are written in C or C++, using the @dfn{application programming
+interface} (API) defined for this purpose by the @command{gawk}
+developers. The rest of this @value{CHAPTER} explains
+the facilities that the API provides and how to use
+them, and presents a small example extension. In addition, it documents
+the sample extensions included in the @command{gawk} distribution,
+and describes the @code{gawkextlib} project.
+@ifclear FOR_PRINT
+@xref{Extension Design}, for a discussion of the extension mechanism
+goals and design.
+@end ifclear
+@ifset FOR_PRINT
+See @uref{http://www.gnu.org/software/gawk/manual/html_node/Extension-Design.html}
+for a discussion of the extension mechanism
+goals and design.
+@end ifset
+
+@node Plugin License
+@section Extension Licensing
+
+Every dynamic extension must be distributed under a license that is
+compatible with the GNU GPL (@pxref{Copying}).
+
+In order for the extension to tell @command{gawk} that it is
+properly licensed, the extension must define the global symbol
+@code{plugin_is_GPL_compatible}. If this symbol does not exist,
+@command{gawk} emits a fatal error and exits when it tries to load
+your extension.
+
+The declared type of the symbol should be @code{int}. It does not need
+to be in any allocated section, though. The code merely asserts that
+the symbol exists in the global scope. Something like this is enough:
+
+@example
+int plugin_is_GPL_compatible;
+@end example
+
+@node Extension Mechanism Outline
+@section How It Works at a High Level
+
+Communication between
+@command{gawk} and an extension is two-way. First, when an extension
+is loaded, @command{gawk} passes it a pointer to a @code{struct} whose fields are
+function pointers.
+@ifnotdocbook
+This is shown in @ref{figure-load-extension}.
+@end ifnotdocbook
+@ifdocbook
+This is shown in @inlineraw{docbook, <xref linkend="figure-load-extension"/>}.
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-load-extension
+@caption{Loading the extension}
+@c FIXME: One day, it should not be necessary to have two cases,
+@c but rather just the one without the "txt" final argument.
+@c This applies to the other figures as well.
+@ifinfo
+@center @image{api-figure1, , , Loading the extension, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{api-figure1, , , Loading the extension}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-load-extension" float="0">
+<title>Loading the extension</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure1.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+The extension can call functions inside @command{gawk} through these
+function pointers, at runtime, without needing (link-time) access
+to @command{gawk}'s symbols. One of these function pointers is to a
+function for ``registering'' new functions.
+@ifnotdocbook
+This is shown in @ref{figure-register-new-function}.
+@end ifnotdocbook
+@ifdocbook
+This is shown in @inlineraw{docbook, <xref linkend="figure-register-new-function"/>}.
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-register-new-function
+@caption{Registering a new function}
+@ifinfo
+@center @image{api-figure2, , , Registering a new Function, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{api-figure2, , , Registering a new Function}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-register-new-function" float="0">
+<title>Registering a new function</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure2.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+In the other direction, the extension registers its new functions
+with @command{gawk} by passing function pointers to the functions that
+provide the new feature (@code{do_chdir()}, for example). @command{gawk}
+associates the function pointer with a name and can then call it, using a
+defined calling convention.
+@ifnotdocbook
+This is shown in @ref{figure-call-new-function}.
+@end ifnotdocbook
+@ifdocbook
+This is shown in @inlineraw{docbook, <xref linkend="figure-call-new-function"/>}.
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-call-new-function
+@caption{Calling the new function}
+@ifinfo
+@center @image{api-figure3, , , Calling the new function, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{api-figure3, , , Calling the new function}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-call-new-function" float="0">
+<title>Calling the new function</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure3.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+The @code{do_@var{xxx}()} function, in turn, then uses the function
+pointers in the API @code{struct} to do its work, such as updating
+variables or arrays, printing messages, setting @code{ERRNO}, and so on.
+
+Convenience macros make calling through the function pointers look
+like regular function calls so that extension code is quite readable
+and understandable.
+
+Although all of this sounds somewhat complicated, the result is that
+extension code is quite straightforward to write and to read. You can
+see this in the sample extension @file{filefuncs.c} (@pxref{Extension
+Example}) and also in the @file{testext.c} code for testing the APIs.
+
+Some other bits and pieces:
+
+@itemize @value{BULLET}
+@item
+The API provides access to @command{gawk}'s @code{do_@var{xxx}} values,
+reflecting command-line options, like @code{do_lint}, @code{do_profiling}
+and so on (@pxref{Extension API Variables}).
+These are informational: an extension cannot affect their values
+inside @command{gawk}. In addition, attempting to assign to them
+produces a compile-time error.
+
+@item
+The API also provides major and minor version numbers, so that an
+extension can check if the @command{gawk} it is loaded with supports the
+facilities it was compiled with. (Version mismatches ``shouldn't''
+happen, but we all know how @emph{that} goes.)
+@DBXREF{Extension Versioning} for details.
+@end itemize
+
+@node Extension API Description
+@section API Description
+@cindex extension API
+
+C or C++ code for an extension must include the header file
+@file{gawkapi.h}, which declares the functions and defines the data
+types used to communicate with @command{gawk}.
+This (rather large) @value{SECTION} describes the API in detail.
+
+@menu
+* Extension API Functions Introduction:: Introduction to the API functions.
+* General Data Types:: The data types.
+* Memory Allocation Functions:: Functions for allocating memory.
+* Constructor Functions:: Functions for creating values.
+* Registration Functions:: Functions to register things with
+ @command{gawk}.
+* Printing Messages:: Functions for printing messages.
+* Updating @code{ERRNO}:: Functions for updating @code{ERRNO}.
+* Requesting Values:: How to get a value.
+* Accessing Parameters:: Functions for accessing parameters.
+* Symbol Table Access:: Functions for accessing global
+ variables.
+* Array Manipulation:: Functions for working with arrays.
+* Extension API Variables:: Variables provided by the API.
+* Extension API Boilerplate:: Boilerplate code for using the API.
+@end menu
+
+@node Extension API Functions Introduction
+@subsection Introduction
+
+Access to facilities within @command{gawk} is made available
+by calling through function pointers passed into your extension.
+
+API function pointers are provided for the following kinds of operations:
+
+@itemize @value{BULLET}
+@item
+Allocating, reallocating, and releasing memory.
+
+@item
+Registration functions. You may register:
+
+@c nested list
+@itemize @value{MINUS}
+@item
+Extension functions
+@item
+Exit callbacks
+@item
+A version string
+@item
+Input parsers
+@item
+Output wrappers
+@item
+Two-way processors
+@end itemize
+
+All of these are discussed in detail, later in this @value{CHAPTER}.
+
+@item
+Printing fatal, warning, and ``lint'' warning messages.
+
+@item
+Updating @code{ERRNO}, or unsetting it.
+
+@item
+Accessing parameters, including converting an undefined parameter into
+an array.
+
+@item
+Symbol table access: retrieving a global variable, creating one,
+or changing one.
+
+@item
+Creating and releasing cached values; this provides an
+efficient way to use values for multiple variables and
+can be a big performance win.
+
+@item
+Manipulating arrays:
+
+@itemize @value{MINUS}
+@item
+Retrieving, adding, deleting, and modifying elements
+
+@item
+Getting the count of elements in an array
+
+@item
+Creating a new array
+
+@item
+Clearing an array
+
+@item
+Flattening an array for easy C-style looping over all its indices and elements
+@end itemize
+@end itemize
+
+Some points about using the API:
+
+@itemize @value{BULLET}
+@item
+The following types, macros, and/or functions are referenced
+in @file{gawkapi.h}. For correct use, you must therefore include the
+corresponding standard header file @emph{before} including @file{gawkapi.h}:
+
+@multitable {@code{memset()}, @code{memcpy()}} {@code{<sys/types.h>}}
+@headitem C Entity @tab Header File
+@item @code{EOF} @tab @code{<stdio.h>}
+@item Values for @code{errno} @tab @code{<errno.h>}
+@item @code{FILE} @tab @code{<stdio.h>}
+@item @code{NULL} @tab @code{<stddef.h>}
+@item @code{memcpy()} @tab @code{<string.h>}
+@item @code{memset()} @tab @code{<string.h>}
+@item @code{size_t} @tab @code{<sys/types.h>}
+@item @code{struct stat} @tab @code{<sys/stat.h>}
+@end multitable
+
+Due to portability concerns, especially to systems that are not
+fully standards-compliant, it is your responsibility
+to include the correct files in the correct way. This requirement
+is necessary in order to keep @file{gawkapi.h} clean, instead of becoming
+a portability hodge-podge as can be seen in some parts of
+the @command{gawk} source code.
+
+@item
+The @file{gawkapi.h} file may be included more than once without ill effect.
+Doing so, however, is poor coding practice.
+
+@item
+Although the API uses only ISO C 90 features, there is one exception: the
+``constructor'' functions use the @code{inline} keyword. If your compiler
+does not support this keyword, you should either place
+@samp{-Dinline=''} on your command line, or use the GNU Autotools and include a
+@file{config.h} file in your extensions.
+
+@item
+All pointers filled in by @command{gawk} point to memory
+managed by @command{gawk} and should be treated by the extension as
+read-only. Memory for @emph{all} strings passed into @command{gawk}
+from the extension @emph{must} come from calling one of
+@code{gawk_malloc()}, @code{gawk_calloc()} or @code{gawk_realloc()},
+and is managed by @command{gawk} from then on.
+
+@item
+The API defines several simple @code{struct}s that map values as seen
+from @command{awk}. A value can be a @code{double}, a string, or an
+array (as in multidimensional arrays, or when creating a new array).
+String values maintain both pointer and length, because embedded @sc{nul}
+characters are allowed.
+
+@quotation NOTE
+By intent, strings are maintained using the current multibyte encoding (as
+defined by @env{LC_@var{xxx}} environment variables) and not using wide
+characters. This matches how @command{gawk} stores strings internally
+and also how characters are likely to be input and output from files.
+@end quotation
+
+@item
+When retrieving a value (such as a parameter or that of a global variable
+or array element), the extension requests a specific type (number, string,
+scalar, value cookie, array, or ``undefined''). When the request is
+``undefined,'' the returned value will have the real underlying type.
+
+However, if the request and actual type don't match, the access function
+returns ``false'' and fills in the type of the actual value that is there,
+so that the extension can, e.g., print an error message
+(such as ``scalar passed where array expected'').
+
+@c This is documented in the header file and needs some expanding upon.
+@c The table there should be presented here
+@end itemize
+
+You may call the API functions by using the function pointers
+directly, but the interface is not so pretty. To make extension code look
+more like regular code, the @file{gawkapi.h} header file defines several
+macros that you should use in your code. This @value{SECTION} presents
+the macros as if they were functions.
+
+@node General Data Types
+@subsection General-Purpose Data Types
+
+@cindex Robbins, Arnold
+@cindex Ramey, Chet
+@quotation
+@i{I have a true love/hate relationship with unions.}
+@author Arnold Robbins
+@end quotation
+
+@quotation
+@i{That's the thing about unions: the compiler will arrange things so they
+can accommodate both love and hate.}
+@author Chet Ramey
+@end quotation
+
+The extension API defines a number of simple types and structures for
+general-purpose use. Additional, more specialized, data structures are
+introduced in subsequent @value{SECTION}s, together with the functions
+that use them.
+
+@table @code
+@item typedef void *awk_ext_id_t;
+A value of this type is received from @command{gawk} when an extension is loaded.
+That value must then be passed back to @command{gawk} as the first parameter of
+each API function.
+
+@item #define awk_const @dots{}
+This macro expands to @samp{const} when compiling an extension,
+and to nothing when compiling @command{gawk} itself. This makes
+certain fields in the API data structures unwritable from extension code,
+while allowing @command{gawk} to use them as it needs to.
+
+@item typedef enum awk_bool @{
+@itemx @ @ @ @ awk_false = 0,
+@itemx @ @ @ @ awk_true
+@itemx @} awk_bool_t;
+A simple boolean type.
+
+@item typedef struct awk_string @{
+@itemx @ @ @ @ char *str;@ @ @ @ @ @ /* data */
+@itemx @ @ @ @ size_t len;@ @ @ @ @ /* length thereof, in chars */
+@itemx @} awk_string_t;
+This represents a mutable string. @command{gawk}
+owns the memory pointed to if it supplied
+the value. Otherwise, it takes ownership of the memory pointed to.
+@emph{Such memory must come from calling one of the
+@code{gawk_malloc()}, @code{gawk_calloc()}, or
+@code{gawk_realloc()} functions!}
+
+As mentioned earlier, strings are maintained using the current
+multibyte encoding.
+
+@item typedef enum @{
+@itemx @ @ @ @ AWK_UNDEFINED,
+@itemx @ @ @ @ AWK_NUMBER,
+@itemx @ @ @ @ AWK_STRING,
+@itemx @ @ @ @ AWK_ARRAY,
+@itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */
+@itemx @ @ @ @ AWK_VALUE_COOKIE@ @ @ @ /* for updating a previously created value */
+@itemx @} awk_valtype_t;
+This @code{enum} indicates the type of a value.
+It is used in the following @code{struct}.
+
+@item typedef struct awk_value @{
+@itemx @ @ @ @ awk_valtype_t val_type;
+@itemx @ @ @ @ union @{
+@itemx @ @ @ @ @ @ @ @ awk_string_t@ @ @ @ @ @ @ s;
+@itemx @ @ @ @ @ @ @ @ double@ @ @ @ @ @ @ @ @ @ @ @ @ d;
+@itemx @ @ @ @ @ @ @ @ awk_array_t@ @ @ @ @ @ @ @ a;
+@itemx @ @ @ @ @ @ @ @ awk_scalar_t@ @ @ @ @ @ @ scl;
+@itemx @ @ @ @ @ @ @ @ awk_value_cookie_t@ vc;
+@itemx @ @ @ @ @} u;
+@itemx @} awk_value_t;
+An ``@command{awk} value.''
+The @code{val_type} member indicates what kind of value the
+@code{union} holds, and each member is of the appropriate type.
+
+@item #define str_value@ @ @ @ @ @ u.s
+@itemx #define num_value@ @ @ @ @ @ u.d
+@itemx #define array_cookie@ @ @ u.a
+@itemx #define scalar_cookie@ @ u.scl
+@itemx #define value_cookie@ @ @ u.vc
+These macros make accessing the fields of the @code{awk_value_t} more
+readable.
+
+@item typedef void *awk_scalar_t;
+Scalars can be represented as an opaque type. These values are obtained
+from @command{gawk} and then passed back into it. This is discussed
+in a general fashion in the text following this list, and in more detail in
+@ref{Symbol table by cookie}.
+
+@item typedef void *awk_value_cookie_t;
+A ``value cookie'' is an opaque type representing a cached value.
+This is also discussed in a general fashion in the text following this list,
+and in more detail in @ref{Cached values}.
+
+@end table
+
+Scalar values in @command{awk} are either numbers or strings. The
+@code{awk_value_t} struct represents values. The @code{val_type} member
+indicates what is in the @code{union}.
+
+Representing numbers is easy---the API uses a C @code{double}. Strings
+require more work. Because @command{gawk} allows embedded @sc{nul} bytes
+in string values, a string must be represented as a pair containing a
+data-pointer and length. This is the @code{awk_string_t} type.
+
+Identifiers (i.e., the names of global variables) can be associated
+with either scalar values or with arrays. In addition, @command{gawk}
+provides true arrays of arrays, where any given array element can
+itself be an array. Discussion of arrays is delayed until
+@ref{Array Manipulation}.
+
+The various macros listed earlier make it easier to use the elements
+of the @code{union} as if they were fields in a @code{struct}; this
+is a common coding practice in C. Such code is easier to write and to
+read, but it remains @emph{your} responsibility to make sure that
+the @code{val_type} member correctly reflects the type of the value in
+the @code{awk_value_t}.
+
+Conceptually, the first three members of the @code{union} (number, string,
+and array) are all that is needed for working with @command{awk} values.
+However, because the API provides routines for accessing and changing
+the value of global scalar variables only by using the variable's name,
+there is a performance penalty: @command{gawk} must find the variable
+each time it is accessed and changed. This turns out to be a real issue,
+not just a theoretical one.
+
+Thus, if you know that your extension will spend considerable time
+reading and/or changing the value of one or more scalar variables, you
+can obtain a @dfn{scalar cookie}@footnote{See
+@uref{http://catb.org/jargon/html/C/cookie.html, the ``cookie'' entry in the Jargon file} for a
+definition of @dfn{cookie}, and @uref{http://catb.org/jargon/html/M/magic-cookie.html,
+the ``magic cookie'' entry in the Jargon file} for a nice example.
+@ifclear FOR_PRINT
+See also the entry for ``Cookie'' in the @ref{Glossary}.
+@end ifclear
+}
+object for that variable, and then use
+the cookie for getting the variable's value or for changing the variable's
+value.
+This is the @code{awk_scalar_t} type and @code{scalar_cookie} macro.
+Given a scalar cookie, @command{gawk} can directly retrieve or
+modify the value, as required, without having to find it first.
+
+The @code{awk_value_cookie_t} type and @code{value_cookie} macro are similar.
+If you know that you wish to
+use the same numeric or string @emph{value} for one or more variables,
+you can create the value once, retaining a @dfn{value cookie} for it,
+and then pass in that value cookie whenever you wish to set the value of a
+variable. This saves both storage space within the running @command{gawk}
+process as well as the time needed to create the value.
+
+@node Memory Allocation Functions
+@subsection Memory Allocation Functions and Convenience Macros
+@cindex allocating memory for extensions
+@cindex extensions, allocating memory
+
+The API provides a number of @dfn{memory allocation} functions for
+allocating memory that can be passed to @command{gawk}, as well as a number of
+convenience macros.
+This @value{SUBSECTION} presents them all as function prototypes, in
+the way that extension code would use them:
+
+@table @code
+@item void *gawk_malloc(size_t size);
+Call the correct version of @code{malloc()} to allocate storage that may
+be passed to @command{gawk}.
+
+@item void *gawk_calloc(size_t nmemb, size_t size);
+Call the correct version of @code{calloc()} to allocate storage that may
+be passed to @command{gawk}.
+
+@item void *gawk_realloc(void *ptr, size_t size);
+Call the correct version of @code{realloc()} to allocate storage that may
+be passed to @command{gawk}.
+
+@item void gawk_free(void *ptr);
+Call the correct version of @code{free()} to release storage that was
+allocated with @code{gawk_malloc()}, @code{gawk_calloc()} or @code{gawk_realloc()}.
+@end table
+
+The API has to provide these functions because it is possible
+for an extension to be compiled and linked against a different
+version of the C library than was used for the @command{gawk}
+executable.@footnote{This is more common on MS-Windows systems, but
+can happen on Unix-like systems as well.} If @command{gawk} were
+to use its version of @code{free()} when the memory came from an
+unrelated version of @code{malloc()}, unexpected behavior would
+likely result.
+
+Two convenience macros may be used for allocating storage
+from @code{gawk_malloc()} and
+@code{gawk_realloc()}. If the allocation fails, they cause @command{gawk}
+to exit with a fatal error message. They should be used as if they were
+procedure calls that do not return a value.
+
+@table @code
+@item #define emalloc(pointer, type, size, message) @dots{}
+The arguments to this macro are as follows:
+
+@c nested table
+@table @code
+@item pointer
+The pointer variable to point at the allocated storage.
+
+@item type
+The type of the pointer variable. This is used to create a cast for
+the call to @code{gawk_malloc()}.
+
+@item size
+The total number of bytes to be allocated.
+
+@item message
+A message to be prefixed to the fatal error message. Typically this is the name
+of the function using the macro.
+@end table
+
+@noindent
+For example, you might allocate a string value like so:
+
+@example
+awk_value_t result;
+char *message;
+const char greet[] = "Don't Panic!";
+
+emalloc(message, char *, sizeof(greet), "myfunc");
+strcpy(message, greet);
+make_malloced_string(message, strlen(message), & result);
+@end example
+
+@item #define erealloc(pointer, type, size, message) @dots{}
+This is like @code{emalloc()}, but it calls @code{gawk_realloc()},
+instead of @code{gawk_malloc()}.
+The arguments are the same as for the @code{emalloc()} macro.
+@end table
+
+@node Constructor Functions
+@subsection Constructor Functions
+
+The API provides a number of @dfn{constructor} functions for creating
+string and numeric values, as well as a number of convenience macros.
+This @value{SUBSECTION} presents them all as function prototypes, in
+the way that extension code would use them:
+
+@table @code
+@item static inline awk_value_t *
+@itemx make_const_string(const char *string, size_t length, awk_value_t *result)
+This function creates a string value in the @code{awk_value_t} variable
+pointed to by @code{result}. It expects @code{string} to be a C string constant
+(or other string data), and automatically creates a @emph{copy} of the data
+for storage in @code{result}. It returns @code{result}.
+
+@item static inline awk_value_t *
+@itemx make_malloced_string(const char *string, size_t length, awk_value_t *result)
+This function creates a string value in the @code{awk_value_t} variable
+pointed to by @code{result}. It expects @code{string} to be a @samp{char *}
+value pointing to data previously obtained from @code{gawk_malloc()}, @code{gawk_calloc()} or @code{gawk_realloc()}. The idea here
+is that the data is passed directly to @command{gawk}, which assumes
+responsibility for it. It returns @code{result}.
+
+@item static inline awk_value_t *
+@itemx make_null_string(awk_value_t *result)
+This specialized function creates a null string (the ``undefined'' value)
+in the @code{awk_value_t} variable pointed to by @code{result}.
+It returns @code{result}.
+
+@item static inline awk_value_t *
+@itemx make_number(double num, awk_value_t *result)
+This function simply creates a numeric value in the @code{awk_value_t} variable
+pointed to by @code{result}.
+@end table
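+
+As a brief illustration, here is a minimal sketch (our own, not part of
+the distribution) of an extension function that uses
+@code{make_const_string()} to hand a constant greeting back to the
+@command{awk} level.  It assumes the usual boilerplate
+(@pxref{Extension API Boilerplate}) is in place:
+
+@example
+/* do_greet --- return a constant greeting (illustration only) */
+
+static awk_value_t *
+do_greet(int nargs, awk_value_t *result)
+@{
+    static const char greeting[] = "hello, world";
+
+    /* make_const_string() copies the data, so a constant is fine */
+    return make_const_string(greeting, sizeof(greeting) - 1, result);
+@}
+@end example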
+
+@node Registration Functions
+@subsection Registration Functions
+@cindex register extension
+@cindex extension registration
+
+This @value{SECTION} describes the API functions for
+registering parts of your extension with @command{gawk}.
+
+@menu
+* Extension Functions:: Registering extension functions.
+* Exit Callback Functions:: Registering an exit callback.
+* Extension Version String:: Registering a version string.
+* Input Parsers:: Registering an input parser.
+* Output Wrappers:: Registering an output wrapper.
+* Two-way processors:: Registering a two-way processor.
+@end menu
+
+@node Extension Functions
+@subsubsection Registering An Extension Function
+
+Extension functions are described by the following record:
+
+@example
+typedef struct awk_ext_func @{
+@ @ @ @ const char *name;
+@ @ @ @ awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
+@ @ @ @ size_t num_expected_args;
+@} awk_ext_func_t;
+@end example
+
+The fields are:
+
+@table @code
+@item const char *name;
+The name of the new function.
+@command{awk} level code calls the function by this name.
+This is a regular C string.
+
+Function names must obey the rules for @command{awk}
+identifiers. That is, they must begin with either an English letter
+or an underscore, which may be followed by any number of
+letters, digits, and underscores.
+Letter case in function names is significant.
+
+@item awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
+This is a pointer to the C function that provides the extension's
+functionality.
+The function must fill in @code{*result} with either a number
+or a string. @command{gawk} takes ownership of any string memory.
+As mentioned earlier, string memory @strong{must} come from one of
+@code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
+
+The @code{num_actual_args} argument tells the C function how many
+actual parameters were passed from the calling @command{awk} code.
+
+The function must return the value of @code{result}.
+This is for the convenience of the calling code inside @command{gawk}.
+
+@item size_t num_expected_args;
+This is the number of arguments the function expects to receive.
+Each extension function may decide what to do if the number of
+arguments isn't what it expected. As with real @command{awk} functions, it
+is likely OK to ignore extra arguments.
+@end table
+
+Once you have a record representing your extension function, you register
+it with @command{gawk} using this API function:
+
+@table @code
+@item awk_bool_t add_ext_func(const char *namespace, const awk_ext_func_t *func);
+This function returns true upon success, false otherwise.
+The @code{namespace} parameter is currently not used; you should pass in an
+empty string (@code{""}). The @code{func} pointer is the address of a
+@code{struct} representing your function, as just described.
+@end table
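+
+For example, a hypothetical @code{do_greet()} function (such as the
+sketch shown earlier) might be described and registered like this;
+@code{ext_id} comes from the standard boilerplate:
+
+@example
+static awk_value_t *do_greet(int nargs, awk_value_t *result);
+
+static awk_ext_func_t greet_func = @{ "greet", do_greet, 0 @};
+
+static awk_bool_t
+init_greet(void)
+@{
+    if (! add_ext_func("", & greet_func)) @{
+        warning(ext_id, "greet: could not register function");
+        return awk_false;
+    @}
+    return awk_true;
+@}
+@end example
+
+@noindent
+In practice, the sample extensions list their functions in a table and
+let the boilerplate @code{dl_load_func()} macro
+(@pxref{Extension API Boilerplate}) call @code{add_ext_func()} for them;
+calling it directly, as here, is equivalent.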
+
+@node Exit Callback Functions
+@subsubsection Registering An Exit Callback Function
+
+An @dfn{exit callback} function is a function that
+@command{gawk} calls before it exits.
+Such functions are useful if you have general ``cleanup'' tasks
+that should be performed in your extension (such as closing database
+connections or other resource deallocations).
+You can register such
+a function with @command{gawk} using the following function:
+
+@table @code
+@item void awk_atexit(void (*funcp)(void *data, int exit_status),
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ void *arg0);
+The parameters are:
+
+@c nested table
+@table @code
+@item funcp
+A pointer to the function to be called before @command{gawk} exits. The @code{data}
+parameter will be the original value of @code{arg0}.
+The @code{exit_status} parameter is the exit status value that
+@command{gawk} intends to pass to the @code{exit()} system call.
+
+@item arg0
+A pointer to private data which @command{gawk} saves in order to pass to
+the function pointed to by @code{funcp}.
+@end table
+@end table
+
+Exit callback functions are called in last-in-first-out (LIFO)
+order---that is, in the reverse order in which they are registered with
+@command{gawk}.
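+
+For instance, a sketch of an extension that allocates some private
+state at startup and releases it at exit might look like this (the
+function and variable names are ours):
+
+@example
+static void
+release_state(void *data, int exit_status)
+@{
+    (void) exit_status;     /* not needed in this sketch */
+
+    /* data is the arg0 value passed to awk_atexit() below */
+    gawk_free(data);
+@}
+
+static awk_bool_t
+init_my_extension(void)
+@{
+    char *state;
+
+    emalloc(state, char *, 1024, "init_my_extension");
+    awk_atexit(release_state, state);
+    return awk_true;
+@}
+@end example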
+
+@node Extension Version String
+@subsubsection Registering An Extension Version String
+
+You can register a version string that indicates the name and
+version of your extension with @command{gawk}, as follows:
+
+@table @code
+@item void register_ext_version(const char *version);
+Register the string pointed to by @code{version} with @command{gawk}.
+Note that @command{gawk} does @emph{not} copy the @code{version} string, so
+it should not be changed.
+@end table
+
+@command{gawk} prints all registered extension version strings when it
+is invoked with the @option{--version} option.
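+
+For example (a sketch; the string itself is up to you):
+
+@example
+static const char *ext_version = "quux extension: version 1.0";
+
+static awk_bool_t
+init_quux(void)
+@{
+    register_ext_version(ext_version);  /* shown by `gawk --version' */
+    return awk_true;
+@}
+@end example
+
+@noindent
+With the standard boilerplate (@pxref{Extension API Boilerplate}),
+defining a non-@code{NULL} @code{ext_version} is normally all that is
+needed; the boilerplate performs the registration for you.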
+
+@node Input Parsers
+@subsubsection Customized Input Parsers
+@cindex customized input parser
+
+By default, @command{gawk} reads text files as its input. It uses the value
+of @code{RS} to find the end of the record, and then uses @code{FS}
+(or @code{FIELDWIDTHS} or @code{FPAT}) to split it into fields (@pxref{Reading Files}).
+Additionally, it sets the value of @code{RT} (@pxref{Built-in Variables}).
+
+If you want, you can provide your own custom input parser. An input
+parser's job is to return a record to the @command{gawk} record processing
+code, along with indicators for the value and length of the data to be
+used for @code{RT}, if any.
+
+To provide an input parser, you must first provide two functions
+(where @var{XXX} is a prefix name for your extension):
+
+@table @code
+@item awk_bool_t @var{XXX}_can_take_file(const awk_input_buf_t *iobuf);
+This function examines the information available in @code{iobuf}
+(which we discuss shortly). Based on the information there, it
+decides if the input parser should be used for this file.
+If so, it should return true. Otherwise, it should return false.
+It should not change any state (variable values, etc.) within @command{gawk}.
+
+@item awk_bool_t @var{XXX}_take_control_of(awk_input_buf_t *iobuf);
+When @command{gawk} decides to hand control of the file over to the
+input parser, it calls this function. This function in turn must fill
+in certain fields in the @code{awk_input_buf_t} structure, and ensure
+that certain conditions are true. It should then return true. If an
+error of some kind occurs, it should not fill in any fields, and should
+return false; then @command{gawk} will not use the input parser.
+The details are presented shortly.
+@end table
+
+Your extension should package these functions inside an
+@code{awk_input_parser_t}, which looks like this:
+
+@example
+typedef struct awk_input_parser @{
+ const char *name; /* name of parser */
+ awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
+ awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
+ awk_const struct awk_input_parser *awk_const next; /* for gawk */
+@} awk_input_parser_t;
+@end example
+
+The fields are:
+
+@table @code
+@item const char *name;
+The name of the input parser. This is a regular C string.
+
+@item awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
+A pointer to your @code{@var{XXX}_can_take_file()} function.
+
+@item awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
+A pointer to your @code{@var{XXX}_take_control_of()} function.
+
+@item awk_const struct awk_input_parser *awk_const next;
+This is for use by @command{gawk};
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
+@end table
+
+The steps are as follows:
+
+@enumerate
+@item
+Create a @code{static awk_input_parser_t} variable and initialize it
+appropriately.
+
+@item
+When your extension is loaded, register your input parser with
+@command{gawk} using the @code{register_input_parser()} API function
+(described next).
+@end enumerate
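+
+For instance, a hypothetical input parser for some ``csv'' format
+might be declared and registered like this, using the
+@code{register_input_parser()} function (described later); the
+@code{csv} prefix names are placeholders for your own code:
+
+@example
+static awk_bool_t csv_can_take_file(const awk_input_buf_t *iobuf);
+static awk_bool_t csv_take_control_of(awk_input_buf_t *iobuf);
+
+static awk_input_parser_t csv_parser = @{
+    "csv",                  /* name, used in diagnostics */
+    csv_can_take_file,
+    csv_take_control_of,
+    NULL                    /* next; for use by gawk */
+@};
+
+static awk_bool_t
+init_csv_parser(void)
+@{
+    register_input_parser(& csv_parser);
+    return awk_true;
+@}
+@end example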
+
+An @code{awk_input_buf_t} looks like this:
+
+@example
+typedef struct awk_input @{
+ const char *name; /* filename */
+ int fd; /* file descriptor */
+#define INVALID_HANDLE (-1)
+ void *opaque; /* private data for input parsers */
+ int (*get_record)(char **out, struct awk_input *iobuf,
+ int *errcode, char **rt_start, size_t *rt_len);
+ ssize_t (*read_func)();
+ void (*close_func)(struct awk_input *iobuf);
+ struct stat sbuf; /* stat buf */
+@} awk_input_buf_t;
+@end example
+
+The fields can be divided into two categories: those for use (initially,
+at least) by @code{@var{XXX}_can_take_file()}, and those for use by
+@code{@var{XXX}_take_control_of()}. The first group of fields and their uses
+are as follows:
+
+@table @code
+@item const char *name;
+The name of the file.
+
+@item int fd;
+A file descriptor for the file. If @command{gawk} was able to
+open the file, then @code{fd} will @emph{not} be equal to
+@code{INVALID_HANDLE}. Otherwise, it will.
+
+@item struct stat sbuf;
+If the file descriptor is valid, then @command{gawk} will have filled
+in this structure via a call to the @code{fstat()} system call.
+@end table
+
+The @code{@var{XXX}_can_take_file()} function should examine these
+fields and decide if the input parser should be used for the file.
+The decision can be made based upon @command{gawk} state (the value
+of a variable defined previously by the extension and set by
+@command{awk} code), the name of the
+file, whether or not the file descriptor is valid, the information
+in the @code{struct stat}, or any combination of these factors.
+
+Once @code{@var{XXX}_can_take_file()} has returned true, and
+@command{gawk} has decided to use your input parser, it calls
+@code{@var{XXX}_take_control_of()}. That function then fills in either
+the @code{get_record} field or the @code{read_func} field in
+the @code{awk_input_buf_t}. It must also ensure that @code{fd} is @emph{not}
+set to @code{INVALID_HANDLE}. The following list describes the fields that
+may be filled by @code{@var{XXX}_take_control_of()}:
+
+@table @code
+@item void *opaque;
+This is used to hold any state information needed by the input parser
+for this file. It is ``opaque'' to @command{gawk}. The input parser
+is not required to use this pointer.
+
+@item int@ (*get_record)(char@ **out,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct@ awk_input *iobuf,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ int *errcode,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ char **rt_start,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ size_t *rt_len);
+This function pointer should point to a function that creates the input
+records. Said function is the core of the input parser. Its behavior
+is described in the text following this list.
+
+@item ssize_t (*read_func)();
+This function pointer should point to a function that has the
+same behavior as the standard POSIX @code{read()} system call.
+It is an alternative to the @code{get_record} pointer. Its behavior
+is also described in the text following this list.
+
+@item void (*close_func)(struct awk_input *iobuf);
+This function pointer should point to a function that does
+the ``tear down.'' It should release any resources allocated by
+@code{@var{XXX}_take_control_of()}. It may also close the file. If it
+does so, it should set the @code{fd} field to @code{INVALID_HANDLE}.
+
+If @code{fd} is still not @code{INVALID_HANDLE} after the call to this
+function, @command{gawk} calls the regular @code{close()} system call.
+
+Having a ``tear down'' function is optional. If your input parser does
+not need it, do not set this field. Then, @command{gawk} calls the
+regular @code{close()} system call on the file descriptor, so it should
+be valid.
+@end table
+
+The @code{@var{XXX}_get_record()} function does the work of creating
+input records. The parameters are as follows:
+
+@table @code
+@item char **out
+This is a pointer to a @code{char *} variable which is set to point
+to the record. @command{gawk} makes its own copy of the data, so
+the extension must manage this storage.
+
+@item struct awk_input *iobuf
+This is the @code{awk_input_buf_t} for the file. The fields should be
+used for reading data (@code{fd}) and for managing private state
+(@code{opaque}), if any.
+
+@item int *errcode
+If an error occurs, @code{*errcode} should be set to an appropriate
+code from @code{<errno.h>}.
+
+@item char **rt_start
+@itemx size_t *rt_len
+If the concept of a ``record terminator'' makes sense, then
+@code{*rt_start} should be set to point to the data to be used for
+@code{RT}, and @code{*rt_len} should be set to the length of the
+data. Otherwise, @code{*rt_len} should be set to zero.
+@command{gawk} makes its own copy of this data, so the
+extension must manage this storage.
+@end table
+
+The return value is the length of the buffer pointed to by
+@code{*out}, or @code{EOF} if end-of-file was reached or an
+error occurred.
+
+It is guaranteed that @code{errcode} is a valid pointer, so there is no
+need to test for a @code{NULL} value. @command{gawk} sets @code{*errcode}
+to zero, so there is no need to set it unless an error occurs.
+
+If an error does occur, the function should return @code{EOF} and set
+@code{*errcode} to a value greater than zero. In that case, if @code{*errcode}
+does not equal zero, @command{gawk} automatically updates
+the @code{ERRNO} variable based on the value of @code{*errcode}.
+(In general, setting @samp{*errcode = errno} should do the right thing.)
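+
+Putting these pieces together, here is a deliberately naive sketch of
+a line-oriented @code{get_record()} function.  It reads one byte at a
+time and ignores overly long lines, so it illustrates the calling
+convention rather than production technique; it assumes that
+@code{<errno.h>} and @code{<unistd.h>} have been included, in addition
+to the headers required by @file{gawkapi.h}:
+
+@example
+static char linebuf[8192];
+
+static int
+simple_get_record(char **out, struct awk_input *iobuf,
+                  int *errcode, char **rt_start, size_t *rt_len)
+@{
+    static char newline = '\n';
+    size_t len = 0;
+    ssize_t n = 0;
+    char c = '\0';
+
+    while (len < sizeof(linebuf)
+           && (n = read(iobuf->fd, & c, 1)) == 1
+           && c != '\n')
+        linebuf[len++] = c;
+
+    if (n < 0) @{                 /* read error */
+        *errcode = errno;
+        return EOF;
+    @}
+    if (len == 0 && c != '\n')   /* end of file, nothing buffered */
+        return EOF;
+
+    *out = linebuf;
+    if (c == '\n') @{             /* record terminator seen */
+        *rt_start = & newline;
+        *rt_len = 1;
+    @} else
+        *rt_len = 0;             /* end of file without a final newline */
+
+    return len;
+@}
+@end example
+
+@noindent
+The extension's @code{@var{XXX}_take_control_of()} function would then
+store @code{simple_get_record} in the @code{get_record} field of the
+@code{awk_input_buf_t}.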
+
+As an alternative to supplying a function that returns an input record,
+you may instead supply a function that simply reads bytes, and let
+@command{gawk} parse the data into records. If you do so, the data
+should be returned in the multibyte encoding of the current locale.
+Such a function should follow the same behavior as the @code{read()}
+system call, and you fill in the @code{read_func} pointer with its
+address in the @code{awk_input_buf_t} structure.
+
+By default, @command{gawk} sets the @code{read_func} pointer to
+point to the @code{read()} system call. So your extension need not
+set this field explicitly.
+
+@quotation NOTE
+You must choose one method or the other: either a function that
+returns a record, or one that returns raw data. In particular,
+if you supply a function to get a record, @command{gawk} will
+call it, and never call the raw read function.
+@end quotation
+
+@command{gawk} ships with a sample extension that reads directories,
+returning records for each entry in the directory (@pxref{Extension
+Sample Readdir}). You may wish to use that code as a guide for writing
+your own input parser.
+
+When writing an input parser, you should think about (and document)
+how it is expected to interact with @command{awk} code. You may want
+it to always be called, and take effect as appropriate (as the
+@code{readdir} extension does). Or you may want it to take effect
+based upon the value of an @code{awk} variable, as the XML extension
+from the @code{gawkextlib} project does (@pxref{gawkextlib}).
+In the latter case, code in a @code{BEGINFILE} section
+can look at @code{FILENAME} and @code{ERRNO} to decide whether or
+not to activate an input parser (@pxref{BEGINFILE/ENDFILE}).
+
+You register your input parser with the following function:
+
+@table @code
+@item void register_input_parser(awk_input_parser_t *input_parser);
+Register the input parser pointed to by @code{input_parser} with
+@command{gawk}.
+@end table
+
+@node Output Wrappers
+@subsubsection Customized Output Wrappers
+@cindex customized output wrapper
+
+@cindex output wrapper
+An @dfn{output wrapper} is the mirror image of an input parser.
+It allows an extension to take over the output to a file opened
+with the @samp{>} or @samp{>>} I/O redirection operators (@pxref{Redirection}).
+
+The output wrapper is very similar to the input parser structure:
+
+@example
+typedef struct awk_output_wrapper @{
+ const char *name; /* name of the wrapper */
+ awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
+ awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
+ awk_const struct awk_output_wrapper *awk_const next; /* for gawk */
+@} awk_output_wrapper_t;
+@end example
+
+The members are as follows:
+
+@table @code
+@item const char *name;
+This is the name of the output wrapper.
+
+@item awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
+This points to a function that examines the information in
+the @code{awk_output_buf_t} structure pointed to by @code{outbuf}.
+It should return true if the output wrapper wants to take over the
+file, and false otherwise. It should not change any state (variable
+values, etc.) within @command{gawk}.
+
+@item awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
+The function pointed to by this field is called when @command{gawk}
+decides to let the output wrapper take control of the file. It should
+fill in appropriate members of the @code{awk_output_buf_t} structure,
+as described next, and return true if successful, false otherwise.
+
+@item awk_const struct awk_output_wrapper *awk_const next;
+This is for use by @command{gawk};
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
+@end table
+
+The @code{awk_output_buf_t} structure looks like this:
+
+@example
+typedef struct awk_output_buf @{
+ const char *name; /* name of output file */
+ const char *mode; /* mode argument to fopen */
+ FILE *fp; /* stdio file pointer */
+ awk_bool_t redirected; /* true if a wrapper is active */
+ void *opaque; /* for use by output wrapper */
+ size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
+ FILE *fp, void *opaque);
+ int (*gawk_fflush)(FILE *fp, void *opaque);
+ int (*gawk_ferror)(FILE *fp, void *opaque);
+ int (*gawk_fclose)(FILE *fp, void *opaque);
+@} awk_output_buf_t;
+@end example
+
+Here too, your extension will define @code{@var{XXX}_can_take_file()}
+and @code{@var{XXX}_take_control_of()} functions that examine and update
+data members in the @code{awk_output_buf_t}.
+The data members are as follows:
+
+@table @code
+@item const char *name;
+The name of the output file.
+
+@item const char *mode;
+The mode string (as would be used in the second argument to @code{fopen()})
+with which the file was opened.
+
+@item FILE *fp;
+The @code{FILE} pointer from @code{<stdio.h>}. @command{gawk} opens the file
+before attempting to find an output wrapper.
+
+@item awk_bool_t redirected;
+This field must be set to true by the @code{@var{XXX}_take_control_of()} function.
+
+@item void *opaque;
+This pointer is opaque to @command{gawk}. The extension should use it to store
+a pointer to any private data associated with the file.
+
+@item size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ FILE *fp, void *opaque);
+@itemx int (*gawk_fflush)(FILE *fp, void *opaque);
+@itemx int (*gawk_ferror)(FILE *fp, void *opaque);
+@itemx int (*gawk_fclose)(FILE *fp, void *opaque);
+These pointers should be set to point to functions that perform
+the equivalent of the corresponding @code{<stdio.h>} functions, if appropriate.
+@command{gawk} uses these function pointers for all output.
+@command{gawk} initializes the pointers to point to internal, ``pass through''
+functions that just call the regular @code{<stdio.h>} functions, so an
+extension only needs to redefine those functions that are appropriate for
+what it does.
+@end table
+
+The @code{@var{XXX}_can_take_file()} function should make a decision based
+upon the @code{name} and @code{mode} fields, and any additional state
+(such as @command{awk} variable values) that is appropriate.
+
+When @command{gawk} calls @code{@var{XXX}_take_control_of()}, that function should fill
+in the other fields, as appropriate, except for @code{fp}, which it should just
+use normally.
+
+You register your output wrapper with the following function:
+
+@table @code
+@item void register_output_wrapper(awk_output_wrapper_t *output_wrapper);
+Register the output wrapper pointed to by @code{output_wrapper} with
+@command{gawk}.
+@end table
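+
+As an illustration, here is a sketch of the @code{take_control_of()}
+side of a hypothetical wrapper that upper-cases everything written to
+the file.  Only @code{gawk_fwrite} is overridden; the other pointers
+keep their default, pass-through behavior.  It assumes that
+@code{<ctype.h>} has been included:
+
+@example
+static size_t
+upcase_fwrite(const void *buf, size_t size, size_t count,
+              FILE *fp, void *opaque)
+@{
+    const char *data = (const char *) buf;
+    size_t i, total = size * count;
+
+    for (i = 0; i < total; i++)
+        if (fputc(toupper((unsigned char) data[i]), fp) == EOF)
+            break;
+
+    return (size > 0 ? i / size : 0);   /* complete items written */
+@}
+
+static awk_bool_t
+upcase_take_control_of(awk_output_buf_t *outbuf)
+@{
+    outbuf->redirected = awk_true;        /* required */
+    outbuf->gawk_fwrite = upcase_fwrite;  /* override only fwrite */
+    return awk_true;
+@}
+@end example
+
+@noindent
+The corresponding @code{awk_output_wrapper_t} declaration and the call
+to @code{register_output_wrapper()} follow the same pattern shown
+earlier for input parsers.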
+
+@node Two-way processors
+@subsubsection Customized Two-way Processors
+@cindex customized two-way processor
+
+A @dfn{two-way processor} combines an input parser and an output wrapper for
+two-way I/O with the @samp{|&} operator (@pxref{Redirection}). It makes identical
+use of the @code{awk_input_buf_t} and @code{awk_output_buf_t} structures
+as described earlier.
+
+A two-way processor is represented by the following structure:
+
+@example
+typedef struct awk_two_way_processor @{
+ const char *name; /* name of the two-way processor */
+ awk_bool_t (*can_take_two_way)(const char *name);
+ awk_bool_t (*take_control_of)(const char *name,
+ awk_input_buf_t *inbuf,
+ awk_output_buf_t *outbuf);
+ awk_const struct awk_two_way_processor *awk_const next; /* for gawk */
+@} awk_two_way_processor_t;
+@end example
+
+The fields are as follows:
+
+@table @code
+@item const char *name;
+The name of the two-way processor.
+
+@item awk_bool_t (*can_take_two_way)(const char *name);
+This function returns true if it wants to take over two-way I/O for this @value{FN}.
+It should not change any state (variable
+values, etc.) within @command{gawk}.
+
+@item awk_bool_t (*take_control_of)(const char *name,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_input_buf_t *inbuf,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_output_buf_t *outbuf);
+This function should fill in the @code{awk_input_buf_t} and
+@code{awk_output_buf_t} structures pointed to by @code{inbuf} and
+@code{outbuf}, respectively. These structures were described earlier.
+
+@item awk_const struct awk_two_way_processor *awk_const next;
+This is for use by @command{gawk};
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
+@end table
+
+As with the input parser and output wrapper, you provide
+``yes I can take this'' and ``take over for this'' functions,
+@code{@var{XXX}_can_take_two_way()} and @code{@var{XXX}_take_control_of()}.
+
+You register your two-way processor with the following function:
+
+@table @code
+@item void register_two_way_processor(awk_two_way_processor_t *two_way_processor);
+Register the two-way processor pointed to by @code{two_way_processor} with
+@command{gawk}.
+@end table
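+
+The declaration follows the same pattern as for input parsers and
+output wrappers.  For a hypothetical extension that manages some
+coprocess protocol, it might look like this (the @code{proto} names
+are placeholders):
+
+@example
+static awk_bool_t proto_can_take_two_way(const char *name);
+static awk_bool_t proto_take_control_of(const char *name,
+                                        awk_input_buf_t *inbuf,
+                                        awk_output_buf_t *outbuf);
+
+static awk_two_way_processor_t proto_processor = @{
+    "proto",                  /* name */
+    proto_can_take_two_way,
+    proto_take_control_of,
+    NULL                      /* next; for use by gawk */
+@};
+@end example
+
+@noindent
+Your extension's initialization code would then call
+@code{register_two_way_processor(& proto_processor)}.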
+
+@node Printing Messages
+@subsection Printing Messages
+@cindex printing messages from extensions
+@cindex messages from extensions
+
+You can print different kinds of warning messages from your
+extension, as described here. Note that for these functions,
+you must pass in the extension id received from @command{gawk}
+when the extension was loaded:@footnote{Because the API uses only ISO C 90
+features, it cannot make use of the ISO C 99 variadic macro feature to hide
+that parameter. More's the pity.}
+
+@table @code
+@item void fatal(awk_ext_id_t id, const char *format, ...);
+Print a message and then cause @command{gawk} to exit immediately.
+
+@item void warning(awk_ext_id_t id, const char *format, ...);
+Print a warning message.
+
+@item void lintwarn(awk_ext_id_t id, const char *format, ...);
+Print a ``lint warning.'' Normally this is the same as printing a
+warning message, but if @command{gawk} was invoked with @samp{--lint=fatal},
+then lint warnings become fatal error messages.
+@end table
+
+All of these functions are otherwise like the C @code{printf()}
+family of functions, where the @code{format} parameter is a string
+with literal characters and formatting codes intermixed.
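+
+For instance, a @code{do_@var{xxx}()} function that insists on exactly
+one argument might complain like this (a sketch; @code{ext_id} comes
+from the standard boilerplate):
+
+@example
+static awk_value_t *
+do_example(int nargs, awk_value_t *result)
+@{
+    if (nargs != 1)
+        lintwarn(ext_id, "example: called with %d arguments, expected 1",
+                 nargs);
+
+    /* ... do the real work here ... */
+
+    return make_null_string(result);
+@}
+@end example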
+
+@node Updating @code{ERRNO}
+@subsection Updating @code{ERRNO}
+
+The following functions allow you to update the @code{ERRNO}
+variable:
+
+@table @code
+@item void update_ERRNO_int(int errno_val);
+Set @code{ERRNO} to the string equivalent of the error code
+in @code{errno_val}. The value should be one of the defined
+error codes in @code{<errno.h>}, and @command{gawk} turns it
+into a (possibly translated) string using the C @code{strerror()} function.
+
+@item void update_ERRNO_string(const char *string);
+Set @code{ERRNO} directly to the string value of @code{string}.
+@command{gawk} makes a copy of the value of @code{string}.
+
+@item void unset_ERRNO(void);
+Unset @code{ERRNO}.
+@end table
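+
+For example, inside a @code{do_@var{xxx}()} function that wraps the
+@code{chdir()} system call, the failure path might look like this
+(a sketch only; @code{newdir} is assumed to hold the requested
+directory name, and @code{<errno.h>} and @code{<unistd.h>} to have
+been included):
+
+@example
+if (chdir(newdir) < 0) @{
+    update_ERRNO_int(errno);            /* make errno visible as ERRNO */
+    return make_number(-1.0, result);   /* indicate failure to awk code */
+@}
+@end example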
+
+@node Requesting Values
+@subsection Requesting Values
+
+All of the functions that return values from @command{gawk}
+work in the same way. You pass in an @code{awk_valtype_t} value
+to indicate what kind of value you expect. If the actual value
+matches what you requested, the function returns true and fills
+in the @code{awk_value_t} result.
+Otherwise, the function returns false, and the @code{val_type}
+member indicates the type of the actual value. You may then
+print an error message, or reissue the request for the actual
+value type, as appropriate. This behavior is summarized in
+@ref{table-value-types-returned}.
+
+@float Table,table-value-types-returned
+@caption{API value types returned}
+@docbook
+<informaltable>
+<tgroup cols="6">
+ <colspec colwidth="16.6*"/>
+ <colspec colwidth="16.6*"/>
+ <colspec colwidth="19.8*" colname="c3"/>
+ <colspec colwidth="15*" colname="c4"/>
+ <colspec colwidth="15*" colname="c5"/>
+ <colspec colwidth="16.6*" colname="c6"/>
+ <spanspec spanname="hspan" namest="c3" nameend="c6" align="center"/>
+ <thead>
+ <row><entry></entry><entry spanname="hspan"><para>Type of Actual Value</para></entry></row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><para>String</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>Array</para></entry>
+ <entry><para>Undefined</para></entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">String</emphasis></para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">Number</emphasis></para></entry>
+ <entry><para>Number if can be converted, else false</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry><para><emphasis role="bold">Type</emphasis></para></entry>
+ <entry><para><emphasis role="bold">Array</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>Array</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry><para><emphasis role="bold">Requested</emphasis></para></entry>
+ <entry><para><emphasis role="bold">Scalar</emphasis></para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">Undefined</emphasis></para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>Array</para></entry>
+ <entry><para>Undefined</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">Value Cookie</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para>
+ </entry><entry><para>false</para></entry>
+ </row>
+ </tbody>
+</tgroup>
+</informaltable>
+@end docbook
+
+@ifnotplaintext
+@ifnotdocbook
+@multitable @columnfractions .50 .50
+@headitem @tab Type of Actual Value
+@end multitable
+@c 10/2014: Thanks to Karl Berry for this bit to reduce the space:
+@tex
+\vglue-1.1\baselineskip
+@end tex
+@multitable @columnfractions .166 .166 .198 .15 .15 .166
+@headitem @tab @tab String @tab Number @tab Array @tab Undefined
+@item @tab @b{String} @tab String @tab String @tab false @tab false
+@item @tab @b{Number} @tab Number if can be converted, else false @tab Number @tab false @tab false
+@item @b{Type} @tab @b{Array} @tab false @tab false @tab Array @tab false
+@item @b{Requested} @tab @b{Scalar} @tab Scalar @tab Scalar @tab false @tab false
+@item @tab @b{Undefined} @tab String @tab Number @tab Array @tab Undefined
+@item @tab @b{Value Cookie} @tab false @tab false @tab false @tab false
+@end multitable
+@end ifnotdocbook
+@end ifnotplaintext
+@ifplaintext
+@example
+ +-------------------------------------------------+
+ | Type of Actual Value: |
+ +------------+------------+-----------+-----------+
+ | String | Number | Array | Undefined |
++-----------+-----------+------------+------------+-----------+-----------+
+| | String | String | String | false | false |
+| |-----------+------------+------------+-----------+-----------+
+| | Number | Number if | Number | false | false |
+| | | can be | | | |
+| | | converted, | | | |
+| | | else false | | | |
+| |-----------+------------+------------+-----------+-----------+
+| Type | Array | false | false | Array | false |
+| Requested |-----------+------------+------------+-----------+-----------+
+| | Scalar | Scalar | Scalar | false | false |
+| |-----------+------------+------------+-----------+-----------+
+| | Undefined | String | Number | Array | Undefined |
+| |-----------+------------+------------+-----------+-----------+
+| | Value | false | false | false | false |
+| | Cookie | | | | |
++-----------+-----------+------------+------------+-----------+-----------+
+@end example
+@end ifplaintext
+@end float
+
+@node Accessing Parameters
+@subsection Accessing and Updating Parameters
+
+Two functions give you access to the arguments (parameters)
+passed to your extension function. They are:
+
+@table @code
+@item awk_bool_t get_argument(size_t count,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_valtype_t wanted,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result);
+Fill in the @code{awk_value_t} structure pointed to by @code{result}
+with the @code{count}'th argument. Return true if the actual
+type matches @code{wanted}, false otherwise. In the latter
+case, @code{result@w{->}val_type} indicates the actual type
+(@pxref{table-value-types-returned}). Counts are zero-based---the first
+argument is numbered zero, the second is numbered one, and so on. @code{wanted}
+indicates the type of value expected.
+
+@item awk_bool_t set_argument(size_t count, awk_array_t array);
+Convert a parameter that was undefined into an array; this provides
+call-by-reference for arrays. Return false if @code{count} is too big,
+or if the argument's type is not undefined. @DBXREF{Array Manipulation}
+for more information on creating arrays.
+@end table
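+
+For example, a hypothetical @code{double_it()} extension function
+might retrieve its single numeric argument like this:
+
+@example
+static awk_value_t *
+do_double_it(int nargs, awk_value_t *result)
+@{
+    awk_value_t num;
+
+    if (nargs != 1 || ! get_argument(0, AWK_NUMBER, & num)) @{
+        warning(ext_id, "double_it: expected one numeric argument");
+        return make_number(0.0, result);
+    @}
+
+    return make_number(num.num_value * 2, result);
+@}
+@end example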
+
+@node Symbol Table Access
+@subsection Symbol Table Access
+@cindex accessing global variables from extensions
+
+Two sets of routines provide access to global variables, and one set
+allows you to create and release cached values.
+
+@menu
+* Symbol table by name:: Accessing variables by name.
+* Symbol table by cookie:: Accessing variables by ``cookie''.
+* Cached values:: Creating and using cached values.
+@end menu
+
+@node Symbol table by name
+@subsubsection Variable Access and Update by Name
+
+The following routines provide the ability to access and update
+global @command{awk}-level variables by name. In compiler terminology,
+identifiers of different kinds are termed @dfn{symbols}, thus the ``sym''
+in the routines' names. The data structure which stores information
+about symbols is termed a @dfn{symbol table}.
+
+@table @code
+@item awk_bool_t sym_lookup(const char *name,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_valtype_t wanted,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result);
+Fill in the @code{awk_value_t} structure pointed to by @code{result}
+with the value of the variable named by the string @code{name}, which is
+a regular C string. @code{wanted} indicates the type of value expected.
+Return true if the actual type matches @code{wanted}, false otherwise.
+In the latter case, @code{result->val_type} indicates the actual type
+(@pxref{table-value-types-returned}).
+
+@item awk_bool_t sym_update(const char *name, awk_value_t *value);
+Update the variable named by the string @code{name}, which is a regular
+C string. The variable is added to @command{gawk}'s symbol table
+if it is not there. Return true if everything worked, false otherwise.
+
+Changing types (scalar to array or vice versa) of an existing variable
+is @emph{not} allowed, nor may this routine be used to update an array.
+This routine cannot be used to update any of the predefined
+variables (such as @code{ARGC} or @code{NF}).
+@end table
+
+An extension can look up the value of @command{gawk}'s special variables.
+However, with the exception of the @code{PROCINFO} array, an extension
+cannot change any of those variables.
+
+@quotation CAUTION
+It is possible for the lookup of @code{PROCINFO} to fail. This happens if
+the @command{awk} program being run does not reference @code{PROCINFO};
+in this case, @command{gawk} doesn't bother to create the array and
+populate it.
+@end quotation
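+
+Code that needs @code{PROCINFO} should therefore be prepared for the
+lookup to fail, along these lines (a minimal sketch):
+
+@example
+awk_value_t procinfo;
+
+if (! sym_lookup("PROCINFO", AWK_ARRAY, & procinfo)) @{
+    /* the awk program never referenced PROCINFO;
+       do without the information it would provide */
+    @dots{}
+@} else @{
+    /* use procinfo.array_cookie as usual */
+    @dots{}
+@}
+@end example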
+
+@node Symbol table by cookie
+@subsubsection Variable Access and Update by Cookie
+
+A @dfn{scalar cookie} is an opaque handle that provides access
+to a global variable or array. It is an optimization that
+avoids looking up variables in @command{gawk}'s symbol table every time
+access is needed. This was discussed earlier in @ref{General Data Types}.
+
+The following functions let you work with scalar cookies:
+
+@table @code
+@item awk_bool_t sym_lookup_scalar(awk_scalar_t cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_valtype_t wanted,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result);
+Retrieve the current value of a scalar cookie.
+Once you have obtained a scalar cookie using @code{sym_lookup()}, you can
+use this function to get its value more efficiently.
+Return false if the value cannot be retrieved.
+
+@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
+Update the value associated with a scalar cookie. Return false if
+the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}.
+Here too, the predefined variables may not be updated.
+@end table
+
+It is not obvious at first glance how to work with scalar cookies or
+what their @i{raison d'@^etre} really is. In theory, the @code{sym_lookup()}
+and @code{sym_update()} routines are all you really need to work with
+variables. For example, you might have code that looks up the value of
+a variable, evaluates a condition, and then possibly changes the value
+of the variable based on the result of that evaluation, like so:
+
+@example
+/* do_magic --- do something really great */
+
+static awk_value_t *
+do_magic(int nargs, awk_value_t *result)
+@{
+ awk_value_t value;
+
+ if ( sym_lookup("MAGIC_VAR", AWK_NUMBER, & value)
+ && some_condition(value.num_value)) @{
+ value.num_value += 42;
+ sym_update("MAGIC_VAR", & value);
+ @}
+
+ return make_number(0.0, result);
+@}
+@end example
+
+@noindent
+This code looks (and is) simple and straightforward. So what's the problem?
+
+Well, consider what happens if @command{awk}-level code associated
+with your extension calls the @code{magic()} function (implemented in
+C by @code{do_magic()}), once per record, while processing hundreds
+of thousands or millions of records. The @code{MAGIC_VAR} variable is
+looked up in the symbol table once or twice per function call!
+
+The symbol table lookup is really pure overhead; it is considerably
+more efficient to get a cookie that represents the variable, and use
+that to get the variable's value and update it as needed.@footnote{The
+difference is measurable and quite real. Trust us.}
+
+Thus, the way to use cookies is as follows. First, install
+your extension's variable in @command{gawk}'s symbol table using
+@code{sym_update()}, as usual. Then get a scalar cookie for the variable
+using @code{sym_lookup()}:
+
+@example
+static awk_scalar_t magic_var_cookie; /* cookie for MAGIC_VAR */
+
+static void
+my_extension_init()
+@{
+ awk_value_t value;
+
+ /* install initial value */
+ sym_update("MAGIC_VAR", make_number(42.0, & value));
+
+ /* get the cookie */
+ sym_lookup("MAGIC_VAR", AWK_SCALAR, & value);
+
+ /* save the cookie */
+ magic_var_cookie = value.scalar_cookie;
+ @dots{}
+@}
+@end example
+
+Next, use the routines in this section for retrieving and updating
+the value through the cookie. Thus, @code{do_magic()} now becomes
+something like this:
+
+@example
+/* do_magic --- do something really great */
+
+static awk_value_t *
+do_magic(int nargs, awk_value_t *result)
+@{
+ awk_value_t value;
+
+ if ( sym_lookup_scalar(magic_var_cookie, AWK_NUMBER, & value)
+ && some_condition(value.num_value)) @{
+ value.num_value += 42;
+ sym_update_scalar(magic_var_cookie, & value);
+ @}
+ @dots{}
+
+ return make_number(0.0, result);
+@}
+@end example
+
+@quotation NOTE
+The previous code omitted error checking for
+presentation purposes. Your extension code should be more robust
+and carefully check the return values from the API functions.
+@end quotation
+
+@node Cached values
+@subsubsection Creating and Using Cached Values
+
+The routines in this section allow you to create and release
+cached values. As with scalar cookies, in theory, cached values
+are not necessary. You can create numbers and strings using
+the functions in @ref{Constructor Functions}. You can then
+assign those values to variables using @code{sym_update()}
+or @code{sym_update_scalar()}, as you like.
+
+However, you can understand the point of cached values if you remember that
+@emph{every} string value's storage @emph{must} come from @code{gawk_malloc()},
+@code{gawk_calloc()}, or @code{gawk_realloc()}.
+If you have 20 variables, all of which have the same string value, you
+must create 20 identical copies of the string.@footnote{Numeric values
+are clearly less problematic, requiring only a C @code{double} to store.}
+
+It is clearly more efficient, if possible, to create a value once, and
+then tell @command{gawk} to reuse the value for multiple variables. That
+is what the routines in this section let you do. The functions are as follows:
+
+@table @code
+@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
+Create a cached string or numeric value from @code{value} for
+efficient later assignment. Only values of type @code{AWK_NUMBER}
+and @code{AWK_STRING} are allowed. Any other type is rejected.
+@code{AWK_UNDEFINED} could be allowed, but doing so would result in
+inferior performance.
+
+@item awk_bool_t release_value(awk_value_cookie_t vc);
+Release the memory associated with a value cookie obtained
+from @code{create_value()}.
+@end table
+
+You use value cookies in a fashion similar to the way you use scalar cookies.
+In the extension initialization routine, you create the value cookie:
+
+@example
+static awk_value_cookie_t answer_cookie; /* static value cookie */
+
+static void
+my_extension_init()
+@{
+ awk_value_t value;
+ char *long_string;
+ size_t long_string_len;
+
+ /* code from earlier */
+ @dots{}
+ /* @dots{} fill in long_string and long_string_len @dots{} */
+ make_malloced_string(long_string, long_string_len, & value);
+ create_value(& value, & answer_cookie); /* create cookie */
+ @dots{}
+@}
+@end example
+
+Once the value is created, you can use it as the value of any number
+of variables:
+
+@example
+static awk_value_t *
+do_magic(int nargs, awk_value_t *result)
+@{
+ awk_value_t new_value;
+
+ @dots{} /* as earlier */
+
+    new_value.val_type = AWK_VALUE_COOKIE;
+    new_value.value_cookie = answer_cookie;
+    sym_update("VAR1", & new_value);
+    sym_update("VAR2", & new_value);
+    @dots{}
+    sym_update("VAR100", & new_value);
+ @dots{}
+@}
+@end example
+
+@noindent
+Using value cookies in this way saves considerable storage, as all of
+@code{VAR1} through @code{VAR100} share the same value.
+
+You might be wondering, ``Is this sharing problematic?
+What happens if @command{awk} code assigns a new value to @code{VAR1}?
+Are all the others changed too?''
+
+That's a great question. The answer is that no, it's not a problem.
+Internally, @command{gawk} uses @dfn{reference-counted strings}. This means
+that many variables can share the same string value, and @command{gawk}
+keeps track of the usage. When a variable's value changes, @command{gawk}
+simply decrements the reference count on the old value and updates
+the variable to use the new value.
+
+Finally, as part of your cleanup action (@pxref{Exit Callback Functions})
+you should release any cached values that you created, using
+@code{release_value()}.
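+
+For example, assuming the @code{answer_cookie} shown earlier was created
+successfully, an exit callback registered with @code{awk_atexit()}
+(@pxref{Exit Callback Functions}) might release it like this
+(a minimal sketch; the function name is purely illustrative):
+
+@example
+/* my_extension_exit --- release cached values at exit */
+
+static void
+my_extension_exit(void *data, int exit_status)
+@{
+    release_value(answer_cookie);
+@}
+@end example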
+
+@node Array Manipulation
+@subsection Array Manipulation
+@cindex array manipulation in extensions
+
+The primary data structure@footnote{OK, the only data structure.} in @command{awk}
+is the associative array (@pxref{Arrays}).
+Extensions need to be able to manipulate @command{awk} arrays.
+The API provides a number of data structures for working with arrays,
+functions for working with individual elements, and functions for
+working with arrays as a whole. This includes the ability to
+``flatten'' an array so that it is easy for C code to traverse
+every element in an array. The array data structures integrate
+nicely with the data structures for values to make it easy to
+both work with and create true arrays of arrays (@pxref{General Data Types}).
+
+@menu
+* Array Data Types:: Data types for working with arrays.
+* Array Functions:: Functions for working with arrays.
+* Flattening Arrays:: How to flatten arrays.
+* Creating Arrays:: How to create and populate arrays.
+@end menu
+
+@node Array Data Types
+@subsubsection Array Data Types
+
+The data types associated with arrays are as follows:
+
+@table @code
+@item typedef void *awk_array_t;
+If you request the value of an array variable, you get back an
+@code{awk_array_t} value. This value is opaque@footnote{It is also
+a ``cookie,'' but the @command{gawk} developers did not wish to overuse this
+term.} to the extension; it uniquely identifies the array but can
+only be used by passing it into API functions or receiving it from API
+functions. This is very similar to the way @samp{FILE *} values are used
+with the @code{<stdio.h>} library routines.
+
+@item typedef struct awk_element @{
+@itemx @ @ @ @ /* convenience linked list pointer, not used by gawk */
+@itemx @ @ @ @ struct awk_element *next;
+@itemx @ @ @ @ enum @{
+@itemx @ @ @ @ @ @ @ @ AWK_ELEMENT_DEFAULT = 0,@ @ /* set by gawk */
+@itemx @ @ @ @ @ @ @ @ AWK_ELEMENT_DELETE = 1@ @ @ @ /* set by extension */
+@itemx @ @ @ @ @} flags;
+@itemx @ @ @ @ awk_value_t index;
+@itemx @ @ @ @ awk_value_t value;
+@itemx @} awk_element_t;
+The @code{awk_element_t} is a ``flattened''
+array element. @command{awk} produces an array of these
+inside the @code{awk_flat_array_t} (see the next item).
+Individual elements may be marked for deletion. New elements must be added
+individually, one at a time, using the separate API for that purpose.
+The fields are as follows:
+
+@c nested table
+@table @code
+@item struct awk_element *next;
+This pointer is for the convenience of extension writers. It allows
+an extension to create a linked list of new elements that can then be
+added to an array in a loop that traverses the list.
+
+@item enum @{ @dots{} @} flags;
+A set of flag values that convey information between the extension
+and @command{gawk}. Currently there is only one: @code{AWK_ELEMENT_DELETE}.
+Setting it causes @command{gawk} to delete the
+element from the original array upon release of the flattened array.
+
+@item index
+@itemx value
+The index and value of the element, respectively.
+@emph{All} memory pointed to by @code{index} and @code{value} belongs to @command{gawk}.
+@end table
+
+@item typedef struct awk_flat_array @{
+@itemx @ @ @ @ awk_const void *awk_const opaque1;@ @ @ @ /* for use by gawk */
+@itemx @ @ @ @ awk_const void *awk_const opaque2;@ @ @ @ /* for use by gawk */
+@itemx @ @ @ @ awk_const size_t count;@ @ @ @ @ /* how many elements */
+@itemx @ @ @ @ awk_element_t elements[1];@ @ /* will be extended */
+@itemx @} awk_flat_array_t;
+This is a flattened array. When an extension gets one of these
+from @command{gawk}, the @code{elements} array is of actual
+size @code{count}.
+The @code{opaque1} and @code{opaque2} pointers are for use by @command{gawk};
+therefore they are marked @code{awk_const} so that the extension cannot
+modify them.
+@end table
+
+@node Array Functions
+@subsubsection Array Functions
+
+The following functions relate to individual array elements.
+
+@table @code
+@item awk_bool_t get_element_count(awk_array_t a_cookie, size_t *count);
+For the array represented by @code{a_cookie}, place in @code{*count}
+the number of elements it contains. A subarray counts as a single element.
+Return false if there is an error.
+
+@item awk_bool_t get_array_element(awk_array_t a_cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_value_t *const index,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_valtype_t wanted,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result);
+For the array represented by @code{a_cookie}, return in @code{*result}
+the value of the element whose index is @code{index}.
+@code{wanted} specifies the type of value you wish to retrieve.
+Return false if @code{wanted} does not match the actual type or if
+@code{index} is not in the array (@pxref{table-value-types-returned}).
+
+The value for @code{index} can be numeric, in which case @command{gawk}
+converts it to a string. Using non-integral values is possible, but
+requires that you understand how such values are converted to strings
+(@pxref{Conversion}); thus using integral values is safest.
+
+As with @emph{all} strings passed into @command{gawk} from an extension,
+the string value of @code{index} must come from @code{gawk_malloc()},
+@code{gawk_calloc()}, or @code{gawk_realloc()}, and
+@command{gawk} releases the storage.
+
+@item awk_bool_t set_array_element(awk_array_t a_cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const@ awk_value_t *const index,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const@ awk_value_t *const value);
+In the array represented by @code{a_cookie}, create or modify
+the element whose index is given by @code{index}.
+The @code{ARGV} and @code{ENVIRON} arrays may not be changed,
+although the @code{PROCINFO} array can be.
+
+@item awk_bool_t set_array_element_by_elem(awk_array_t a_cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_element_t element);
+Like @code{set_array_element()}, but take the @code{index} and @code{value}
+from @code{element}. This is a convenience macro.
+
+@item awk_bool_t del_array_element(awk_array_t a_cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_value_t* const index);
+Remove the element with the given index from the array
+represented by @code{a_cookie}.
+Return true if the element was removed, or false if the element did
+not exist in the array.
+@end table
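+
+For example, assuming @code{a_cookie} is a valid array cookie obtained as
+described earlier, a fragment that fetches, replaces, and removes elements
+might look like this (a minimal sketch; error checking is abbreviated):
+
+@example
+awk_value_t index, value;
+
+/* fetch a_cookie["hello"] as a string */
+(void) make_const_string("hello", 5, & index);
+if (get_array_element(a_cookie, & index, AWK_STRING, & value))
+    printf("value is %s\n", value.str_value.str);
+
+/* replace it with a number */
+(void) make_const_string("hello", 5, & index);
+(void) make_number(3.1415, & value);
+if (! set_array_element(a_cookie, & index, & value))
+    printf("set_array_element failed\n");
+
+/* remove a_cookie["goodbye"], if it exists */
+(void) make_const_string("goodbye", 7, & index);
+(void) del_array_element(a_cookie, & index);
+@end example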
+
+The following functions relate to arrays as a whole:
+
+@table @code
+@item awk_array_t create_array(void);
+Create a new array to which elements may be added.
+@DBXREF{Creating Arrays} for a discussion of how to
+create a new array and add elements to it.
+
+@item awk_bool_t clear_array(awk_array_t a_cookie);
+Clear the array represented by @code{a_cookie}.
+Return false if there was some kind of problem, true otherwise.
+The array remains an array, but after calling this function, it
+has no elements. This is equivalent to using the @code{delete}
+statement (@pxref{Delete}).
+
+@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
+structure and fill it in. Set the pointer whose address is passed as @code{data}
+to point to this structure.
+Return true upon success, or false otherwise.
+@ifset FOR_PRINT
+See the next section
+@end ifset
+@ifclear FOR_PRINT
+@xref{Flattening Arrays},
+@end ifclear
+for a discussion of how to
+flatten an array and work with it.
+
+@item awk_bool_t release_flattened_array(awk_array_t a_cookie,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_flat_array_t *data);
+When done with a flattened array, release the storage using this function.
+You must pass in both the original array cookie, and the address of
+the created @code{awk_flat_array_t} structure.
+The function returns true upon success, false otherwise.
+@end table
+
+@node Flattening Arrays
+@subsubsection Working With All The Elements of an Array
+
+To @dfn{flatten} an array is to create a structure that
+represents the full array in a fashion that makes it easy
+for C code to traverse the entire array. Test code
+in @file{extension/testext.c} does this, and also serves
+as a nice example showing how to use the APIs.
+
+We walk through that part of the code one step at a time.
+First, the @command{gawk} script that drives the test extension:
+
+@example
+@@load "testext"
+BEGIN @{
+ n = split("blacky rusty sophie raincloud lucky", pets)
+ printf("pets has %d elements\n", length(pets))
+ ret = dump_array_and_delete("pets", "3")
+ printf("dump_array_and_delete(pets) returned %d\n", ret)
+ if ("3" in pets)
+ printf("dump_array_and_delete() did NOT remove index \"3\"!\n")
+ else
+ printf("dump_array_and_delete() did remove index \"3\"!\n")
+ print ""
+@}
+@end example
+
+@noindent
+This code creates an array with @code{split()} (@pxref{String Functions})
+and then calls @code{dump_array_and_delete()}. That function looks up
+the array whose name is passed as the first argument, and
+deletes the element at the index passed in the second argument.
+The @command{awk} code then prints the return value and checks if the element
+was indeed deleted. Here is the C code that implements
+@code{dump_array_and_delete()}. It has been edited slightly for
+presentation.
+
+The first part declares variables, sets up the default
+return value in @code{result}, and checks that the function
+was called with the correct number of arguments:
+
+@example
+static awk_value_t *
+dump_array_and_delete(int nargs, awk_value_t *result)
+@{
+ awk_value_t value, value2, value3;
+ awk_flat_array_t *flat_array;
+ size_t count;
+ char *name;
+ int i;
+
+ assert(result != NULL);
+ make_number(0.0, result);
+
+ if (nargs != 2) @{
+ printf("dump_array_and_delete: nargs not right "
+ "(%d should be 2)\n", nargs);
+ goto out;
+ @}
+@end example
+
+The function then proceeds in steps, as follows. First, retrieve
+the name of the array, passed as the first argument. Then
+retrieve the array itself. If either operation fails, print
+error messages and return:
+
+@example
+ /* get argument named array as flat array and print it */
+ if (get_argument(0, AWK_STRING, & value)) @{
+ name = value.str_value.str;
+ if (sym_lookup(name, AWK_ARRAY, & value2))
+ printf("dump_array_and_delete: sym_lookup of %s passed\n",
+ name);
+ else @{
+ printf("dump_array_and_delete: sym_lookup of %s failed\n",
+ name);
+ goto out;
+ @}
+ @} else @{
+ printf("dump_array_and_delete: get_argument(0) failed\n");
+ goto out;
+ @}
+@end example
+
+For testing purposes and to make sure that the C code sees
+the same number of elements as the @command{awk} code,
+the second step is to get the count of elements in the array
+and print it:
+
+@example
+ if (! get_element_count(value2.array_cookie, & count)) @{
+ printf("dump_array_and_delete: get_element_count failed\n");
+ goto out;
+ @}
+
+ printf("dump_array_and_delete: incoming size is %lu\n",
+ (unsigned long) count);
+@end example
+
+The third step is to actually flatten the array, and then
+to double check that the count in the @code{awk_flat_array_t}
+is the same as the count just retrieved:
+
+@example
+ if (! flatten_array(value2.array_cookie, & flat_array)) @{
+ printf("dump_array_and_delete: could not flatten array\n");
+ goto out;
+ @}
+
+ if (flat_array->count != count) @{
+ printf("dump_array_and_delete: flat_array->count (%lu)"
+ " != count (%lu)\n",
+ (unsigned long) flat_array->count,
+ (unsigned long) count);
+ goto out;
+ @}
+@end example
+
+The fourth step is to retrieve the index of the element
+to be deleted, which was passed as the second argument.
+Remember that argument counts passed to @code{get_argument()}
+are zero-based, thus the second argument is numbered one:
+
+@example
+ if (! get_argument(1, AWK_STRING, & value3)) @{
+ printf("dump_array_and_delete: get_argument(1) failed\n");
+ goto out;
+ @}
+@end example
+
+The fifth step is where the ``real work'' is done. The function
+loops over every element in the array, printing the index and
+element values. In addition, upon finding the element with the
+index that is supposed to be deleted, the function sets the
+@code{AWK_ELEMENT_DELETE} bit in the @code{flags} field
+of the element. When the array is released, @command{gawk}
+traverses the flattened array, and deletes any elements which
+have this flag bit set:
+
+@example
+ for (i = 0; i < flat_array->count; i++) @{
+ printf("\t%s[\"%.*s\"] = %s\n",
+ name,
+ (int) flat_array->elements[i].index.str_value.len,
+ flat_array->elements[i].index.str_value.str,
+ valrep2str(& flat_array->elements[i].value));
+
+ if (strcmp(value3.str_value.str,
+ flat_array->elements[i].index.str_value.str) == 0) @{
+ flat_array->elements[i].flags |= AWK_ELEMENT_DELETE;
+ printf("dump_array_and_delete: marking element \"%s\" "
+ "for deletion\n",
+ flat_array->elements[i].index.str_value.str);
+ @}
+ @}
+@end example
+
+The sixth step is to release the flattened array. This tells
+@command{gawk} that the extension is no longer using the array,
+and that it should delete any elements marked for deletion.
+@command{gawk} also frees any storage that was allocated,
+so you should not use the pointer (@code{flat_array} in this
+code) once you have called @code{release_flattened_array()}:
+
+@example
+ if (! release_flattened_array(value2.array_cookie, flat_array)) @{
+ printf("dump_array_and_delete: could not release flattened array\n");
+ goto out;
+ @}
+@end example
+
+Finally, because everything was successful, the function sets the
+return value to success, and returns:
+
+@example
+ make_number(1.0, result);
+out:
+ return result;
+@}
+@end example
+
+Here is the output from running this part of the test:
+
+@example
+pets has 5 elements
+dump_array_and_delete: sym_lookup of pets passed
+dump_array_and_delete: incoming size is 5
+ pets["1"] = "blacky"
+ pets["2"] = "rusty"
+ pets["3"] = "sophie"
+dump_array_and_delete: marking element "3" for deletion
+ pets["4"] = "raincloud"
+ pets["5"] = "lucky"
+dump_array_and_delete(pets) returned 1
+dump_array_and_delete() did remove index "3"!
+@end example
+
+@node Creating Arrays
+@subsubsection How To Create and Populate Arrays
+
+Besides working with arrays created by @command{awk} code, you can
+create arrays and populate them as you see fit, and then @command{awk}
+code can access them and manipulate them.
+
+There are two important points about creating arrays from extension code:
+
+@itemize @value{BULLET}
+@item
+You must install a new array into @command{gawk}'s symbol
+table immediately upon creating it. Once you have done so,
+you can then populate the array.
+
+@ignore
+Strictly speaking, this is required only
+for arrays that will have subarrays as elements; however it is
+a good idea to always do this. This restriction may be relaxed
+in a subsequent revision of the API.
+@end ignore
+
+Similarly, if installing a new array as a subarray of an existing array,
+you must add the new array to its parent before adding any elements to it.
+
+Thus, the correct way to build an array is to work ``top down.'' Create
+the array, and immediately install it in @command{gawk}'s symbol table
+using @code{sym_update()}, or install it as an element in a previously
+existing array using @code{set_array_element()}. We show example code shortly.
+
+@item
+Due to @command{gawk} internals, after using @code{sym_update()} to install an array
+into @command{gawk}, you have to retrieve the array cookie from the value
+passed in to @code{sym_update()} before doing anything else with it, like so:
+
+@example
+awk_value_t val;
+awk_array_t new_array;
+
+new_array = create_array();
+val.val_type = AWK_ARRAY;
+val.array_cookie = new_array;
+
+/* install array in the symbol table */
+sym_update("array", & val);
+
+new_array = val.array_cookie; /* YOU MUST DO THIS */
+@end example
+
+If installing an array as a subarray, you must also retrieve the value
+of the array cookie after the call to @code{set_array_element()}.
+@end itemize
+
+The following C code is a simple test extension to create an array
+with two regular elements and with a subarray. The leading @code{#include}
+directives and boilerplate variable declarations
+(@pxref{Extension API Boilerplate})
+are omitted for brevity.
+The first step is to create a new array and then install it
+in the symbol table:
+
+@example
+@ignore
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "gawkapi.h"
+
+static const gawk_api_t *api; /* for convenience macros to work */
+static awk_ext_id_t *ext_id;
+static const char *ext_version = "testarray extension: version 1.0";
+
+int plugin_is_GPL_compatible;
+
+@end ignore
+/* create_new_array --- create a named array */
+
+static void
+create_new_array()
+@{
+ awk_array_t a_cookie;
+ awk_array_t subarray;
+ awk_value_t index, value;
+
+ a_cookie = create_array();
+ value.val_type = AWK_ARRAY;
+ value.array_cookie = a_cookie;
+
+ if (! sym_update("new_array", & value))
+ printf("create_new_array: sym_update(\"new_array\") failed!\n");
+ a_cookie = value.array_cookie;
+@end example
+
+@noindent
+Note how @code{a_cookie} is reset from the @code{array_cookie} field in
+the @code{value} structure.
+
+The second step is to install two regular values into @code{new_array}:
+
+@example
+ (void) make_const_string("hello", 5, & index);
+ (void) make_const_string("world", 5, & value);
+ if (! set_array_element(a_cookie, & index, & value)) @{
+ printf("fill_in_array: set_array_element failed\n");
+ return;
+ @}
+
+ (void) make_const_string("answer", 6, & index);
+ (void) make_number(42.0, & value);
+ if (! set_array_element(a_cookie, & index, & value)) @{
+ printf("fill_in_array: set_array_element failed\n");
+ return;
+ @}
+@end example
+
+The third step is to create the subarray and install it:
+
+@example
+ (void) make_const_string("subarray", 8, & index);
+ subarray = create_array();
+ value.val_type = AWK_ARRAY;
+ value.array_cookie = subarray;
+ if (! set_array_element(a_cookie, & index, & value)) @{
+ printf("fill_in_array: set_array_element failed\n");
+ return;
+ @}
+ subarray = value.array_cookie;
+@end example
+
+The final step is to populate the subarray with its own element:
+
+@example
+ (void) make_const_string("foo", 3, & index);
+ (void) make_const_string("bar", 3, & value);
+ if (! set_array_element(subarray, & index, & value)) @{
+ printf("fill_in_array: set_array_element failed\n");
+ return;
+ @}
+@}
+@ignore
+static awk_ext_func_t func_table[] = @{
+ @{ NULL, NULL, 0 @}
+@};
+
+/* init_testarray --- additional initialization function */
+
+static awk_bool_t init_testarray(void)
+@{
+ create_new_array();
+
+ return awk_true;
+@}
+
+static awk_bool_t (*init_func)(void) = init_testarray;
+
+dl_load_func(func_table, testarray, "")
+@end ignore
+@end example
+
+Here is a sample script that loads the extension
+and then dumps the array:
+
+@example
+@@load "subarray"
+
+function dumparray(name, array, i)
+@{
+ for (i in array)
+ if (isarray(array[i]))
+ dumparray(name "[\"" i "\"]", array[i])
+ else
+ printf("%s[\"%s\"] = %s\n", name, i, array[i])
+@}
+
+BEGIN @{
+ dumparray("new_array", new_array);
+@}
+@end example
+
+Here is the result of running the script:
+
+@example
+$ @kbd{AWKLIBPATH=$PWD ./gawk -f subarray.awk}
+@print{} new_array["subarray"]["foo"] = bar
+@print{} new_array["hello"] = world
+@print{} new_array["answer"] = 42
+@end example
+
+@noindent
+(@DBXREF{Finding Extensions} for more information on the
+@env{AWKLIBPATH} environment variable.)
+
+@node Extension API Variables
+@subsection API Variables
+
+The API provides two sets of variables. The first provides information
+about the version of the API (both with which the extension was compiled,
+and with which @command{gawk} was compiled). The second provides
+information about how @command{gawk} was invoked.
+
+@menu
+* Extension Versioning:: API Version information.
+* Extension API Informational Variables:: Variables providing information about
+ @command{gawk}'s invocation.
+@end menu
+
+@node Extension Versioning
+@subsubsection API Version Constants and Variables
+@cindex API version
+@cindex extension API version
+
+The API provides both a ``major'' and a ``minor'' version number.
+The API versions are available at compile time as constants:
+
+@table @code
+@item GAWK_API_MAJOR_VERSION
+The major version of the API.
+
+@item GAWK_API_MINOR_VERSION
+The minor version of the API.
+@end table
+
+The minor version increases when new functions are added to the API. Such
+new functions are always added to the end of the API @code{struct}.
+
+The major version increases (and the minor version is reset to zero) if any
+of the data types change size or member order, or if any of the existing
+functions change signature.
+
+An extension may be compiled against one version
+of the API but loaded by a version of @command{gawk} using a different
+version. For this reason, the major and minor API versions of the
+running @command{gawk} are included in the API @code{struct} as read-only
+constant integers:
+
+@table @code
+@item api->major_version
+The major version of the running @command{gawk}.
+
+@item api->minor_version
+The minor version of the running @command{gawk}.
+@end table
+
+It is up to the extension to decide if there are API incompatibilities.
+Typically a check like this is enough:
+
+@example
+if (api->major_version != GAWK_API_MAJOR_VERSION
+ || api->minor_version < GAWK_API_MINOR_VERSION) @{
+ fprintf(stderr, "foo_extension: version mismatch with gawk!\n");
+ fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n",
+ GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION,
+ api->major_version, api->minor_version);
+ exit(1);
+@}
+@end example
+
+Such code is included in the boilerplate @code{dl_load_func()} macro
+provided in @file{gawkapi.h} (discussed later, in
+@ref{Extension API Boilerplate}).
+
+@node Extension API Informational Variables
+@subsubsection Informational Variables
+@cindex API informational variables
+@cindex extension API informational variables
+
+The API provides access to several variables that describe
+whether the corresponding command-line options were enabled when
+@command{gawk} was invoked. The variables are:
+
+@table @code
+@item do_debug
+This variable is true if @command{gawk} was invoked with the @option{--debug} option.
+
+@item do_lint
+This variable is true if @command{gawk} was invoked with the @option{--lint} option.
+
+@item do_mpfr
+This variable is true if @command{gawk} was invoked with the @option{--bignum} option.
+
+@item do_profile
+This variable is true if @command{gawk} was invoked with the @option{--profile} option.
+
+@item do_sandbox
+This variable is true if @command{gawk} was invoked with the @option{--sandbox} option.
+
+@item do_traditional
+This variable is true if @command{gawk} was invoked with the @option{--traditional} option.
+@end table
+
+The value of @code{do_lint} can change if @command{awk} code
+modifies the @code{LINT} predefined variable (@pxref{Built-in Variables}).
+The others should not change during execution.
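+
+For example, an extension function might use @code{do_lint} to decide
+whether to issue a lint warning about questionable usage, in the same
+fashion as the sample extensions (a minimal sketch; the function name and
+message are purely illustrative):
+
+@example
+static awk_value_t *
+do_example(int nargs, awk_value_t *result)
+@{
+    if (do_lint && nargs != 1)
+        lintwarn(ext_id,
+            "example: called with incorrect number of arguments");
+    @dots{}
+    return make_number(0.0, result);
+@}
+@end example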
+
+@node Extension API Boilerplate
+@subsection Boilerplate Code
+
+As mentioned earlier (@pxref{Extension Mechanism Outline}), the function
+definitions as presented are really macros. To use these macros, your
+extension must provide a small amount of boilerplate code (variables and
+functions) toward the top of your source file, using predefined names
+as described here. The boilerplate needed is also provided in comments
+in the @file{gawkapi.h} header file:
+
+@example
+/* Boiler plate code: */
+int plugin_is_GPL_compatible;
+
+static const gawk_api_t *api;
+static awk_ext_id_t ext_id;
+static const char *ext_version = NULL; /* or @dots{} = "some string" */
+
+static awk_ext_func_t func_table[] = @{
+ @{ "name", do_name, 1 @},
+ /* @dots{} */
+@};
+
+/* EITHER: */
+
+static awk_bool_t (*init_func)(void) = NULL;
+
+/* OR: */
+
+static awk_bool_t
+init_my_extension(void)
+@{
+ @dots{}
+@}
+
+static awk_bool_t (*init_func)(void) = init_my_extension;
+
+dl_load_func(func_table, some_name, "name_space_in_quotes")
+@end example
+
+These variables and functions are as follows:
+
+@table @code
+@item int plugin_is_GPL_compatible;
+This asserts that the extension is compatible with
+@ifclear FOR_PRINT
+the GNU GPL (@pxref{Copying}).
+@end ifclear
+@ifset FOR_PRINT
+the GNU GPL.
+@end ifset
+If your extension does not have this, @command{gawk}
+will not load it (@pxref{Plugin License}).
+
+@item static const gawk_api_t *api;
+This global @code{static} variable should be set to point to
+the @code{gawk_api_t} pointer that @command{gawk} passes to your
+@code{dl_load()} function. This variable is used by all of the macros.
+
+@item static awk_ext_id_t ext_id;
+This global static variable should be set to the @code{awk_ext_id_t}
+value that @command{gawk} passes to your @code{dl_load()} function.
+This variable is used by all of the macros.
+
+@item static const char *ext_version = NULL; /* or @dots{} = "some string" */
+This global @code{static} variable should be set either
+to @code{NULL}, or to point to a string giving the name and version of
+your extension.
+
+@item static awk_ext_func_t func_table[] = @{ @dots{} @};
+This is an array of one or more @code{awk_ext_func_t} structures
+as described earlier (@pxref{Extension Functions}).
+It can then be looped over for multiple calls to
+@code{add_ext_func()}.
+
+@c Use @var{OR} for docbook
+@item static awk_bool_t (*init_func)(void) = NULL;
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @var{OR}
+@itemx static awk_bool_t init_my_extension(void) @{ @dots{} @}
+@itemx static awk_bool_t (*init_func)(void) = init_my_extension;
+If you need to do some initialization work, you should define a
+function that does it (creates variables, opens files, etc.)
+and then define the @code{init_func} pointer to point to your
+function.
+The function should return @code{awk_false} upon failure, or @code{awk_true}
+if everything goes well.
+
+If you don't need to do any initialization, define the pointer and
+initialize it to @code{NULL}.
+
+@item dl_load_func(func_table, some_name, "name_space_in_quotes")
+This macro expands to a @code{dl_load()} function that performs
+all the necessary initializations.
+@end table
+
+The point of all the variables and arrays is to let the
+@code{dl_load()} function (from the @code{dl_load_func()}
+macro) do all the standard work. It does the following:
+
+@enumerate 1
+@item
+Check the API versions. If the extension major version does not match
+@command{gawk}'s, or if the extension minor version is greater than
+@command{gawk}'s, it prints a fatal error message and exits.
+
+@item
+Load the functions defined in @code{func_table}.
+If any of them fails to load, it prints a warning message but
+continues on.
+
+@item
+If the @code{init_func} pointer is not @code{NULL}, call the
+function it points to. If it returns @code{awk_false}, print a
+warning message.
+
+@item
+If @code{ext_version} is not @code{NULL}, register
+the version string with @command{gawk}.
+@end enumerate
+
+@node Finding Extensions
+@section How @command{gawk} Finds Extensions
+@cindex extension search path
+@cindex finding extensions
+
+Compiled extensions have to be installed in a directory where
+@command{gawk} can find them. If @command{gawk} is configured and
+built in the default fashion, the directory in which to find
+extensions is @file{/usr/local/lib/gawk}. You can also specify a search
+path with a list of directories to search for compiled extensions.
+@DBXREF{AWKLIBPATH Variable} for more information.
+
+@node Extension Example
+@section Example: Some File Functions
+@cindex extension example
+
+@quotation
+@i{No matter where you go, there you are.}
+@author Buckaroo Banzai
+@end quotation
+
+@c It's enough to show chdir and stat, no need for fts
+
+Two useful functions that are not in @command{awk} are @code{chdir()} (so
+that an @command{awk} program can change its directory) and @code{stat()}
+(so that an @command{awk} program can gather information about a file).
+In order to illustrate the API in action, this @value{SECTION} implements
+these functions for @command{gawk} in an extension.
+
+@menu
+* Internal File Description:: What the new functions will do.
+* Internal File Ops:: The code for internal file operations.
+* Using Internal File Ops:: How to use an external extension.
+@end menu
+
+@node Internal File Description
+@subsection Using @code{chdir()} and @code{stat()}
+
+This @value{SECTION} shows how to use the new functions at
+the @command{awk} level once they've been integrated into the
+running @command{gawk} interpreter. Using @code{chdir()} is very
+straightforward. It takes one argument, the new directory to change to:
+
+@example
+@@load "filefuncs"
+@dots{}
+newdir = "/home/arnold/funstuff"
+ret = chdir(newdir)
+if (ret < 0) @{
+ printf("could not change to %s: %s\n", newdir, ERRNO) > "/dev/stderr"
+ exit 1
+@}
+@dots{}
+@end example
+
+The return value is negative if the @code{chdir()} failed, and
+@code{ERRNO} (@pxref{Built-in Variables}) is set to a string indicating
+the error.
+
+Using @code{stat()} is a bit more complicated. The C @code{stat()}
+function fills in a structure that has a fair amount of information.
+The right way to model this in @command{awk} is to fill in an associative
+array with the appropriate information:
+
+@c broke printf for page breaking
+@example
+file = "/home/arnold/.profile"
+ret = stat(file, fdata)
+if (ret < 0) @{
+ printf("could not stat %s: %s\n",
+ file, ERRNO) > "/dev/stderr"
+ exit 1
+@}
+printf("size of %s is %d bytes\n", file, fdata["size"])
+@end example
+
+The @code{stat()} function always clears the data array, even if
+the @code{stat()} fails. It fills in the following elements:
+
+@table @code
+@item "name"
+The name of the file that was @code{stat()}'ed.
+
+@item "dev"
+@itemx "ino"
+The file's device and inode numbers, respectively.
+
+@item "mode"
+The file's mode, as a numeric value. This includes both the file's
+type and its permissions.
+
+@item "nlink"
+The number of hard links (directory entries) the file has.
+
+@item "uid"
+@itemx "gid"
+The numeric user and group ID numbers of the file's owner.
+
+@item "size"
+The size in bytes of the file.
+
+@item "blocks"
+The number of disk blocks the file actually occupies. This may not
+be a function of the file's size if the file has holes.
+
+@item "atime"
+@itemx "mtime"
+@itemx "ctime"
+The file's last access, modification, and inode update times,
+respectively. These are numeric timestamps, suitable for formatting
+with @code{strftime()}
+(@pxref{Time Functions}).
+
+@item "pmode"
+The file's ``printable mode.'' This is a string representation of
+the file's type and permissions, such as is produced by
+@samp{ls -l}---for example, @code{"drwxr-xr-x"}.
+
+@item "type"
+A printable string representation of the file's type. The value
+is one of the following:
+
+@table @code
+@item "blockdev"
+@itemx "chardev"
+The file is a block or character device (``special file'').
+
+@ignore
+@item "door"
+The file is a Solaris ``door'' (special file used for
+interprocess communications).
+@end ignore
+
+@item "directory"
+The file is a directory.
+
+@item "fifo"
+The file is a named pipe (also known as a FIFO).
+
+@item "file"
+The file is just a regular file.
+
+@item "socket"
+The file is an @code{AF_UNIX} (``Unix domain'') socket in the
+filesystem.
+
+@item "symlink"
+The file is a symbolic link.
+@end table
+
+@c 5/2013: Thanks to Corinna Vinschen for this information.
+@item "devbsize"
+The size of a block for the element indexed by @code{"blocks"}.
+This information is derived from either the @code{DEV_BSIZE}
+constant defined in @code{<sys/param.h>} on most systems,
+or the @code{S_BLKSIZE} constant in @code{<sys/stat.h>} on BSD systems.
+For some other systems, @dfn{a priori} knowledge is used to provide
+a value. Where no value can be determined, it defaults to 512.
+@end table
+
+Several additional elements may be present depending upon the operating
+system and the type of the file. You can test for them in your @command{awk}
+program by using the @code{in} operator
+(@pxref{Reference to Elements}):
+
+@table @code
+@item "blksize"
+The preferred block size for I/O to the file. This field is not
+present on all POSIX-like systems in the C @code{stat} structure.
+
+@item "linkval"
+If the file is a symbolic link, this element is the name of the
+file the link points to (i.e., the value of the link).
+
+@item "rdev"
+@itemx "major"
+@itemx "minor"
+If the file is a block or character device file, then these values
+represent the numeric device number and the major and minor components
+of that number, respectively.
+@end table
+
+@node Internal File Ops
+@subsection C Code for @code{chdir()} and @code{stat()}
+
+Here is the C code for these extensions.@footnote{This version is
+edited slightly for presentation. See @file{extension/filefuncs.c}
+in the @command{gawk} distribution for the complete version.}
+
+The file includes a number of standard header files, and then includes
+the @file{gawkapi.h} header file which provides the API definitions.
+Those are followed by the necessary variable declarations
+to make use of the API macros and boilerplate code
+(@pxref{Extension API Boilerplate}):
+
+@example
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "gawkapi.h"
+
+#include "gettext.h"
+#define _(msgid) gettext(msgid)
+#define N_(msgid) msgid
+
+#include "gawkfts.h"
+#include "stack.h"
+
+static const gawk_api_t *api; /* for convenience macros to work */
+static awk_ext_id_t *ext_id;
+static awk_bool_t init_filefuncs(void);
+static awk_bool_t (*init_func)(void) = init_filefuncs;
+static const char *ext_version = "filefuncs extension: version 1.0";
+
+int plugin_is_GPL_compatible;
+@end example
+
+@cindex programming conventions, @command{gawk} extensions
+By convention, for an @command{awk} function @code{foo()}, the C function
+that implements it is called @code{do_foo()}. The function should have
+two arguments: the first is an @code{int}, usually called @code{nargs},
+which represents the number of actual arguments for the function.
+The second is a pointer to an @code{awk_value_t}, usually named
+@code{result}:
+
+@example
+/* do_chdir --- provide dynamically loaded chdir() function for gawk */
+
+static awk_value_t *
+do_chdir(int nargs, awk_value_t *result)
+@{
+ awk_value_t newdir;
+ int ret = -1;
+
+ assert(result != NULL);
+
+ if (do_lint && nargs != 1)
+ lintwarn(ext_id,
+ _("chdir: called with incorrect number of arguments, "
+ "expecting 1"));
+@end example
+
+The @code{newdir}
+variable represents the new directory to change to, which is retrieved
+with @code{get_argument()}. Note that the first argument is
+numbered zero.
+
+If the argument is retrieved successfully, the function calls the
+@code{chdir()} system call. If the @code{chdir()} fails, @code{ERRNO}
+is updated:
+
+@example
+ if (get_argument(0, AWK_STRING, & newdir)) @{
+ ret = chdir(newdir.str_value.str);
+ if (ret < 0)
+ update_ERRNO_int(errno);
+ @}
+@end example
+
+Finally, the function returns the return value to the @command{awk} level:
+
+@example
+ return make_number(ret, result);
+@}
+@end example
+
+The @code{stat()} extension is more involved. First comes a function
+that turns a numeric mode into a printable representation
+(e.g., 644 becomes @samp{-rw-r--r--}). This is omitted here for brevity:
+
+@example
+/* format_mode --- turn a stat mode field into something readable */
+
+static char *
+format_mode(unsigned long fmode)
+@{
+ @dots{}
+@}
+@end example
+
+Next comes a function for reading symbolic links, which is also
+omitted here for brevity:
+
+@example
+/* read_symlink --- read a symbolic link into an allocated buffer.
+ @dots{} */
+
+static char *
+read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
+@{
+ @dots{}
+@}
+@end example
+
+Two helper functions simplify entering values in the
+array that will contain the result of the @code{stat()}:
+
+@example
+/* array_set --- set an array element */
+
+static void
+array_set(awk_array_t array, const char *sub, awk_value_t *value)
+@{
+ awk_value_t index;
+
+ set_array_element(array,
+ make_const_string(sub, strlen(sub), & index),
+ value);
+
+@}
+
+/* array_set_numeric --- set an array element with a number */
+
+static void
+array_set_numeric(awk_array_t array, const char *sub, double num)
+@{
+ awk_value_t tmp;
+
+ array_set(array, sub, make_number(num, & tmp));
+@}
+@end example
+
+The following function does most of the work to fill in
+the @code{awk_array_t} result array with values obtained
+from a valid @code{struct stat}. It is done in a separate function
+to support the @code{stat()} function for @command{gawk} and also
+to support the @code{fts()} extension which is included in
+the same file but whose code is not shown here
+(@pxref{Extension Sample File Functions}).
+
+The first part of the function is variable declarations,
+including a table to map file types to strings:
+
+@example
+/* fill_stat_array --- do the work to fill an array with stat info */
+
+static int
+fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
+@{
+ char *pmode; /* printable mode */
+ const char *type = "unknown";
+ awk_value_t tmp;
+ static struct ftype_map @{
+ unsigned int mask;
+ const char *type;
+ @} ftype_map[] = @{
+ @{ S_IFREG, "file" @},
+ @{ S_IFBLK, "blockdev" @},
+ @{ S_IFCHR, "chardev" @},
+ @{ S_IFDIR, "directory" @},
+#ifdef S_IFSOCK
+ @{ S_IFSOCK, "socket" @},
+#endif
+#ifdef S_IFIFO
+ @{ S_IFIFO, "fifo" @},
+#endif
+#ifdef S_IFLNK
+ @{ S_IFLNK, "symlink" @},
+#endif
+#ifdef S_IFDOOR /* Solaris weirdness */
+ @{ S_IFDOOR, "door" @},
+#endif /* S_IFDOOR */
+ @};
+ int j, k;
+@end example
+
+The destination array is cleared, and then code fills in
+various elements based on values in the @code{struct stat}:
+
+@example
+ /* empty out the array */
+ clear_array(array);
+
+ /* fill in the array */
+ array_set(array, "name", make_const_string(name, strlen(name),
+ & tmp));
+ array_set_numeric(array, "dev", sbuf->st_dev);
+ array_set_numeric(array, "ino", sbuf->st_ino);
+ array_set_numeric(array, "mode", sbuf->st_mode);
+ array_set_numeric(array, "nlink", sbuf->st_nlink);
+ array_set_numeric(array, "uid", sbuf->st_uid);
+ array_set_numeric(array, "gid", sbuf->st_gid);
+ array_set_numeric(array, "size", sbuf->st_size);
+ array_set_numeric(array, "blocks", sbuf->st_blocks);
+ array_set_numeric(array, "atime", sbuf->st_atime);
+ array_set_numeric(array, "mtime", sbuf->st_mtime);
+ array_set_numeric(array, "ctime", sbuf->st_ctime);
+
+ /* for block and character devices, add rdev,
+ major and minor numbers */
+ if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) @{
+ array_set_numeric(array, "rdev", sbuf->st_rdev);
+ array_set_numeric(array, "major", major(sbuf->st_rdev));
+ array_set_numeric(array, "minor", minor(sbuf->st_rdev));
+ @}
+@end example
+
+@noindent
+The latter part of the function makes selective additions
+to the destination array, depending upon the availability of
+certain members and/or the type of the file. It then returns zero,
+for success:
+
+@example
+#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
+ array_set_numeric(array, "blksize", sbuf->st_blksize);
+#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
+
+ pmode = format_mode(sbuf->st_mode);
+ array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
+ & tmp));
+
+ /* for symbolic links, add a linkval field */
+ if (S_ISLNK(sbuf->st_mode)) @{
+ char *buf;
+ ssize_t linksize;
+
+ if ((buf = read_symlink(name, sbuf->st_size,
+ & linksize)) != NULL)
+ array_set(array, "linkval",
+ make_malloced_string(buf, linksize, & tmp));
+ else
+ warning(ext_id, _("stat: unable to read symbolic link `%s'"),
+ name);
+ @}
+
+ /* add a type field */
+ type = "unknown"; /* shouldn't happen */
+ for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) @{
+ if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) @{
+ type = ftype_map[j].type;
+ break;
+ @}
+ @}
+
+ array_set(array, "type", make_const_string(type, strlen(type), & tmp));
+
+ return 0;
+@}
+@end example
+
+The third argument to @code{stat()} was not discussed previously. This
+argument is optional. If present, it causes @code{do_stat()} to use
+the @code{stat()} system call instead of the @code{lstat()} system
+call. This is done by using a function pointer: @code{statfunc}.
+@code{statfunc} is initialized to point to @code{lstat()} (instead
+of @code{stat()}) to get the file information, in case the file is a
+symbolic link. However, if there are three arguments, @code{statfunc}
+is set to point to @code{stat()} instead.
+
+Here is the @code{do_stat()} function, which starts with
+variable declarations and argument checking:
+
+@ignore
+Changed message for page breaking. Used to be:
+ "stat: called with incorrect number of arguments (%d), should be 2",
+@end ignore
+@example
+/* do_stat --- provide a stat() function for gawk */
+
+static awk_value_t *
+do_stat(int nargs, awk_value_t *result)
+@{
+ awk_value_t file_param, array_param;
+ char *name;
+ awk_array_t array;
+ int ret;
+ struct stat sbuf;
+ /* default is lstat() */
+ int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
+
+ assert(result != NULL);
+
+ if (nargs != 2 && nargs != 3) @{
+ if (do_lint)
+ lintwarn(ext_id,
+ _("stat: called with wrong number of arguments"));
+ return make_number(-1, result);
+ @}
+@end example
+
+Then comes the actual work. First, the function gets the arguments.
+Next, it gets the information for the file. If the called function
+(@code{lstat()} or @code{stat()}) returns an error, the code sets
+@code{ERRNO} and returns:
+
+@example
+ /* file is first arg, array to hold results is second */
+ if ( ! get_argument(0, AWK_STRING, & file_param)
+ || ! get_argument(1, AWK_ARRAY, & array_param)) @{
+ warning(ext_id, _("stat: bad parameters"));
+ return make_number(-1, result);
+ @}
+
+ if (nargs == 3) @{
+ statfunc = stat;
+ @}
+
+ name = file_param.str_value.str;
+ array = array_param.array_cookie;
+
+ /* always empty out the array */
+ clear_array(array);
+
+ /* stat the file, if error, set ERRNO and return */
+ ret = statfunc(name, & sbuf);
+ if (ret < 0) @{
+ update_ERRNO_int(errno);
+ return make_number(ret, result);
+ @}
+@end example
+
+The tedious work is done by @code{fill_stat_array()}, shown
+earlier. When done, the function returns the result from @code{fill_stat_array()}:
+
+@example
+ ret = fill_stat_array(name, array, & sbuf);
+
+ return make_number(ret, result);
+@}
+@end example
+
+Finally, it's necessary to provide the ``glue'' that loads the
+new function(s) into @command{gawk}.
+
+The @code{filefuncs} extension also provides an @code{fts()}
+function, which we omit here. For its sake there is an initialization
+function:
+
+@example
+/* init_filefuncs --- initialization routine */
+
+static awk_bool_t
+init_filefuncs(void)
+@{
+ @dots{}
+@}
+@end example
+
+We are almost done. We need an array of @code{awk_ext_func_t}
+structures for loading each function into @command{gawk}:
+
+@example
+static awk_ext_func_t func_table[] = @{
+ @{ "chdir", do_chdir, 1 @},
+ @{ "stat", do_stat, 2 @},
+#ifndef __MINGW32__
+ @{ "fts", do_fts, 3 @},
+#endif
+@};
+@end example
+
+Each extension must have a routine named @code{dl_load()} to load
+everything that needs to be loaded. It is simplest to use the
+@code{dl_load_func()} macro in @code{gawkapi.h}:
+
+@example
+/* define the dl_load() function using the boilerplate macro */
+
+dl_load_func(func_table, filefuncs, "")
+@end example
+
+And that's it!
+
+@node Using Internal File Ops
+@subsection Integrating the Extensions
+
+@cindex @command{gawk}, interpreter@comma{} adding code to
+Now that the code is written, it must be possible to add it at
+runtime to the running @command{gawk} interpreter. First, the
+code must be compiled. Assuming that the functions are in
+a file named @file{filefuncs.c}, and @var{idir} is the location
+of the @file{gawkapi.h} header file,
+the following steps@footnote{In practice, you would probably want to
+use the GNU Autotools (Automake, Autoconf, Libtool, and @command{gettext}) to
+configure and build your libraries. Instructions for doing so are beyond
+the scope of this @value{DOCUMENT}. @DBXREF{gawkextlib} for Internet links to
+the tools.} create a GNU/Linux shared library:
+
+@example
+$ @kbd{gcc -fPIC -shared -DHAVE_CONFIG_H -c -O -g -I@var{idir} filefuncs.c}
+$ @kbd{gcc -o filefuncs.so -shared filefuncs.o}
+@end example
+
+Once the library exists, it is loaded by using the @code{@@load} keyword:
+
+@example
+# file testff.awk
+@@load "filefuncs"
+
+BEGIN @{
+ "pwd" | getline curdir # save current directory
+ close("pwd")
+
+ chdir("/tmp")
+ system("pwd") # test it
+ chdir(curdir) # go back
+
+ print "Info for testff.awk"
+ ret = stat("testff.awk", data)
+ print "ret =", ret
+ for (i in data)
+ printf "data[\"%s\"] = %s\n", i, data[i]
+ print "testff.awk modified:",
+ strftime("%m %d %Y %H:%M:%S", data["mtime"])
+
+ print "\nInfo for JUNK"
+ ret = stat("JUNK", data)
+ print "ret =", ret
+ for (i in data)
+ printf "data[\"%s\"] = %s\n", i, data[i]
+ print "JUNK modified:", strftime("%m %d %Y %H:%M:%S", data["mtime"])
+@}
+@end example
+
+The @env{AWKLIBPATH} environment variable tells
+@command{gawk} where to find extensions (@pxref{Finding Extensions}).
+We set it to the current directory and run the program:
+
+@example
+$ @kbd{AWKLIBPATH=$PWD gawk -f testff.awk}
+@print{} /tmp
+@print{} Info for testff.awk
+@print{} ret = 0
+@print{} data["blksize"] = 4096
+@print{} data["devbsize"] = 512
+@print{} data["mtime"] = 1412004710
+@print{} data["mode"] = 33204
+@print{} data["type"] = file
+@print{} data["dev"] = 2053
+@print{} data["gid"] = 1000
+@print{} data["ino"] = 10358899
+@print{} data["ctime"] = 1412004710
+@print{} data["blocks"] = 8
+@print{} data["nlink"] = 1
+@print{} data["name"] = testff.awk
+@print{} data["atime"] = 1412004716
+@print{} data["pmode"] = -rw-rw-r--
+@print{} data["size"] = 666
+@print{} data["uid"] = 1000
+@print{} testff.awk modified: 09 29 2014 18:31:50
+@print{}
+@print{} Info for JUNK
+@print{} ret = -1
+@print{} JUNK modified: 01 01 1970 02:00:00
+@end example
+
+@node Extension Samples
+@section The Sample Extensions in the @command{gawk} Distribution
+@cindex extensions distributed with @command{gawk}
+
+This @value{SECTION} provides brief overviews of the sample extensions
+that come in the @command{gawk} distribution. Some of them are intended
+for production use (e.g., the @code{filefuncs}, @code{readdir} and
+@code{inplace} extensions). Others mainly provide example code that
+shows how to use the extension API.
+
+@menu
+* Extension Sample File Functions:: The file functions sample.
+* Extension Sample Fnmatch:: An interface to @code{fnmatch()}.
+* Extension Sample Fork:: An interface to @code{fork()} and other
+ process functions.
+* Extension Sample Inplace:: Enabling in-place file editing.
+* Extension Sample Ord:: Character to value to character
+ conversions.
+* Extension Sample Readdir:: An interface to @code{readdir()}.
+* Extension Sample Revout:: Reversing output sample output wrapper.
+* Extension Sample Rev2way:: Reversing data sample two-way processor.
+* Extension Sample Read write array:: Serializing an array to a file.
+* Extension Sample Readfile:: Reading an entire file into a string.
+* Extension Sample Time:: An interface to @code{gettimeofday()}
+ and @code{sleep()}.
+* Extension Sample API Tests:: Tests for the API.
+@end menu
+
+@node Extension Sample File Functions
+@subsection File-Related Functions
+
+The @code{filefuncs} extension provides three different functions, as follows.
+The usage is:
+
+@table @asis
+@item @code{@@load "filefuncs"}
+This is how you load the extension.
+
+@cindex @code{chdir()} extension function
+@item @code{result = chdir("/some/directory")}
+The @code{chdir()} function is a direct hook to the @code{chdir()}
+system call to change the current directory. It returns zero
+upon success or less than zero upon error. In the latter case, it updates
+@code{ERRNO}.
+
+@cindex @code{stat()} extension function
+@item @code{result = stat("/some/path", statdata} [@code{, follow}]@code{)}
+The @code{stat()} function provides a hook into the
+@code{stat()} system call.
+It returns zero upon success or less than zero upon error.
+In the latter case, it updates @code{ERRNO}.
+
+By default, it uses the @code{lstat()} system call. However, if passed
+a third argument, it uses @code{stat()} instead.
+
+In all cases, it clears the @code{statdata} array.
+When the call is successful, @code{stat()} fills the @code{statdata}
+array with information retrieved from the filesystem, as follows:
+
+@multitable @columnfractions .15 .50 .20
+@headitem Subscript @tab Field in @code{struct stat} @tab File type
+@item @code{"name"} @tab The @value{FN} @tab All
+@item @code{"dev"} @tab @code{st_dev} @tab All
+@item @code{"ino"} @tab @code{st_ino} @tab All
+@item @code{"mode"} @tab @code{st_mode} @tab All
+@item @code{"nlink"} @tab @code{st_nlink} @tab All
+@item @code{"uid"} @tab @code{st_uid} @tab All
+@item @code{"gid"} @tab @code{st_gid} @tab All
+@item @code{"size"} @tab @code{st_size} @tab All
+@item @code{"atime"} @tab @code{st_atime} @tab All
+@item @code{"mtime"} @tab @code{st_mtime} @tab All
+@item @code{"ctime"} @tab @code{st_ctime} @tab All
+@item @code{"rdev"} @tab @code{st_rdev} @tab Device files
+@item @code{"major"} @tab @code{st_major} @tab Device files
+@item @code{"minor"} @tab @code{st_minor} @tab Device files
+@item @code{"blksize"} @tab @code{st_blksize} @tab All
+@item @code{"pmode"} @tab A human-readable version of the mode value, such as printed by
+@command{ls}. For example, @code{"-rwxr-xr-x"} @tab All
+@item @code{"linkval"} @tab The value of the symbolic link @tab Symbolic links
+@item @code{"type"} @tab The type of the file as a string. One of
+@code{"file"},
+@code{"blockdev"},
+@code{"chardev"},
+@code{"directory"},
+@code{"socket"},
+@code{"fifo"},
+@code{"symlink"},
+@code{"door"},
+or
+@code{"unknown"}.
+Not all systems support all file types. @tab All
+@end multitable
+
+@cindex @code{fts()} extension function
+@item @code{flags = or(FTS_PHYSICAL, ...)}
+@itemx @code{result = fts(pathlist, flags, filedata)}
+Walk the file trees provided in @code{pathlist} and fill in the
+@code{filedata} array as described next. @code{flags} is the bitwise
+OR of several predefined values, also described in a moment.
+Return zero if there were no errors; otherwise, return @minus{}1.
+@end table
+
+The @code{fts()} function provides a hook to the C library @code{fts()}
+routines for traversing file hierarchies. Instead of returning data
+about one file at a time in a stream, it fills in a multidimensional
+array with data about each file and directory encountered in the requested
+hierarchies.
+
+The arguments are as follows:
+
+@table @code
+@item pathlist
+An array of @value{FN}s. The element values are used; the index values are ignored.
+
+@item flags
+This should be the bitwise OR of one or more of the following
+predefined constant flag values. At least one of
+@code{FTS_LOGICAL} or @code{FTS_PHYSICAL} must be provided; otherwise
+@code{fts()} returns an error value and sets @code{ERRNO}.
+The flags are:
+
+@c nested table
+@table @code
+@item FTS_LOGICAL
+Do a ``logical'' file traversal, where the information returned for
+a symbolic link refers to the linked-to file, and not to the symbolic
+link itself. This flag is mutually exclusive with @code{FTS_PHYSICAL}.
+
+@item FTS_PHYSICAL
+Do a ``physical'' file traversal, where the information returned for a
+symbolic link refers to the symbolic link itself. This flag is mutually
+exclusive with @code{FTS_LOGICAL}.
+
+@item FTS_NOCHDIR
+As a performance optimization, the C library @code{fts()} routines
+change directory as they traverse a file hierarchy. This flag disables
+that optimization.
+
+@item FTS_COMFOLLOW
+Immediately follow a symbolic link named in @code{pathlist},
+whether or not @code{FTS_LOGICAL} is set.
+
+@item FTS_SEEDOT
+By default, the C library @code{fts()} routines do not return entries for
+@file{.} (dot) and @file{..} (dot-dot). This option causes entries for
+dot-dot to also be included. (The extension always includes an entry
+for dot; more on this in a moment.)
+
+@item FTS_XDEV
+During a traversal, do not cross onto a different mounted filesystem.
+@end table
+
+@item filedata
+The @code{filedata} array is first cleared. Then, @code{fts()} creates
+an element in @code{filedata} for every element in @code{pathlist}.
+The index is the name of the directory or file given in @code{pathlist}.
+The element for this index is itself an array. There are two cases:
+
+@c nested table
+@table @emph
+@item The path is a file
+In this case, the array contains two or three elements:
+
+@c doubly nested table
+@table @code
+@item "path"
+The full path to this file, starting from the ``root'' that was given
+in the @code{pathlist} array.
+
+@item "stat"
+This element is itself an array, containing the same information as provided
+by the @code{stat()} function described earlier for its
+@code{statdata} argument. The element may not be present if
+the @code{stat()} system call for the file failed.
+
+@item "error"
+If some kind of error was encountered, the array will also
+contain an element named @code{"error"}, which is a string describing the error.
+@end table
+
+@item The path is a directory
+In this case, the array contains one element for each entry in the
+directory. If an entry is a file, that element is the same as for files, just
+described. If the entry is a directory, that element is (recursively)
+an array describing the subdirectory. If @code{FTS_SEEDOT} was provided
+in the flags, then there will also be an element named @code{".."}. This
+element will be an array containing the data as provided by @code{stat()}.
+
+In addition, there will be an element whose index is @code{"."}.
+This element is an array containing the same two or three elements as
+for a file: @code{"path"}, @code{"stat"}, and @code{"error"}.
+@end table
+@end table
+
+The @code{fts()} function returns zero if there were no errors.
+Otherwise it returns @minus{}1.
+
+@quotation NOTE
+The @code{fts()} extension does not exactly mimic the
+interface of the C library @code{fts()} routines, choosing instead to
+provide an interface that is based on associative arrays, which is
+more comfortable to use from an @command{awk} program. This includes the
+lack of a comparison function, because @command{gawk} already provides
+powerful array sorting facilities. Although an @code{fts_read()}-like
+interface could have been provided, this felt less natural than simply
+creating a multidimensional array to represent the file hierarchy and
+its information.
+@end quotation
+
+See @file{test/fts.awk} in the @command{gawk} distribution for an example
+use of the @code{fts()} extension function.
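+
+As a further illustration, here is a minimal sketch (it is not part of
+the distribution) that walks a single, hypothetical, starting directory,
+@file{/etc}, and prints the names of the entries found at its top level:
+
+@example
+@@load "filefuncs"
+
+BEGIN @{
+    pathlist[1] = "/etc"       # hypothetical starting directory
+    flags = or(FTS_PHYSICAL, FTS_SEEDOT)
+    if (fts(pathlist, flags, filedata) != 0)
+        print "fts failed:", ERRNO > "/dev/stderr"
+    else
+        for (entry in filedata["/etc"])
+            print entry
+@}
+@end example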
+
+@node Extension Sample Fnmatch
+@subsection Interface to @code{fnmatch()}
+
+This extension provides an interface to the C library
+@code{fnmatch()} function. The usage is:
+
+@table @code
+@item @@load "fnmatch"
+This is how you load the extension.
+
+@cindex @code{fnmatch()} extension function
+@item result = fnmatch(pattern, string, flags)
+The return value is zero on success, @code{FNM_NOMATCH}
+if the string did not match the pattern, or
+a different nonzero value if an error occurred.
+@end table
+
+In addition to the @code{fnmatch()} function, the @code{fnmatch} extension
+adds one constant (@code{FNM_NOMATCH}), and an array of flag values
+named @code{FNM}.
+
+The arguments to @code{fnmatch()} are:
+
+@table @code
+@item pattern
+The @value{FN} wildcard to match.
+
+@item string
+The @value{FN} string.
+
+@item flags
+Either zero, or the bitwise OR of one or more of the
+flags in the @code{FNM} array.
+@end table
+
+The flags are as follows:
+
+@multitable @columnfractions .25 .75
+@headitem Array element @tab Corresponding flag defined by @code{fnmatch()}
+@item @code{FNM["CASEFOLD"]} @tab @code{FNM_CASEFOLD}
+@item @code{FNM["FILE_NAME"]} @tab @code{FNM_FILE_NAME}
+@item @code{FNM["LEADING_DIR"]} @tab @code{FNM_LEADING_DIR}
+@item @code{FNM["NOESCAPE"]} @tab @code{FNM_NOESCAPE}
+@item @code{FNM["PATHNAME"]} @tab @code{FNM_PATHNAME}
+@item @code{FNM["PERIOD"]} @tab @code{FNM_PERIOD}
+@end multitable
+
+Here is an example:
+
+@example
+@@load "fnmatch"
+@dots{}
+flags = or(FNM["PERIOD"], FNM["NOESCAPE"])
+if (fnmatch("*.a", "foo.c", flags) == FNM_NOMATCH)
+ print "no match"
+@end example
+
+@node Extension Sample Fork
+@subsection Interface to @code{fork()}, @code{wait()}, and @code{waitpid()}
+
+The @code{fork} extension adds three functions, as follows:
+
+@table @code
+@item @@load "fork"
+This is how you load the extension.
+
+@cindex @code{fork()} extension function
+@item pid = fork()
+This function creates a new process. The return value is zero in the
+child and the process-ID number of the child in the parent, or @minus{}1
+upon error. In the latter case, @code{ERRNO} indicates the problem.
+In the child, @code{PROCINFO["pid"]} and @code{PROCINFO["ppid"]} are
+updated to reflect the correct values.
+
+@cindex @code{waitpid()} extension function
+@item ret = waitpid(pid)
+This function takes a numeric argument, which is the process-ID to
+wait for. The return value is that of the
+@code{waitpid()} system call.
+
+@cindex @code{wait()} extension function
+@item ret = wait()
+This function waits for the first child to die.
+The return value is that of the
+@code{wait()} system call.
+@end table
+
+There is no corresponding @code{exec()} function.
+
+Here is an example:
+
+@example
+@@load "fork"
+@dots{}
+if ((pid = fork()) == 0)
+ print "hello from the child"
+else
+ print "hello from the parent"
+@end example
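+
+The parent can use @code{wait()} to collect the child's status. The
+following sketch (an illustration only, not from the distribution)
+shows one way to do so:
+
+@example
+@@load "fork"
+@dots{}
+if ((pid = fork()) == 0) @{
+    print "hello from the child"
+@} else @{
+    ret = wait()       # wait for the child to finish
+    print "the child's process-ID was", ret
+@}
+@end example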
+
+@node Extension Sample Inplace
+@subsection Enabling In-Place File Editing
+
+@cindex @code{inplace} extension
+The @code{inplace} extension emulates GNU @command{sed}'s @option{-i}
+option, which performs ``in place'' editing of each input file.
+It uses the bundled @file{inplace.awk} include file to invoke the extension
+properly:
+
+@example
+@c file eg/lib/inplace.awk
+@group
+# inplace --- load and invoke the inplace extension.
+
+@@load "inplace"
+
+# Please set INPLACE_SUFFIX to make a backup copy. For example, you may
+# want to set INPLACE_SUFFIX to .bak on the command line or in a BEGIN rule.
+
+BEGINFILE @{
+ inplace_begin(FILENAME, INPLACE_SUFFIX)
+@}
+
+ENDFILE @{
+ inplace_end(FILENAME, INPLACE_SUFFIX)
+@}
+@end group
+@c endfile
+@end example
+
+For each regular file that is processed, the extension redirects
+standard output to a temporary file configured to have the same owner
+and permissions as the original. After the file has been processed,
+the extension restores standard output to its original destination.
+If @code{INPLACE_SUFFIX} is not an empty string, the original file is
+linked to a backup @value{FN} created by appending that suffix. Finally,
+the temporary file is renamed to the original @value{FN}.
+
+If any error occurs, the extension issues a fatal error to terminate
+processing immediately without damaging the original file.
+
+Here are some simple examples:
+
+@example
+$ @kbd{gawk -i inplace '@{ gsub(/foo/, "bar") @}; @{ print @}' file1 file2 file3}
+@end example
+
+To keep a backup copy of the original files, try this:
+
+@example
+$ @kbd{gawk -i inplace -v INPLACE_SUFFIX=.bak '@{ gsub(/foo/, "bar") @}}
+> @kbd{@{ print @}' file1 file2 file3}
+@end example
+
+@node Extension Sample Ord
+@subsection Character and Numeric Values: @code{ord()} and @code{chr()}
+
+The @code{ordchr} extension adds two functions, named
+@code{ord()} and @code{chr()}, as follows:
+
+@table @code
+@item @@load "ordchr"
+This is how you load the extension.
+
+@cindex @code{ord()} extension function
+@item number = ord(string)
+Return the numeric value of the first character in @code{string}.
+
+@cindex @code{chr()} extension function
+@item char = chr(number)
+Return a string whose first character is that represented by @code{number}.
+@end table
+
+These functions are inspired by the Pascal language functions
+of the same name. Here is an example:
+
+@example
+@@load "ordchr"
+@dots{}
+printf("The numeric value of 'A' is %d\n", ord("A"))
+printf("The string value of 65 is %s\n", chr(65))
+@end example
+
+@node Extension Sample Readdir
+@subsection Reading Directories
+
+The @code{readdir} extension adds an input parser for directories.
+The usage is as follows:
+
+@cindex @code{readdir} extension
+@example
+@@load "readdir"
+@end example
+
+When this extension is in use, directories named on the command line
+(or with @code{getline}) are no longer skipped; instead, they are read,
+with each entry returned as a record.
+
+The record consists of three fields. The first two are the inode number and the
+@value{FN}, separated by a forward slash character.
+On systems where the directory entry contains the file type, the record
+has a third field (also separated by a slash), which is a single letter
+indicating the type of the file. The letters and their corresponding file
+types are shown in @ref{table-readdir-file-types}.
+
+@float Table,table-readdir-file-types
+@caption{File types returned by the @code{readdir} extension}
+@multitable @columnfractions .1 .9
+@headitem Letter @tab File Type
+@item @code{b} @tab Block device
+@item @code{c} @tab Character device
+@item @code{d} @tab Directory
+@item @code{f} @tab Regular file
+@item @code{l} @tab Symbolic link
+@item @code{p} @tab Named pipe (FIFO)
+@item @code{s} @tab Socket
+@item @code{u} @tab Anything else (unknown)
+@end multitable
+@end float
+
+On systems without the file type information, the third field is always
+@samp{u}.
+
+@quotation NOTE
+On GNU/Linux systems, there are filesystems that don't support the
+@code{d_type} entry (see the @i{readdir}(3) manual page), and so the file
+type is always @samp{u}. You can use the @code{filefuncs} extension to call
+@code{stat()} in order to get correct type information.
+@end quotation
+
+Here is an example:
+
+@example
+@@load "readdir"
+@dots{}
+BEGIN @{ FS = "/" @}
+@{ print "file name is", $2 @}
+@end example
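+
+On filesystems where the type is reported as @samp{u}, you can combine
+this extension with the @code{stat()} function from the @code{filefuncs}
+extension, as described earlier. The following is only a sketch of the idea:
+
+@example
+@@load "readdir"
+@@load "filefuncs"
+
+BEGIN @{ FS = "/" @}
+$3 == "u" @{
+    if (stat(FILENAME "/" $2, fdata) == 0)
+        print $2, "is of type", fdata["type"]
+@}
+@end example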
+
+@node Extension Sample Revout
+@subsection Reversing Output
+
+The @code{revoutput} extension adds a simple output wrapper that reverses
+the characters in each output line. Its main purpose is to show how to
+write an output wrapper, although it may be mildly amusing for the unwary.
+Here is an example:
+
+@cindex @code{revoutput} extension
+@example
+@@load "revoutput"
+
+BEGIN @{
+ REVOUT = 1
+ print "don't panic" > "/dev/stdout"
+@}
+@end example
+
+The output from this program is:
+@samp{cinap t'nod}.
+
+@node Extension Sample Rev2way
+@subsection Two-Way I/O Example
+
+The @code{revtwoway} extension adds a simple two-way processor that
+reverses the characters in each line sent to it for reading back by
+the @command{awk} program. Its main purpose is to show how to write
+a two-way processor, although it may also be mildly amusing.
+The following example shows how to use it:
+
+@cindex @code{revtwoway} extension
+@example
+@@load "revtwoway"
+
+BEGIN @{
+ cmd = "/magic/mirror"
+ print "don't panic" |& cmd
+ cmd |& getline result
+ print result
+ close(cmd)
+@}
+@end example
+
+The output from this program
+@ifnotinfo
+also is:
+@end ifnotinfo
+@ifinfo
+is:
+@end ifinfo
+@samp{cinap t'nod}.
+
+@node Extension Sample Read write array
+@subsection Dumping and Restoring an Array
+
+The @code{rwarray} extension adds two functions,
+named @code{writea()} and @code{reada()}, as follows:
+
+@table @code
+@item @@load "rwarray"
+This is how you load the extension.
+
+@cindex @code{writea()} extension function
+@item ret = writea(file, array)
+This function takes two arguments: a string naming the file to which
+the array should be dumped, and the array itself.
+@code{writea()} understands arrays of arrays. It returns one on
+success, or zero upon failure.
+
+@cindex @code{reada()} extension function
+@item ret = reada(file, array)
+@code{reada()} is the inverse of @code{writea()};
+it reads the file named as its first argument, filling in
+the array named as the second argument. It clears the array first.
+Here too, the return value is one on success and zero upon failure.
+@end table
+
+The array created by @code{reada()} is identical to that written by
+@code{writea()} in the sense that the contents are the same. However,
+due to implementation issues, the array traversal order of the re-created
+array is likely to be different from that of the original array. As array
+traversal order in @command{awk} is by default undefined, this is (technically)
+not a problem. If you need to guarantee a particular traversal
+order, use the array sorting features in @command{gawk} to do so
+(@pxref{Array Sorting}).
+
+The file contains binary data. All integral values are written in network
+byte order. However, double-precision floating-point values are written
+as native binary data. Thus, arrays containing only string data can
+theoretically be dumped on systems with one byte order and restored on
+systems with a different one, but this has not been tried.
+
+Here is an example:
+
+@example
+@@load "rwarray"
+@dots{}
+ret = writea("arraydump.bin", array)
+@dots{}
+ret = reada("arraydump.bin", array)
+@end example
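+
+Since both functions return zero upon failure, a slightly more careful
+sketch checks the return values:
+
+@example
+@@load "rwarray"
+@dots{}
+if (! writea("arraydump.bin", array))
+    print "could not dump the array" > "/dev/stderr"
+else if (! reada("arraydump.bin", copy))
+    print "could not restore the array" > "/dev/stderr"
+@end example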
+
+@node Extension Sample Readfile
+@subsection Reading an Entire File
+
+The @code{readfile} extension adds a single function
+named @code{readfile()}, and an input parser:
+
+@table @code
+@item @@load "readfile"
+This is how you load the extension.
+
+@cindex @code{readfile()} extension function
+@item result = readfile("/some/path")
+The argument is the name of the file to read. The return value is a
+string containing the entire contents of the requested file. Upon error,
+the function returns the empty string and sets @code{ERRNO}.
+
+@item BEGIN @{ PROCINFO["readfile"] = 1 @}
+In addition, the extension adds an input parser that is activated if
+@code{PROCINFO["readfile"]} exists.
+When activated, each input file is returned in its entirety as @code{$0}.
+@code{RT} is set to the null string.
+@end table
+
+Here is an example:
+
+@example
+@@load "readfile"
+@dots{}
+contents = readfile("/path/to/file");
+if (contents == "" && ERRNO != "") @{
+ print("problem reading file", ERRNO) > "/dev/stderr"
+ ...
+@}
+@end example
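+
+And here is a sketch (an illustration only) of the input parser: each
+input file named on the command line becomes a single record:
+
+@example
+@@load "readfile"
+
+BEGIN @{ PROCINFO["readfile"] = 1 @}
+
+@{ printf "%s is %d characters long\n", FILENAME, length($0) @}
+@end example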
+
+@node Extension Sample Time
+@subsection Extension Time Functions
+
+The @code{time} extension adds two functions, named @code{gettimeofday()}
+and @code{sleep()}, as follows:
+
+@table @code
+@item @@load "time"
+This is how you load the extension.
+
+@cindex @code{gettimeofday()} extension function
+@item the_time = gettimeofday()
+Return the time in seconds that has elapsed since 1970-01-01 UTC as a
+floating-point value. If the time is unavailable on this platform, return
+@minus{}1 and set @code{ERRNO}. The returned time should have sub-second
+precision, but the actual precision may vary based on the platform.
+If the standard C @code{gettimeofday()} system call is available on this
+platform, then it simply returns the value. Otherwise, if on MS-Windows,
+it tries to use @code{GetSystemTimeAsFileTime()}.
+
+@cindex @code{sleep()} extension function
+@item result = sleep(@var{seconds})
+Attempt to sleep for @var{seconds} seconds. If @var{seconds} is negative,
+or the attempt to sleep fails, return @minus{}1 and set @code{ERRNO}.
+Otherwise, return zero after sleeping for the indicated amount of time.
+Note that @var{seconds} may be a floating-point (non-integral) value.
+Implementation details: depending on platform availability, this function
+tries to use @code{nanosleep()} or @code{select()} to implement the delay.
+@end table
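+
+Neither function is difficult to use. As a simple illustration (a
+sketch only, not part of the distribution), the following program
+measures approximately how long a @code{sleep()} call actually took:
+
+@example
+@@load "time"
+
+BEGIN @{
+    start = gettimeofday()
+    if (sleep(1.5) < 0)        # sleep for a second and a half
+        print "sleep failed:", ERRNO > "/dev/stderr"
+    printf "slept for about %.3f seconds\n", gettimeofday() - start
+@}
+@end example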
+
+@node Extension Sample API Tests
+@subsection API Tests
+@cindex @code{testext} extension
+
+The @code{testext} extension exercises parts of the extension API that
+are not tested by the other samples. The @file{extension/testext.c}
+file contains both the C code for the extension and @command{awk}
+test code inside C comments that run the tests. The testing framework
+extracts the @command{awk} code and runs the tests. See the source file
+for more information.
+
+@node gawkextlib
+@section The @code{gawkextlib} Project
+@cindex @code{gawkextlib}
+@cindex extensions, where to find
+
+@cindex @code{gawkextlib} project
+The @uref{http://sourceforge.net/projects/gawkextlib/, @code{gawkextlib}}
+project provides a number of @command{gawk} extensions, including one for
+processing XML files. This is the evolution of the original @command{xgawk}
+(XML @command{gawk}) project.
+
+As of this writing, there are six extensions:
+
+@itemize @value{BULLET}
+@item
+GD graphics library extension
+
+@item
+PDF extension
+
+@item
+PostgreSQL extension
+
+@item
+MPFR library extension
+(this provides access to a number of MPFR functions that @command{gawk}'s
+native MPFR support does not)
+
+@item
+Redis extension
+
+@item
+XML parser extension, using the @uref{http://expat.sourceforge.net, Expat}
+XML parsing library
+@end itemize
+
+@cindex @command{git} utility
+You can check out the code for the @code{gawkextlib} project
+using the @uref{http://git-scm.com, Git} distributed source
+code control system. The command is as follows:
+
+@example
+git clone git://git.code.sf.net/p/gawkextlib/code gawkextlib-code
+@end example
+
+@cindex Expat XML parser library
+You will need to have the @uref{http://expat.sourceforge.net, Expat}
+XML parser library installed in order to build and use the XML extension.
+
+In addition, you must have the GNU Autotools installed
+(@uref{http://www.gnu.org/software/autoconf, Autoconf},
+@uref{http://www.gnu.org/software/automake, Automake},
+@uref{http://www.gnu.org/software/libtool, Libtool},
+and
+@uref{http://www.gnu.org/software/gettext, GNU @command{gettext}}).
+
+The simple recipe for building and testing @code{gawkextlib} is as follows.
+First, build and install @command{gawk}:
+
+@example
+cd .../path/to/gawk/code
+./configure --prefix=/tmp/newgawk @ii{Install in /tmp/newgawk for now}
+make && make check @ii{Build and check that all is OK}
+make install @ii{Install gawk}
+@end example
+
+Next, build @code{gawkextlib} and test it:
+
+@example
+cd .../path/to/gawkextlib-code
+./update-autotools @ii{Generate configure, etc.}
+ @ii{You may have to run this command twice}
+./configure --with-gawk=/tmp/newgawk @ii{Configure, point at ``installed'' gawk}
+make && make check @ii{Build and check that all is OK}
+make install @ii{Install the extensions}
+@end example
+
+If you have installed @command{gawk} in the standard way, then you
+will likely not need the @option{--with-gawk} option when configuring
+@code{gawkextlib}. You may also need to use the @command{sudo} utility
+to install both @command{gawk} and @code{gawkextlib}, depending upon
+how your system works.
+
+If you write an extension that you wish to share with other
+@command{gawk} users, consider doing so through the
+@code{gawkextlib} project.
+See the project's website for more information.
+
+@node Extension summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+You can write extensions (sometimes called plug-ins) for @command{gawk}
+in C or C++ using the application programming interface (API) defined
+by the @command{gawk} developers.
+
+@item
+Extensions must have a license compatible with the GNU General Public
+License (GPL), and they must assert that fact by declaring a variable
+named @code{plugin_is_GPL_compatible}.
+
+@item
+Communication between @command{gawk} and an extension is two-way.
+@command{gawk} passes a @code{struct} to the extension which contains
+various data fields and function pointers. The extension can then call
+into @command{gawk} via the supplied function pointers to accomplish
+certain tasks.
+
+@item
+One of these tasks is to ``register'' the name and implementation of
+new @command{awk}-level functions with @command{gawk}. The implementation
+takes the form of a C function pointer with a defined signature.
+By convention, implementation functions are named @code{do_@var{XXXX}()}
+for some @command{awk}-level function @code{@var{XXXX}()}.
+
+@item
+The API is defined in a header file named @file{gawkapi.h}. You must include
+a number of standard header files @emph{before} including it in your source file.
+
+@item
+API function pointers are provided for the following kinds of operations:
+
+@itemize @value{BULLET}
+@item
+Allocating, reallocating, and releasing memory
+
+@item
+Registration functions (you may register
+extension functions,
+exit callbacks,
+a version string,
+input parsers,
+output wrappers,
+and two-way processors)
+
+@item
+Printing fatal, warning, and ``lint'' warning messages
+
+@item
+Updating @code{ERRNO}, or unsetting it
+
+@item
+Accessing parameters, including converting an undefined parameter into
+an array
+
+@item
+Symbol table access (retrieving a global variable, creating one,
+or changing one)
+
+@item
+Creating and releasing cached values; this provides an
+efficient way to use values for multiple variables and
+can be a big performance win
+
+@item
+Manipulating arrays
+(retrieving, adding, deleting, and modifying elements;
+getting the count of elements in an array;
+creating a new array;
+clearing an array;
+and
+flattening an array for easy C-style looping over all its indices and elements)
+@end itemize
+
+@item
+The API defines a number of standard data types for representing
+@command{awk} values, array elements, and arrays.
+
+@item
+The API provides convenience functions for constructing values.
+It also provides memory management functions to ensure compatibility
+between memory allocated by @command{gawk} and memory allocated by an
+extension.
+
+@item
+@emph{All} memory passed from @command{gawk} to an extension must be
+treated as read-only by the extension.
+
+@item
+@emph{All} memory passed from an extension to @command{gawk} must come from
+the API's memory allocation functions. @command{gawk} takes responsibility for
+the memory and releases it when appropriate.
+
+@item
+The API provides information about the running version of @command{gawk} so
+that an extension can make sure it is compatible with the @command{gawk}
+that loaded it.
+
+@item
+It is easiest to start a new extension by copying the boilerplate code
+described in this @value{CHAPTER}. Macros in the @file{gawkapi.h} header
+file make this easier to do.
+
+@item
+The @command{gawk} distribution includes a number of small but useful
+sample extensions. The @code{gawkextlib} project includes several more,
+larger, extensions. If you wish to write an extension and contribute it
+to the community of @command{gawk} users, the @code{gawkextlib} project
+is the place to do so.
+
+@end itemize
+
+@c EXCLUDE START
+@node Extension Exercises
+@section Exercises
+
+@enumerate
+@item
+Add functions to implement system calls such as @code{chown()},
+@code{chmod()}, and @code{umask()} to the file operations extension
+presented in @ref{Internal File Ops}.
+
+@item
+(Hard.)
+How would you provide namespaces in @command{gawk}, so that the
+names of functions in different extensions don't conflict with each other?
+If you come up with a really good scheme, contact the @command{gawk}
+maintainer to tell him about it.
+
+@item
+Write a wrapper script that provides an interface similar to
+@samp{sed -i} for the ``inplace'' extension presented in
+@ref{Extension Sample Inplace}.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART4}Appendices
+@end ifnotinfo
+
+@ifdocbook
+
+@ifclear FOR_PRINT
+Part IV contains the appendices (including the two licenses that cover
+the @command{gawk} source code and this @value{DOCUMENT}, respectively)
+and the Glossary:
+@end ifclear
+
+@ifset FOR_PRINT
+Part IV contains three appendices, the last of which is the license that
+covers the @command{gawk} source code:
+@end ifset
+
+@itemize @value{BULLET}
+@item
+@ref{Language History}
+
+@item
+@ref{Installation}
+
+@ifclear FOR_PRINT
+@item
+@ref{Notes}
+
+@item
+@ref{Basic Concepts}
+
+@item
+@ref{Glossary}
+@end ifclear
+
+@item
+@ref{Copying}
+
+@ifclear FOR_PRINT
+@item
+@ref{GNU Free Documentation License}
+@end ifclear
+@end itemize
+@end ifdocbook
+
+@node Language History
+@appendix The Evolution of the @command{awk} Language
+
+This @value{DOCUMENT} describes the GNU implementation of @command{awk},
+which follows the POSIX specification. Many longtime @command{awk}
+users learned @command{awk} programming with the original @command{awk}
+implementation in Version 7 Unix. (This implementation was the basis for
+@command{awk} in Berkeley Unix, through 4.3-Reno. Subsequent versions
+of Berkeley Unix, and, for a while, some systems derived from 4.4BSD-Lite, used various
+versions of @command{gawk} for their @command{awk}.) This @value{CHAPTER}
+briefly describes the evolution of the @command{awk} language, with
+cross-references to other parts of the @value{DOCUMENT} where you can
+find more information.
+
+@ifset FOR_PRINT
+To save space, we have omitted
+information on the history of features in @command{gawk} from this
+edition. You can find it in the
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Feature-History.html,
+online documentation}.
+@end ifset
+
+@menu
+* V7/SVR3.1:: The major changes between V7 and System V
+ Release 3.1.
+* SVR4:: Minor changes between System V Releases 3.1
+ and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from Brian Kernighan's version of
+ @command{awk}.
+* POSIX/GNU:: The extensions in @command{gawk} not in POSIX
+ @command{awk}.
+* Feature History:: The history of the features in @command{gawk}.
+* Common Extensions:: Common Extensions Summary.
+* Ranges and Locales:: How locales used to affect regexp ranges.
+* Contributors:: The major contributors to @command{gawk}.
+* History summary:: History summary.
+@end menu
+
+@node V7/SVR3.1
+@appendixsec Major Changes Between V7 and SVR3.1
+@cindex @command{awk}, versions of
+@cindex @command{awk}, versions of, changes between V7 and SVR3.1
+
+The @command{awk} language evolved considerably between the release of
+Version 7 Unix (1978) and the new version that was first made generally available in
+System V Release 3.1 (1987). This @value{SECTION} summarizes the changes, with
+cross-references to further details:
+
+@itemize @value{BULLET}
+@item
+The requirement for @samp{;} to separate rules on a line
+(@pxref{Statements/Lines}).
+
+@item
+User-defined functions and the @code{return} statement
+(@pxref{User-defined}).
+
+@item
+The @code{delete} statement (@pxref{Delete}).
+
+@item
+The @code{do}-@code{while} statement
+(@pxref{Do Statement}).
+
+@item
+The built-in functions @code{atan2()}, @code{cos()}, @code{sin()}, @code{rand()}, and
+@code{srand()} (@pxref{Numeric Functions}).
+
+@item
+The built-in functions @code{gsub()}, @code{sub()}, and @code{match()}
+(@pxref{String Functions}).
+
+@item
+The built-in functions @code{close()} and @code{system()}
+(@pxref{I/O Functions}).
+
+@item
+The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
+and @code{SUBSEP} predefined variables (@pxref{Built-in Variables}).
+
+@item
+Assignable @code{$0} (@pxref{Changing Fields}).
+
+@item
+The conditional expression using the ternary operator @samp{?:}
+(@pxref{Conditional Exp}).
+
+@item
+The expression @samp{@var{index-variable} in @var{array}} outside of @code{for}
+statements (@pxref{Reference to Elements}).
+
+@item
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops}).
+
+@item
+C-compatible operator precedence, which breaks some old @command{awk}
+programs (@pxref{Precedence}).
+
+@item
+Regexps as the value of @code{FS}
+(@pxref{Field Separators}) and as the
+third argument to the @code{split()} function
+(@pxref{String Functions}), rather than using only the first character
+of @code{FS}.
+
+@item
+Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
+(@pxref{Computed Regexps}).
+
+@item
+The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
+(@pxref{Escape Sequences}).
+
+@item
+Redirection of input for the @code{getline} function
+(@pxref{Getline}).
+
+@item
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END}).
+
+@item
+Multidimensional arrays
+(@pxref{Multidimensional}).
+@end itemize
+
+@node SVR4
+@appendixsec Changes Between SVR3.1 and SVR4
+
+@cindex @command{awk}, versions of, changes between SVR3.1 and SVR4
+The System V Release 4 (1989) version of Unix @command{awk} added these features
+(some of which originated in @command{gawk}):
+
+@itemize @value{BULLET}
+@item
+The @code{ENVIRON} array (@pxref{Built-in Variables}).
+@c gawk and MKS awk
+
+@item
+Multiple @option{-f} options on the command line
+(@pxref{Options}).
+@c MKS awk
+
+@item
+The @option{-v} option for assigning variables before program execution begins
+(@pxref{Options}).
+@c GNU, Bell Laboratories & MKS together
+
+@item
+The @option{--} signal for terminating command-line options.
+
+@item
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Escape Sequences}).
+@c GNU, for ANSI C compat
+
+@item
+A defined return value for the @code{srand()} built-in function
+(@pxref{Numeric Functions}).
+
+@item
+The @code{toupper()} and @code{tolower()} built-in string functions
+for case translation
+(@pxref{String Functions}).
+
+@item
+A cleaner specification for the @samp{%c} format-control letter in the
+@code{printf} function
+(@pxref{Control Letters}).
+
+@item
+The ability to dynamically pass the field width and precision (@code{"%*.*d"})
+in the argument list of @code{printf} and @code{sprintf()}
+(@pxref{Control Letters}).
+
+@item
+The use of regexp constants, such as @code{/foo/}, as expressions, where
+they are equivalent to using the matching operator, as in @samp{$0 ~ /foo/}
+(@pxref{Using Constant Regexps}).
+
+@item
+Processing of escape sequences inside command-line variable assignments
+(@pxref{Assignment Options}).
+@end itemize
+
+@node POSIX
+@appendixsec Changes Between SVR4 and POSIX @command{awk}
+@cindex @command{awk}, versions of, changes between SVR4 and POSIX @command{awk}
+@cindex POSIX @command{awk}, changes in @command{awk} versions
+
+The POSIX Command Language and Utilities standard for @command{awk} (1992)
+introduced the following changes into the language:
+
+@itemize @value{BULLET}
+@item
+The use of @option{-W} for implementation-specific options
+(@pxref{Options}).
+
+@item
+The use of @code{CONVFMT} for controlling the conversion of numbers
+to strings (@pxref{Conversion}).
+
+@item
+The concept of a numeric string and tighter comparison rules to go
+with it (@pxref{Typing and Comparison}).
+
+@item
+The use of predefined variables as function parameter names is forbidden
+(@pxref{Definition Syntax}).
+
+@item
+More complete documentation of many of the previously undocumented
+features of the language.
+@end itemize
+
+In 2012, a number of extensions that had been commonly available for
+many years were finally added to POSIX. They are:
+
+@itemize @value{BULLET}
+@item
+The @code{fflush()} built-in function for flushing buffered output
+(@pxref{I/O Functions}).
+
+@item
+The @code{nextfile} statement
+(@pxref{Nextfile Statement}).
+
+@item
+The ability to delete all of an array at once with @samp{delete @var{array}}
+(@pxref{Delete}).
+
+@end itemize
+
+@DBXREF{Common Extensions} for a list of common extensions
+not permitted by the POSIX standard.
+
+The 2008 POSIX standard can be found online at
+@url{http://www.opengroup.org/onlinepubs/9699919799/}.
+
+
+@node BTL
+@appendixsec Extensions in Brian Kernighan's @command{awk}
+
+@cindex @command{awk}, versions of, See Also Brian Kernighan's @command{awk}
+@cindex extensions, Brian Kernighan's @command{awk}
+@cindex Brian Kernighan's @command{awk}, extensions
+@cindex Kernighan, Brian
+Brian Kernighan
+has made his version available via his home page
+(@pxref{Other Versions}).
+
+This @value{SECTION} describes common extensions that
+originally appeared in his version of @command{awk}:
+
+@itemize @value{BULLET}
+@item
+The @samp{**} and @samp{**=} operators
+(@pxref{Arithmetic Ops}
+and
+@ref{Assignment Ops}).
+
+@item
+The use of @code{func} as an abbreviation for @code{function}
+(@pxref{Definition Syntax}).
+
+@item
+The @code{fflush()} built-in function for flushing buffered output
+(@pxref{I/O Functions}).
+
+@ignore
+@item
+The @code{SYMTAB} array, that allows access to @command{awk}'s internal symbol
+table. This feature was never documented for his @command{awk}, largely because
+it is somewhat shakily implemented. For instance, you cannot access arrays
+or array elements through it.
+@end ignore
+@end itemize
+
+@DBXREF{Common Extensions} for a full list of the extensions
+available in his @command{awk}.
+
+@node POSIX/GNU
+@appendixsec Extensions in @command{gawk} Not in POSIX @command{awk}
+
+@cindex compatibility mode (@command{gawk}), extensions
+@cindex extensions, in @command{gawk}, not in POSIX @command{awk}
+@cindex POSIX, @command{gawk} extensions not included in
+The GNU implementation, @command{gawk}, adds a large number of features.
+They can all be disabled with either the @option{--traditional} or
+@option{--posix} options
+(@pxref{Options}).
+
+A number of features have come and gone over the years. This @value{SECTION}
+summarizes the additional features over POSIX @command{awk} that are
+in the current version of @command{gawk}.
+
+@itemize @value{BULLET}
+
+@item
+Additional predefined variables:
+
+@itemize @value{MINUS}
+@item
+The
+@code{ARGIND},
+@code{BINMODE},
+@code{ERRNO},
+@code{FIELDWIDTHS},
+@code{FPAT},
+@code{IGNORECASE},
+@code{LINT},
+@code{PROCINFO},
+@code{RT},
+and
+@code{TEXTDOMAIN}
+variables
+(@pxref{Built-in Variables}).
+@end itemize
+
+@item
+Special files in I/O redirections:
+
+@itemize @value{MINUS}
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{N}} special @value{FN}s
+(@pxref{Special Files}).
+
+@item
+The @file{/inet}, @file{/inet4}, and @file{/inet6} special files,
+used with @samp{|&} for TCP/IP networking; the @value{FN} selects
+which version of the IP protocol to use
+(@pxref{TCP/IP Networking}).
+@end itemize
+
+@item
+Changes and/or additions to the language:
+
+@itemize @value{MINUS}
+@item
+The @samp{\x} escape sequence
+(@pxref{Escape Sequences}).
+
+@item
+Full support for both POSIX and GNU regexps
+(@pxref{Regexp}).
+
+@item
+The ability for @code{FS} and for the third
+argument to @code{split()} to be null strings
+(@pxref{Single Character Fields}).
+
+@item
+The ability for @code{RS} to be a regexp
+(@pxref{Records}).
+
+@item
+The ability to use octal and hexadecimal constants in @command{awk}
+program source code
+(@pxref{Nondecimal-numbers}).
+
+@item
+The @samp{|&} operator for two-way I/O to a coprocess
+(@pxref{Two-way I/O}).
+
+@item
+Indirect function calls
+(@pxref{Indirect Calls}).
+
+@item
+Directories on the command line produce a warning and are skipped
+(@pxref{Command-line directories}).
+@end itemize
+
+@item
+New keywords:
+
+@itemize @value{MINUS}
+@item
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}).
+
+@item
+The @code{switch} statement
+(@pxref{Switch Statement}).
+@end itemize
+
+@item
+Changes to standard @command{awk} functions:
+
+@itemize @value{MINUS}
+@item
+The optional second argument to @code{close()} that allows closing one end
+of a two-way pipe to a coprocess
+(@pxref{Two-way I/O}).
+
+@item
+POSIX compliance for @code{gsub()} and @code{sub()} with @option{--posix}.
+
+@item
+The @code{length()} function accepts an array argument
+and returns the number of elements in the array
+(@pxref{String Functions}).
+
+@item
+The optional third argument to the @code{match()} function
+for capturing text-matching subexpressions within a regexp
+(@pxref{String Functions}).
+
+@item
+Positional specifiers in @code{printf} formats for
+making translations easier
+(@pxref{Printf Ordering}).
+
+@item
+The @code{split()} function's additional optional fourth
+argument which is an array to hold the text of the field separators
+(@pxref{String Functions}).
+@end itemize
+
+@item
+Additional functions only in @command{gawk}:
+
+@itemize @value{MINUS}
+@item
+The @code{gensub()}, @code{patsplit()}, and @code{strtonum()} functions
+for more powerful text manipulation
+(@pxref{String Functions}).
+
+@item
+The @code{asort()} and @code{asorti()} functions for sorting arrays
+(@pxref{Array Sorting}).
+
+@item
+The @code{mktime()}, @code{systime()}, and @code{strftime()}
+functions for working with timestamps
+(@pxref{Time Functions}).
+
+@item
+The
+@code{and()},
+@code{compl()},
+@code{lshift()},
+@code{or()},
+@code{rshift()},
+and
+@code{xor()}
+functions for bit manipulation
+(@pxref{Bitwise Functions}).
+@c In 4.1, and(), or() and xor() grew the ability to take > 2 arguments
+
+@item
+The @code{isarray()} function to check if a variable is an array or not
+(@pxref{Type Functions}).
+
+@item
+The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()}
+functions for internationalization
+(@pxref{Programmer i18n}).
+@end itemize
+
+@item
+Changes and/or additions in the command-line options:
+
+@itemize @value{MINUS}
+@item
+The @env{AWKPATH} environment variable for specifying a path search for
+the @option{-f} command-line option
+(@pxref{Options}).
+
+@item
+The @env{AWKLIBPATH} environment variable for specifying a path search for
+the @option{-l} command-line option
+(@pxref{Options}).
+
+@item
+The
+@option{-b},
+@option{-c},
+@option{-C},
+@option{-d},
+@option{-D},
+@option{-e},
+@option{-E},
+@option{-g},
+@option{-h},
+@option{-i},
+@option{-l},
+@option{-L},
+@option{-M},
+@option{-n},
+@option{-N},
+@option{-o},
+@option{-O},
+@option{-p},
+@option{-P},
+@option{-r},
+@option{-S},
+@option{-t},
+and
+@option{-V}
+short options. Also, the
+ability to use GNU-style long-named options that start with @option{--}
+and the
+@option{--assign},
+@option{--bignum},
+@option{--characters-as-bytes},
+@option{--copyright},
+@option{--debug},
+@option{--dump-variables},
+@option{--exec},
+@option{--field-separator},
+@option{--file},
+@option{--gen-pot},
+@option{--help},
+@option{--include},
+@option{--lint},
+@option{--lint-old},
+@option{--load},
+@option{--non-decimal-data},
+@option{--optimize},
+@option{--posix},
+@option{--pretty-print},
+@option{--profile},
+@option{--re-interval},
+@option{--sandbox},
+@option{--source},
+@option{--traditional},
+@option{--use-lc-numeric},
+and
+@option{--version}
+long options
+(@pxref{Options}).
+@end itemize
+
+@c new ports
+
+@item
+Support for the following obsolete systems was removed from the code
+and the documentation for @command{gawk} @value{PVERSION} 4.0:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+Amiga
+
+@item
+Atari
+
+@item
+BeOS
+
+@item
+Cray
+
+@item
+MIPS RiscOS
+
+@item
+MS-DOS with the Microsoft Compiler
+
+@item
+MS-Windows with the Microsoft Compiler
+
+@item
+NeXT
+
+@item
+SunOS 3.x, Sun 386 (Road Runner)
+
+@item
+Tandem (non-POSIX)
+
+@item
+Prestandard VAX C compiler for VAX/VMS
+
+@item
+GCC for VAX and Alpha has not been tested for a while.
+
+@end itemize
+
+@item
+Support for the following obsolete systems was removed from the code
+for @command{gawk} @value{PVERSION} 4.1:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+Ultrix
+@end itemize
+
+@item
+@c FIXME: Verify the version here.
+Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2.
+
+@end itemize
+
+@c XXX ADD MORE STUFF HERE
+
+
+@c This does not need to be in the formal book.
+@ifclear FOR_PRINT
+@node Feature History
+@appendixsec History of @command{gawk} Features
+
+@ignore
+See the thread:
+https://groups.google.com/forum/#!topic/comp.lang.awk/SAUiRuff30c
+This motivated me to add this section.
+@end ignore
+
+@ignore
+I've tried to follow this general order, esp.@: for the 3.0 and 3.1 sections:
+ variables
+ special files
+ language changes (e.g., hex constants)
+ differences in standard awk functions
+ new gawk functions
+ new keywords
+ new command-line options
+ behavioral changes
+ new ports
+Within each category, be alphabetical.
+@end ignore
+
+This @value{SECTION} describes the features in @command{gawk}
+over and above those in POSIX @command{awk},
+in the order they were added to @command{gawk}.
+
+Version 2.10 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+@item
+The @env{AWKPATH} environment variable for specifying a path search for
+the @option{-f} command-line option
+(@pxref{Options}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity}).
+
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{N}} special @value{FN}s
+(@pxref{Special Files}).
+@end itemize
+
+Version 2.13 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+@item
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size}).
+
+@item
+The @code{systime()} and @code{strftime()} built-in functions for obtaining
+and printing timestamps
+(@pxref{Time Functions}).
+
+@item
+Additional command-line options
+(@pxref{Options}):
+
+@itemize @value{MINUS}
+@item
+The @option{-W lint} option to provide error and portability checking
+both for the source code and at runtime.
+
+@item
+The @option{-W compat} option to turn off the GNU extensions.
+
+@item
+The @option{-W posix} option for full POSIX compliance.
+@end itemize
+@end itemize
+
+Version 2.14 of @command{gawk} introduced the following feature:
+
+@itemize @value{BULLET}
+@item
+The @code{next file} statement for skipping to the next @value{DF}
+(@pxref{Nextfile Statement}).
+@end itemize
+
+Version 2.15 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+@item
+New variables (@pxref{Built-in Variables}):
+
+@itemize @value{MINUS}
+@item
+@code{ARGIND}, which tracks the movement of @code{FILENAME}
+through @code{ARGV}.
+
+@item
+@code{ERRNO}, which contains the system error message when
+@code{getline} returns @minus{}1 or @code{close()} fails.
+@end itemize
+
+@item
+The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and
+@file{/dev/user} special @value{FN}s. These have since been removed.
+
+@item
+The ability to delete all of an array at once with @samp{delete @var{array}}
+(@pxref{Delete}).
+
+@item
+Command-line option changes
+(@pxref{Options}):
+
+@itemize @value{MINUS}
+@item
+The ability to use GNU-style long-named options that start with @option{--}.
+
+@item
+The @option{--source} option for mixing command-line and library-file
+source code.
+@end itemize
+@end itemize
+
+Version 3.0 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+@item
+New or changed variables:
+
+@itemize @value{MINUS}
+@item
+@code{IGNORECASE} changed, now applying to string comparison as well
+as regexp operations
+(@pxref{Case-sensitivity}).
+
+@item
+@code{RT}, which contains the input text that matched @code{RS}
+(@pxref{Records}).
+@end itemize
+
+@item
+Full support for both POSIX and GNU regexps
+(@pxref{Regexp}).
+
+@item
+The @code{gensub()} function for more powerful text manipulation
+(@pxref{String Functions}).
+
+@item
+The @code{strftime()} function acquired a default time format,
+allowing it to be called with no arguments
+(@pxref{Time Functions}).
+
+@item
+The ability for @code{FS} and for the third
+argument to @code{split()} to be null strings
+(@pxref{Single Character Fields}).
+
+@item
+The ability for @code{RS} to be a regexp
+(@pxref{Records}).
+
+@item
+The @code{next file} statement became @code{nextfile}
+(@pxref{Nextfile Statement}).
+
+@item
+The @code{fflush()} function from
+BWK @command{awk}
+(then at Bell Laboratories;
+@pxref{I/O Functions}).
+
+@item
+New command-line options:
+
+@itemize @value{MINUS}
+@item
+The @option{--lint-old} option to
+warn about constructs that are not available in
+the original Version 7 Unix version of @command{awk}
+(@pxref{V7/SVR3.1}).
+
+@item
+The @option{-m} option from BWK @command{awk}. (Brian was
+still at Bell Laboratories at the time.) This was later removed from
+both his @command{awk} and @command{gawk}.
+
+@item
+The @option{--re-interval} option to provide interval expressions in regexps
+(@pxref{Regexp Operators}).
+
+@item
+The @option{--traditional} option was added as a better name for
+@option{--compat} (@pxref{Options}).
+@end itemize
+
+@item
+The use of GNU Autoconf to control the configuration process
+(@pxref{Quick Installation}).
+
+@item
+Amiga support.
+This has since been removed.
+
+@end itemize
+
+Version 3.1 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+@item
+New variables
+(@pxref{Built-in Variables}):
+
+@itemize @value{MINUS}
+@item
+@code{BINMODE}, for non-POSIX systems,
+which allows binary I/O for input and/or output files
+(@pxref{PC Using}).
+
+@item
+@code{LINT}, which dynamically controls lint warnings.
+
+@item
+@code{PROCINFO}, an array for providing process-related information.
+
+@item
+@code{TEXTDOMAIN}, for setting an application's internationalization text domain
+(@pxref{Internationalization}).
+@end itemize
+
+@item
+The ability to use octal and hexadecimal constants in @command{awk}
+program source code
+(@pxref{Nondecimal-numbers}).
+
+@item
+The @samp{|&} operator for two-way I/O to a coprocess
+(@pxref{Two-way I/O}).
+
+@item
+The @file{/inet} special files for TCP/IP networking using @samp{|&}
+(@pxref{TCP/IP Networking}).
+
+@item
+The optional second argument to @code{close()} that allows closing one end
+of a two-way pipe to a coprocess
+(@pxref{Two-way I/O}).
+
+@item
+The optional third argument to the @code{match()} function
+for capturing text-matching subexpressions within a regexp
+(@pxref{String Functions}).
+
+@item
+Positional specifiers in @code{printf} formats for
+making translations easier
+(@pxref{Printf Ordering}).
+
+@item
+A number of new built-in functions:
+
+@itemize @value{MINUS}
+@item
+The @code{asort()} and @code{asorti()} functions for sorting arrays
+(@pxref{Array Sorting}).
+
+@item
+The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()} functions
+for internationalization
+(@pxref{Programmer i18n}).
+
+@item
+The @code{extension()} function and the ability to add
+new built-in functions dynamically
+(@pxref{Dynamic Extensions}).
+
+@item
+The @code{mktime()} function for creating timestamps
+(@pxref{Time Functions}).
+
+@item
+The @code{and()}, @code{or()}, @code{xor()}, @code{compl()},
+@code{lshift()}, @code{rshift()}, and @code{strtonum()} functions
+(@pxref{Bitwise Functions}).
+@end itemize
+
+@item
+@cindex @code{next file} statement
+The support for @samp{next file} as two words was removed completely
+(@pxref{Nextfile Statement}).
+
+@item
+Additional command-line options
+(@pxref{Options}):
+
+@itemize @value{MINUS}
+@item
+The @option{--dump-variables} option to print a list of all global variables.
+
+@item
+The @option{--exec} option, for use in CGI scripts.
+
+@item
+The @option{--gen-po} command-line option and the use of a leading
+underscore to mark strings that should be translated
+(@pxref{String Extraction}).
+
+@item
+The @option{--non-decimal-data} option to allow non-decimal
+input data
+(@pxref{Nondecimal Data}).
+
+@item
+The @option{--profile} option and @command{pgawk}, the
+profiling version of @command{gawk}, for producing execution
+profiles of @command{awk} programs
+(@pxref{Profiling}).
+
+@item
+The @option{--use-lc-numeric} option to force @command{gawk}
+to use the locale's decimal point for parsing input data
+(@pxref{Conversion}).
+@end itemize
+
+@item
+The use of GNU Automake to help in standardizing the configuration process
+(@pxref{Quick Installation}).
+
+@item
+The use of GNU @command{gettext} for @command{gawk}'s own message output
+(@pxref{Gawk I18N}).
+
+@item
+BeOS support. This was later removed.
+
+@item
+Tandem support. This was later removed.
+
+@item
+The Atari port became officially unsupported and was
+later removed entirely.
+
+@item
+The source code changed to use ISO C standard-style function definitions.
+
+@item
+POSIX compliance for @code{sub()} and @code{gsub()}
+(@pxref{Gory Details}).
+
+@item
+The @code{length()} function was extended to accept an array argument
+and return the number of elements in the array
+(@pxref{String Functions}).
+
+@item
+The @code{strftime()} function acquired a third argument to
+enable printing times as UTC
+(@pxref{Time Functions}).
+@end itemize
+
+Version 4.0 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+
+@item
+Variable additions:
+
+@itemize @value{MINUS}
+@item
+@code{FPAT}, which allows you to specify a regexp that matches
+the fields, instead of matching the field separator
+(@pxref{Splitting By Content}).
+
+@item
+If @code{PROCINFO["sorted_in"]} exists, @samp{for (iggy in foo)} loops sort the
+indices before looping over them. The value of this element
+provides control over how the indices are sorted before the loop
+traversal starts
+(@pxref{Controlling Scanning}).
+
+@item
+@code{PROCINFO["strftime"]}, which holds
+the default format for @code{strftime()}
+(@pxref{Time Functions}).
+@end itemize
+
+@item
+The special files @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}
+and @file{/dev/user} were removed.
+
+@item
+Support for IPv6 was added via the @file{/inet6} special file.
+@file{/inet4} forces IPv4 and @file{/inet} chooses the system
+default, which is probably IPv4
+(@pxref{TCP/IP Networking}).
+
+@item
+The use of @samp{\s} and @samp{\S} escape sequences in regular expressions
+(@pxref{GNU Regexp Operators}).
+
+@item
+Interval expressions became part of default regular expressions
+(@pxref{Regexp Operators}).
+
+@item
+POSIX character classes work even with @option{--traditional}
+(@pxref{Regexp Operators}).
+
+@item
+@code{break} and @code{continue} became invalid outside a loop,
+even with @option{--traditional}
+(@pxref{Break Statement}, and also see
+@ref{Continue Statement}).
+
+@item
+@code{fflush()}, @code{nextfile}, and @samp{delete @var{array}}
+are allowed if @option{--posix} or @option{--traditional}, since they
+are all now part of POSIX.
+
+@item
+An optional third argument to
+@code{asort()} and @code{asorti()}, specifying how to sort
+(@pxref{String Functions}).
+
+@item
+The behavior of @code{fflush()} changed to match BWK @command{awk}
+and for POSIX; now both @samp{fflush()} and @samp{fflush("")}
+flush all open output redirections
+(@pxref{I/O Functions}).
+
+@item
+The @code{isarray()}
+function, which determines whether or not an item is an array,
+making it possible to traverse arrays of arrays
+(@pxref{Type Functions}).
+
+@item
+The @code{patsplit()}
+function, which gives the same capability as @code{FPAT}, for splitting
+(@pxref{String Functions}).
+
+@item
+An optional fourth argument to the @code{split()} function,
+which is an array to hold the values of the separators
+(@pxref{String Functions}).
+
+@item
+Arrays of arrays
+(@pxref{Arrays of Arrays}).
+
+@item
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}).
+
+@item
+Indirect function calls
+(@pxref{Indirect Calls}).
+
+@item
+@code{switch} / @code{case} are enabled by default
+(@pxref{Switch Statement}).
+
+@item
+Command-line option changes
+(@pxref{Options}):
+
+@itemize @value{MINUS}
+@item
+The @option{-b} and @option{--characters-as-bytes} options,
+which prevent @command{gawk} from treating input as a multibyte string.
+
+@item
+The redundant @option{--compat}, @option{--copyleft}, and @option{--usage}
+long options were removed.
+
+@item
+The @option{--gen-po} option was finally renamed to the correct @option{--gen-pot}.
+
+@item
+The @option{--sandbox} option which disables certain features.
+
+@item
+All long options acquired corresponding short options, for use in @samp{#!} scripts.
+@end itemize
+
+@item
+Directories named on the command line now produce a warning, not a fatal
+error, unless @option{--posix} or @option{--traditional} are used
+(@pxref{Command-line directories}).
+
+@item
+The @command{gawk} internals were rewritten, bringing the @command{dgawk}
+debugger and possibly improved performance
+(@pxref{Debugger}).
+
+@item
+Per the GNU Coding Standards, dynamic extensions must now define
+a global symbol indicating that they are GPL-compatible
+(@pxref{Plugin License}).
+
+@item
+In POSIX mode, string comparisons use @code{strcoll()} / @code{wcscoll()}
+(@pxref{POSIX String Comparison}).
+
+@item
+The option for raw sockets was removed, since it was never implemented
+(@pxref{TCP/IP Networking}).
+
+@item
+Ranges of the form @samp{[d-h]} are treated as if they were in the
+C locale, no matter what kind of regexp is being used, and even if
+@option{--posix}
+(@pxref{Ranges and Locales}).
+
+@item
+Support was removed for the following systems:
+
+@itemize @value{MINUS}
+@item
+Atari
+
+@item
+Amiga
+
+@item
+BeOS
+
+@item
+Cray
+
+@item
+MIPS RiscOS
+
+@item
+MS-DOS with Microsoft Compiler
+
+@item
+MS-Windows with Microsoft Compiler
+
+@item
+NeXT
+
+@item
+SunOS 3.x, Sun 386 (Road Runner)
+
+@item
+Tandem (non-POSIX)
+
+@item
+Prestandard VAX C compiler for VAX/VMS
+@end itemize
+@end itemize
+
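+To give a flavor of one of these additions, here is a minimal,
+illustrative use of the optional fourth argument to @code{split()};
+the sample string and variable names are arbitrary:
+
+@example
+$ @kbd{gawk 'BEGIN @{ n = split("a1b2c", p, /[0-9]/, seps); print n, p[2], seps[1] @}'}
+@print{} 3 b 1
+@end example
+
+@noindent
+Here, @code{seps[1]} holds the text of the first separator, @samp{1}.
+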
+Version 4.1 of @command{gawk} introduced the following features:
+
+@itemize @value{BULLET}
+
+@item
+Three new arrays:
+@code{SYMTAB}, @code{FUNCTAB}, and @code{PROCINFO["identifiers"]}
+(@pxref{Auto-set}).
+
+@item
+The three executables @command{gawk}, @command{pgawk}, and @command{dgawk} were merged into
+one, named just @command{gawk}. As a result, the command-line options changed.
+
+@item
+Command-line option changes
+(@pxref{Options}):
+
+@itemize @value{MINUS}
+@item
+The @option{-D} option invokes the debugger.
+
+@item
+The @option{-i} and @option{--include} options
+load @command{awk} library files.
+
+@item
+The @option{-l} and @option{--load} options load compiled dynamic extensions.
+
+@item
+The @option{-M} and @option{--bignum} options enable MPFR.
+
+@item
+The @option{-o} option only does pretty-printing.
+
+@item
+The @option{-p} option is used for profiling.
+
+@item
+The @option{-R} option was removed.
+@end itemize
+
+@item
+Support for high-precision arithmetic with MPFR
+(@pxref{Arbitrary Precision Arithmetic}).
+
+@item
+The @code{and()}, @code{or()}, and @code{xor()} functions
+changed to allow any number of arguments,
+with a minimum of two
+(@pxref{Bitwise Functions}; a brief example appears just after this list).
+
+@item
+The dynamic extension interface was completely redone
+(@pxref{Dynamic Extensions}).
+
+@end itemize
+
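+As a small, hedged illustration of the relaxed argument count for the
+bit-manipulation functions (the operand values here are arbitrary):
+
+@example
+$ @kbd{gawk 'BEGIN @{ print and(31, 15, 7) @}'}
+@print{} 7
+@end example
+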
+@c XXX ADD MORE STUFF HERE
+@end ifclear
+
+@node Common Extensions
+@appendixsec Common Extensions Summary
+
+@cindex extensions, Brian Kernighan's @command{awk}
+@cindex extensions, @command{mawk}
+The following table summarizes the common extensions supported
+by @command{gawk}, Brian Kernighan's @command{awk}, and @command{mawk},
+the three most widely used freely available versions of @command{awk}
+(@pxref{Other Versions}).
+
+@multitable {@file{/dev/stderr} special file} {BWK Awk} {Mawk} {GNU Awk} {Now standard}
+@headitem Feature @tab BWK Awk @tab Mawk @tab GNU Awk @tab Now standard
+@item @samp{\x} Escape sequence @tab X @tab X @tab X @tab
+@item @code{FS} as null string @tab X @tab X @tab X @tab
+@item @file{/dev/stdin} special file @tab X @tab X @tab X @tab
+@item @file{/dev/stdout} special file @tab X @tab X @tab X @tab
+@item @file{/dev/stderr} special file @tab X @tab X @tab X @tab
+@item @code{delete} without subscript @tab X @tab X @tab X @tab X
+@item @code{fflush()} function @tab X @tab X @tab X @tab X
+@item @code{length()} of an array @tab X @tab X @tab X @tab
+@item @code{nextfile} statement @tab X @tab X @tab X @tab X
+@item @code{**} and @code{**=} operators @tab X @tab @tab X @tab
+@item @code{func} keyword @tab X @tab @tab X @tab
+@item @code{BINMODE} variable @tab @tab X @tab X @tab
+@item @code{RS} as regexp @tab @tab X @tab X @tab
+@item Time-related functions @tab @tab X @tab X @tab
+@end multitable
+
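+For instance, applying @code{length()} to an array, one of the common
+extensions listed above, yields the number of elements it contains.
+A minimal sketch:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a[1] = "x"; a[2] = "y"; print length(a) @}'}
+@print{} 2
+@end example
+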
+@node Ranges and Locales
+@appendixsec Regexp Ranges and Locales: A Long Sad Story
+
+This @value{SECTION} describes the confusing history of ranges within
+regular expressions and their interactions with locales, and how this
+affected different versions of @command{gawk}.
+
+The original Unix tools that worked with regular expressions defined
+character ranges (such as @samp{[a-z]}) to match any character between
+the first character in the range and the last character in the range,
+inclusive. Ordering was based on the numeric value of each character
+in the machine's native character set. Thus, on ASCII-based systems,
+@samp{[a-z]} matched all the lowercase letters, and only the lowercase
+letters, as the numeric values for the letters from @samp{a} through
+@samp{z} were contiguous. (On an EBCDIC system, the range @samp{[a-z]}
+includes additional, non-alphabetic characters as well.)
+
+Almost all introductory Unix literature explained range expressions
+as working in this fashion, and in particular, would teach that the
+``correct'' way to match lowercase letters was with @samp{[a-z]}, and
+that @samp{[A-Z]} was the ``correct'' way to match uppercase letters.
+And indeed, this was true.@footnote{And Life was good.}
+
+The 1992 POSIX standard introduced the idea of locales (@pxref{Locales}).
+Because many locales include other letters besides the plain 26
+letters of the English alphabet, the POSIX standard added
+character classes (@pxref{Bracket Expressions}) as a way to match
+different kinds of characters besides the traditional ones in the ASCII
+character set.
+
+However, the standard @emph{changed} the interpretation of range expressions.
+In the @code{"C"} and @code{"POSIX"} locales, a range expression like
+@samp{[a-dx-z]} is still equivalent to @samp{[abcdxyz]}, as in ASCII.
+But outside those locales, the ordering was defined to be based on
+@dfn{collation order}.
+
+What does that mean?
+In many locales, @samp{A} and @samp{a} are both less than @samp{B}.
+In other words, these locales sort characters in dictionary order,
+and @samp{[a-dx-z]} is typically not equivalent to @samp{[abcdxyz]};
+instead it might be equivalent to @samp{[ABCXYabcdxyz]}, for example.
+
+This point needs to be emphasized: much literature teaches that you should
+use @samp{[a-z]} to match a lowercase character. But on systems with
+non-ASCII locales, this also matches all of the uppercase characters
+except @samp{A} or @samp{Z}! This was a continuous cause of confusion, even well
+into the twenty-first century.
+
+To demonstrate these issues, the following example uses the @code{sub()}
+function, which does text replacement (@pxref{String Functions}). Here,
+the intent is to remove trailing uppercase characters:
+
+@example
+$ @kbd{echo something1234abc | gawk-3.1.8 '@{ sub("[A-Z]*$", ""); print @}'}
+@print{} something1234a
+@end example
+
+@noindent
+This output is unexpected, as the @samp{bc} at the end of
+@samp{something1234abc} should not normally match @samp{[A-Z]*}.
+This result is due to the locale setting (and thus you may not see
+it on your system).
+
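+By contrast, a POSIX character class (@pxref{Bracket Expressions})
+matches only what its name says, independent of the locale's collation
+order. The following sketch shows the same substitution written with
+@samp{[[:upper:]]}, which leaves the sample text untouched:
+
+@example
+$ @kbd{echo something1234abc | gawk '@{ sub("[[:upper:]]*$", ""); print @}'}
+@print{} something1234abc
+@end example
+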
+@cindex Unicode
+Similar considerations apply to other ranges. For example, @samp{["-/]}
+is perfectly valid in ASCII, but is not valid in many Unicode locales,
+such as @code{en_US.UTF-8}.
+
+Early versions of @command{gawk} used regexp matching code that was not
+locale aware, so ranges had their traditional interpretation.
+
+When @command{gawk} switched to using locale-aware regexp matchers,
+the problems began, especially as both GNU/Linux and commercial Unix
+vendors started implementing non-ASCII locales, @emph{and making them
+the default}. Perhaps the most frequently asked question became something
+like ``why does @samp{[A-Z]} match lowercase letters?!?''
+
+@cindex Berry, Karl
+This situation existed for close to 10 years, if not more, and
+the @command{gawk} maintainer grew weary of trying to explain that
+@command{gawk} was being nicely standards compliant, and that the issue
+was in the user's locale. During the development of @value{PVERSION} 4.0,
+he modified @command{gawk} to always treat ranges in the original,
+pre-POSIX fashion, unless @option{--posix} was used (@pxref{Options}).@footnote{And
+thus was born the Campaign for Rational Range Interpretation (or
+RRI). A number of GNU tools have either implemented this change,
+or will soon. Thanks to Karl Berry for coining the phrase ``Rational
+Range Interpretation.''}
+
+Fortunately, shortly before the final release of @command{gawk} 4.0,
+the maintainer learned that the 2008 standard had changed the
+definition of ranges, such that outside the @code{"C"} and @code{"POSIX"}
+locales, the meaning of range expressions was @emph{undefined}.@footnote{See
+@uref{http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05, the standard}
+and
+@uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
+
+By using this lovely technical term, the standard gives license
+to implementors to implement ranges in whatever way they choose.
+The @command{gawk} maintainer chose to apply the pre-POSIX meaning in all
+cases: for default regexp matching, with @option{--traditional}, and with
+@option{--posix}. In all cases, @command{gawk} remains POSIX-compliant.
+
+@node Contributors
+@appendixsec Major Contributors to @command{gawk}
+@cindex @command{gawk}, list of contributors to
+@quotation
+@i{Always give credit where credit is due.}
+@author Anonymous
+@end quotation
+
+This @value{SECTION} names the major contributors to @command{gawk}
+and/or this @value{DOCUMENT}, in approximate chronological order:
+
+@itemize @value{BULLET}
+@item
+@cindex Aho, Alfred
+@cindex Weinberger, Peter
+@cindex Kernighan, Brian
+Dr.@: Alfred V.@: Aho,
+Dr.@: Peter J.@: Weinberger, and
+Dr.@: Brian W.@: Kernighan, all of Bell Laboratories,
+designed and implemented Unix @command{awk},
+from which @command{gawk} gets the majority of its feature set.
+
+@item
+@cindex Rubin, Paul
+Paul Rubin
+did the initial design and implementation in 1986, and wrote
+the first draft (around 40 pages) of this @value{DOCUMENT}.
+
+@item
+@cindex Fenlason, Jay
+Jay Fenlason
+finished the initial implementation.
+
+@item
+@cindex Close, Diane
+Diane Close
+revised the first draft of this @value{DOCUMENT}, bringing it
+to around 90 pages.
+
+@item
+@cindex Stallman, Richard
+Richard Stallman
+helped finish the implementation and the initial draft of this
+@value{DOCUMENT}.
+He is also the founder of the FSF and the GNU project.
+
+@item
+@cindex Woods, John
+John Woods
+contributed parts of the code (mostly fixes) in
+the initial version of @command{gawk}.
+
+@item
+@cindex Trueman, David
+In 1988,
+David Trueman
+took over primary maintenance of @command{gawk},
+making it compatible with ``new'' @command{awk}, and
+greatly improving its performance.
+
+@item
+@cindex Kwok, Conrad
+@cindex Garfinkle, Scott
+@cindex Williams, Kent
+Conrad Kwok,
+Scott Garfinkle,
+and
+Kent Williams
+did the initial ports to MS-DOS with various versions of MSC.
+
+@item
+@cindex Rankin, Pat
+Pat Rankin
+provided the VMS port and its documentation.
+
+@item
+@cindex Peterson, Hal
+Hal Peterson
+provided help in porting @command{gawk} to Cray systems.
+(This is no longer supported.)
+
+@item
+@cindex Rommel, Kai Uwe
+Kai Uwe Rommel
+provided the initial port to OS/2 and its documentation.
+
+@item
+@cindex Jaegermann, Michal
+Michal Jaegermann
+provided the port to Atari systems and its documentation.
+(This port is no longer supported.)
+He continues to provide portability checking,
+and has done a lot of work to make sure @command{gawk}
+works on non-32-bit systems.
+
+@item
+@cindex Fish, Fred
+Fred Fish
+provided the port to Amiga systems and its documentation.
+(With Fred's sad passing, this is no longer supported.)
+
+@item
+@cindex Deifik, Scott
+Scott Deifik
+currently maintains the MS-DOS port using DJGPP.
+
+@item
+@cindex Zaretskii, Eli
+Eli Zaretskii
+currently maintains the MS-Windows port using MinGW.
+
+
+@item
+@cindex Grigera, Juan
+Juan Grigera
+provided a port to Windows32 systems.
+(This is no longer supported.)
+
+@item
+@cindex Hankerson, Darrel
+For many years,
+Dr.@: Darrel Hankerson
+acted as coordinator for the various ports to different PC platforms
+and created binary distributions for various PC operating systems.
+He was also instrumental in keeping the documentation up to date for
+the various PC platforms.
+
+@item
+@cindex Zoulas, Christos
+Christos Zoulas
+provided the @code{extension()}
+built-in function for dynamically adding new functions.
+(This was made obsolete as of @command{gawk} 4.1.)
+
+@item
+@cindex Kahrs, J@"urgen
+J@"urgen Kahrs
+contributed the initial version of the TCP/IP networking
+code and documentation, and motivated the inclusion of the @samp{|&} operator.
+
+@item
+@cindex Davies, Stephen
+Stephen Davies
+provided the initial port to Tandem systems and its documentation.
+(However, this is no longer supported.)
+He was also instrumental in the initial work to integrate the
+byte-code internals into the @command{gawk} code base.
+
+@item
+@cindex Woehlke, Matthew
+Matthew Woehlke
+provided improvements for Tandem's POSIX-compliant systems.
+
+@item
+@cindex Brown, Martin
+Martin Brown
+provided the port to BeOS and its documentation.
+(This is no longer supported.)
+
+@item
+@cindex Peters, Arno
+Arno Peters
+did the initial work to convert @command{gawk} to use
+GNU Automake and GNU @command{gettext}.
+
+@item
+@cindex Broder, Alan J.@:
+Alan J.@: Broder
+provided the initial version of the @code{asort()} function
+as well as the code for the optional third argument to the
+@code{match()} function.
+
+@item
+@cindex Buening, Andreas
+Andreas Buening
+updated the @command{gawk} port for OS/2.
+
+@item
+@cindex Hasegawa, Isamu
+Isamu Hasegawa,
+of IBM in Japan, contributed support for multibyte characters.
+
+@item
+@cindex Benzinger, Michael
+Michael Benzinger contributed the initial code for @code{switch} statements.
+
+@item
+@cindex McPhee, Patrick
+Patrick T.J.@: McPhee contributed the code for dynamic loading in Windows32
+environments.
+(This is no longer supported.)
+
+@item
+@cindex Wallin, Anders
+Anders Wallin helped keep the VMS port going for several years.
+
+@item
+@cindex Gordon, Assaf
+Assaf Gordon contributed the code to implement the
+@option{--sandbox} option.
+
+@item
+@cindex Haque, John
+John Haque made the following contributions:
+
+@itemize @value{MINUS}
+@item
+The modifications to convert @command{gawk}
+into a byte-code interpreter, including the debugger.
+
+@item
+The addition of true arrays of arrays.
+
+@item
+The additional modifications for support of arbitrary-precision arithmetic.
+
+@item
+The initial text of
+@ref{Arbitrary Precision Arithmetic}.
+
+@item
+The work to merge the three versions of @command{gawk}
+into one, for the 4.1 release.
+
+@item
+Improved array internals for arrays indexed by integers.
+
+@item
+The improved array sorting features were driven by John together
+with Pat Rankin.
+@end itemize
+
+@cindex Papadopoulos, Panos
+@item
+Panos Papadopoulos contributed the original text for @ref{Include Files}.
+
+@item
+@cindex Yawitz, Efraim
+Efraim Yawitz contributed the original text for @ref{Debugger}.
+
+@item
+@cindex Schorr, Andrew
+The development of the extension API first released with
+@command{gawk} 4.1 was driven primarily by
+Arnold Robbins and Andrew Schorr, with notable contributions from
+the rest of the development team.
+
+@cindex Malmberg, John E.
+@item
+John Malmberg contributed significant improvements to the
+OpenVMS port and the related documentation.
+
+@item
+@cindex Colombo, Antonio
+Antonio Giovanni Colombo rewrote a number of examples in the early
+chapters that were severely dated, for which I am incredibly grateful.
+
+@item
+@cindex Robbins, Arnold
+Arnold Robbins
+has been working on @command{gawk} since 1988, at first
+helping David Trueman, and as the primary maintainer since around 1994.
+@end itemize
+
+@node History summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{awk} language has evolved over time. The first release
+was with V7 Unix circa 1978. In 1987, for System V Release 3.1,
+major additions, including user-defined functions, were made to the language.
+Additional changes were made for System V Release 4, in 1989.
+Since then, further minor changes have been made under the auspices of
+the POSIX standard.
+
+@item
+Brian Kernighan's @command{awk} provides a small number of extensions
+that are implemented in common with other versions of @command{awk}.
+
+@item
+@command{gawk} provides a large number of extensions over POSIX @command{awk}.
+They can be disabled with either the @option{--traditional} or the
+@option{--posix} option.
+
+@item
+The interaction of POSIX locales and regexp matching in @command{gawk} has been confusing over
+the years. Today, @command{gawk} implements Rational Range Interpretation, where
+ranges of the form @samp{[a-z]} match @emph{only} the characters numerically between
+@samp{a} and @samp{z} in the machine's native character set. Usually this is ASCII,
+but it can be EBCDIC on IBM S/390 systems.
+
+@item
+Many people have contributed to @command{gawk} development over the years.
+We hope that the list provided in this @value{CHAPTER} is complete and gives
+the appropriate credit where credit is due.
+
+@end itemize
+
+@node Installation
+@appendix Installing @command{gawk}
+
+@c last two commas are part of see also
+@cindex operating systems, See Also GNU/Linux@comma{} PC operating systems@comma{} Unix
+@cindex @command{gawk}, installing
+@cindex installing @command{gawk}
+This appendix provides instructions for installing @command{gawk} on the
+various platforms that are supported by the developers. The primary
+developer supports GNU/Linux (and Unix), whereas the other ports are
+contributed.
+@DBXREF{Bugs}
+for the email addresses of the people who maintain
+the respective ports.
+
+@menu
+* Gawk Distribution:: What is in the @command{gawk} distribution.
+* Unix Installation:: Installing @command{gawk} under various
+ versions of Unix.
+* Non-Unix Installation:: Installation on Other Operating Systems.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @command{awk}
+ implementations.
+* Installation summary:: Summary of installation.
+@end menu
+
+@node Gawk Distribution
+@appendixsec The @command{gawk} Distribution
+@cindex source code, @command{gawk}
+
+This @value{SECTION} describes how to get the @command{gawk}
+distribution, how to extract it, and then what is in the various files and
+subdirectories.
+
+@menu
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+@end menu
+
+@node Getting
+@appendixsubsec Getting the @command{gawk} Distribution
+@cindex @command{gawk}, source code@comma{} obtaining
+There are two ways to get GNU software:
+
+@itemize @value{BULLET}
+@item
+Copy it from someone else who already has it.
+
+@cindex FSF (Free Software Foundation)
+@cindex Free Software Foundation (FSF)
+@item
+Retrieve @command{gawk}
+from the Internet host
+@code{ftp.gnu.org}, in the directory @file{/gnu/gawk}.
+Both anonymous @command{ftp} and @code{http} access are supported.
+If you have the @command{wget} program, you can use a command like
+the following:
+
+@example
+wget http://ftp.gnu.org/gnu/gawk/gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+@end example
+@end itemize
+
+The GNU software archive is mirrored around the world.
+The up-to-date list of mirror sites is available from
+@uref{http://www.gnu.org/order/ftp.html, the main FSF website}.
+Try to use one of the mirrors; they
+will be less busy, and you can usually find one closer to your site.
+
+@node Extracting
+@appendixsubsec Extracting the Distribution
+@command{gawk} is distributed as several @code{tar} files compressed with
+different compression programs: @command{gzip}, @command{bzip2},
+and @command{xz}. For simplicity, the rest of these instructions assume
+you are using the one compressed with the GNU Zip program, @code{gzip}.
+
+Once you have the distribution (e.g.,
+@file{gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz}),
+use @code{gzip} to expand the
+file and then use @code{tar} to extract it. You can use the following
+pipeline to produce the @command{gawk} distribution:
+
+@example
+gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf -
+@end example
+
+On a system with GNU @command{tar}, you can let @command{tar}
+do the decompression for you:
+
+@example
+tar -xvpzf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+@end example
+
+@noindent
+Extracting the archive
+creates a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}
+in the current directory.
+
+The distribution @value{FN} is of the form
+@file{gawk-@var{V}.@var{R}.@var{P}.tar.gz}.
+The @var{V} represents the major version of @command{gawk},
+the @var{R} represents the current release of version @var{V}, and
+the @var{P} represents a @dfn{patch level}, meaning that minor bugs have
+been fixed in the release. The current patch level is @value{PATCHLEVEL},
+but when retrieving distributions, you should get the version with the highest
+version, release, and patch level. (Note, however, that patch levels greater than
+or equal to 70 denote ``beta'' or nonproduction software; you might not want
+to retrieve such a version unless you don't mind experimenting.)
+If you are not on a Unix or GNU/Linux system, you need to make other arrangements
+for getting and extracting the @command{gawk} distribution. You should consult
+a local expert.
+
+@node Distribution contents
+@appendixsubsec Contents of the @command{gawk} Distribution
+@cindex @command{gawk}, distribution
+
+The @command{gawk} distribution has a number of C source files,
+documentation files,
+subdirectories, and files related to the configuration process
+(@pxref{Unix Installation}),
+as well as several subdirectories related to different non-Unix
+operating systems:
+
+@table @asis
+@item Various @samp{.c}, @samp{.y}, and @samp{.h} files
+The actual @command{gawk} source code.
+@end table
+
+@table @file
+@item ABOUT-NLS
+Information about GNU @command{gettext} and translations.
+
+@item AUTHORS
+A file with some information about the authorship of @command{gawk}.
+It exists only to satisfy the pedants at the Free Software Foundation.
+
+@item README
+@itemx README_d/README.*
+Descriptive files: @file{README} for @command{gawk} under Unix and the
+rest for the various hardware and software combinations.
+
+@item INSTALL
+A file providing an overview of the configuration and installation process.
+
+@item ChangeLog
+A detailed list of source code changes as bugs are fixed or improvements made.
+
+@item ChangeLog.0
+An older list of source code changes.
+
+@item NEWS
+A list of changes to @command{gawk} since the last release or patch.
+
+@item NEWS.0
+An older list of changes to @command{gawk}.
+
+@item COPYING
+The GNU General Public License.
+
+@item POSIX.STD
+A description of behaviors in the POSIX standard for @command{awk} which
+are left undefined, or where @command{gawk} may not comply fully, as well
+as a list of things that the POSIX standard should describe but does not.
+
+@cindex artificial intelligence@comma{} @command{gawk} and
+@item doc/awkforai.txt
+Pointers to the original draft of
+a short article describing why @command{gawk} is a good language for
+artificial intelligence (AI) programming.
+
+@item doc/bc_notes
+A brief description of @command{gawk}'s ``byte code'' internals.
+
+@item doc/README.card
+@itemx doc/ad.block
+@itemx doc/awkcard.in
+@itemx doc/cardfonts
+@itemx doc/colors
+@itemx doc/macros
+@itemx doc/no.colors
+@itemx doc/setter.outline
+The @command{troff} source for a five-color @command{awk} reference card.
+A modern version of @command{troff} such as GNU @command{troff} (@command{groff}) is
+needed to produce the color version. See the file @file{README.card}
+for instructions if you have an older @command{troff}.
+
+@item doc/gawk.1
+The @command{troff} source for a manual page describing @command{gawk}.
+This is distributed for the convenience of Unix users.
+
+@cindex Texinfo
+@item doc/gawktexi.in
+@itemx doc/sidebar.awk
+The Texinfo source file for this @value{DOCUMENT}.
+It should be processed by @file{doc/sidebar.awk}
+before processing with @command{texi2dvi} or @command{texi2pdf}
+to produce a printed document, and
+with @command{makeinfo} to produce an Info or HTML file.
+The @file{Makefile} takes care of this processing and produces
+printable output via @command{texi2dvi} or @command{texi2pdf}.
+
+@item doc/gawk.texi
+The file produced after processing @file{gawktexi.in}
+with @file{sidebar.awk}.
+
+@item doc/gawk.info
+The generated Info file for this @value{DOCUMENT}.
+
+@item doc/gawkinet.texi
+The Texinfo source file for
+@ifinfo
+@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}}.
+@end ifinfo
+@ifnotinfo
+@cite{TCP/IP Internetworking with @command{gawk}}.
+@end ifnotinfo
+It should be processed with @TeX{}
+(via @command{texi2dvi} or @command{texi2pdf})
+to produce a printed document and
+with @command{makeinfo} to produce an Info or HTML file.
+
+@item doc/gawkinet.info
+The generated Info file for
+@cite{TCP/IP Internetworking with @command{gawk}}.
+
+@item doc/igawk.1
+The @command{troff} source for a manual page describing the @command{igawk}
+program presented in
+@ref{Igawk Program}.
+(Since @command{gawk} can do its own @code{@@include} processing,
+neither @command{igawk} nor @file{igawk.1} are installed.)
+
+@item doc/Makefile.in
+The input file used during the configuration process to generate the
+actual @file{Makefile} for creating the documentation.
+
+@item Makefile.am
+@itemx */Makefile.am
+Files used by the GNU Automake software for generating
+the @file{Makefile.in} files used by Autoconf and
+@command{configure}.
+
+@item Makefile.in
+@itemx aclocal.m4
+@itemx bisonfix.awk
+@itemx config.guess
+@itemx configh.in
+@itemx configure.ac
+@itemx configure
+@itemx custom.h
+@itemx depcomp
+@itemx install-sh
+@itemx missing_d/*
+@itemx mkinstalldirs
+@itemx m4/*
+These files and subdirectories are used when configuring and compiling
+@command{gawk} for various Unix systems. Most of them are explained
+in @ref{Unix Installation}. The rest are there to support the main
+infrastructure.
+
+@item po/*
+The @file{po} library contains message translations.
+
+@item awklib/extract.awk
+@itemx awklib/Makefile.am
+@itemx awklib/Makefile.in
+@itemx awklib/eg/*
+The @file{awklib} directory contains a copy of @file{extract.awk}
+(@pxref{Extract Program}),
+which can be used to extract the sample programs from the Texinfo
+source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} file, which
+@command{configure} uses to generate a @file{Makefile}.
+@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}.
+The library functions from
+@ref{Library Functions},
+are included as ready-to-use files in the @command{gawk} distribution.
+They are installed as part of the installation process.
+The rest of the programs in this @value{DOCUMENT} are available in appropriate
+subdirectories of @file{awklib/eg}.
+
+@item extension/*
+The source code, manual pages, and infrastructure files for
+the sample extensions included with @command{gawk}.
+@xref{Dynamic Extensions}, for more information.
+
+@item extras/*
+Additional non-essential files. Currently, this directory contains some shell
+startup files to be installed in @file{/etc/profile.d} to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+@xref{Shell Startup Files}, for more information.
+
+@item posix/*
+Files needed for building @command{gawk} on POSIX-compliant systems.
+
+@item pc/*
+Files needed for building @command{gawk} under MS-Windows
+@ifclear FOR_PRINT
+and OS/2
+@end ifclear
+(@DBPXREF{PC Installation} for details).
+
+@item vms/*
+Files needed for building @command{gawk} under Vax/VMS and OpenVMS
+(@DBPXREF{VMS Installation} for details).
+
+@item test/*
+A test suite for
+@command{gawk}. You can use @samp{make check} from the top-level @command{gawk}
+directory to run your version of @command{gawk} against the test suite.
+If @command{gawk} successfully passes @samp{make check}, then you can
+be confident of a successful port.
+@end table
+
+@node Unix Installation
+@appendixsec Compiling and Installing @command{gawk} on Unix-Like Systems
+
+Usually, you can compile and install @command{gawk} by typing only two
+commands. However, if you use an unusual system, you may need
+to configure @command{gawk} for your system yourself.
+
+@menu
+* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
+* Additional Configuration Options:: Other compile-time options.
+* Configuration Philosophy:: How it's all supposed to work.
+@end menu
+
+@node Quick Installation
+@appendixsubsec Compiling @command{gawk} for Unix-Like Systems
+
+The normal installation steps should work on all modern commercial
+Unix-derived systems, GNU/Linux, BSD-based systems, and the Cygwin
+environment for MS-Windows.
+
+After you have extracted the @command{gawk} distribution, @command{cd}
+to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. As with most GNU
+software, you configure @command{gawk} for your system by running the
+@command{configure} program. This program is a Bourne shell script that
+is generated automatically using GNU Autoconf.
+@ifnotinfo
+(The Autoconf software is
+described fully in
+@cite{Autoconf---Generating Automatic Configuration Scripts},
+which can be found online at
+@uref{http://www.gnu.org/software/autoconf/manual/index.html,
+the Free Software Foundation's website}.)
+@end ifnotinfo
+@ifinfo
+(The Autoconf software is described fully starting with
+@inforef{Top, , Autoconf, autoconf,Autoconf---Generating Automatic Configuration Scripts}.)
+@end ifinfo
+
+To configure @command{gawk}, simply run @command{configure}:
+
+@example
+sh ./configure
+@end example
+
+This produces a @file{Makefile} and @file{config.h} tailored to your system.
+The @file{config.h} file describes various facts about your system.
+You might want to edit the @file{Makefile} to
+change the @code{CFLAGS} variable, which controls
+the command-line options that are passed to the C compiler (such as
+optimization levels or compiling for debugging).
+
+Alternatively, you can add your own values for most @command{make}
+variables on the command line, such as @code{CC} and @code{CFLAGS}, when
+running @command{configure}:
+
+@example
+CC=cc CFLAGS=-g sh ./configure
+@end example
+
+@noindent
+See the file @file{INSTALL} in the @command{gawk} distribution for
+all the details.
+
+After you have run @command{configure} and possibly edited the @file{Makefile},
+type:
+
+@example
+make
+@end example
+
+@noindent
+Shortly thereafter, you should have an executable version of @command{gawk}.
+That's all there is to it!
+To verify that @command{gawk} is working properly,
+run @samp{make check}. All of the tests should succeed.
+If these steps do not work, or if any of the tests fail,
+check the files in the @file{README_d} directory to see if you've
+found a known problem. If the failure is not described there,
+send in a bug report (@pxref{Bugs}).
+
+Of course, once you've built @command{gawk}, it is likely that you will
+wish to install it. To do so, you need to run the command @samp{make
+install}, as a user with the appropriate permissions. How to do this
+varies by system, but on many systems you can use the @command{sudo}
+command to do so. The command then becomes @samp{sudo make install}. It
+is likely that you will be asked for your password, and you will have
+to have been set up previously as a user who is allowed to run the
+@command{sudo} command.
+
+@node Shell Startup Files
+@appendixsubsec Shell Startup Files
+
+The distribution contains shell startup files @file{gawk.sh} and
+@file{gawk.csh} containing functions to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+On a Fedora system, these files should be installed in @file{/etc/profile.d};
+on other platforms, the appropriate location may be different.
+
+@table @command
+
+@cindex @command{gawkpath_default} shell function
+@item gawkpath_default
+Reset the @env{AWKPATH} environment variable to its default value.
+
+@cindex @command{gawkpath_prepend} shell function
+@item gawkpath_prepend
+Add the argument to the front of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawkpath_append} shell function
+@item gawkpath_append
+Add the argument to the end of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawklibpath_default} shell function
+@item gawklibpath_default
+Reset the @env{AWKLIBPATH} environment variable to its default value.
+
+@cindex @command{gawklibpath_prepend} shell function
+@item gawklibpath_prepend
+Add the argument to the front of the @env{AWKLIBPATH} environment variable.
+
+@cindex @command{gawklibpath_append} shell function
+@item gawklibpath_append
+Add the argument to the end of the @env{AWKLIBPATH} environment variable.
+
+@end table
+
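+Assuming your shell has sourced these startup files, typical usage
+might look like the following; the directory names are only examples:
+
+@example
+$ @kbd{gawkpath_prepend /usr/local/share/awk}
+$ @kbd{gawklibpath_append /usr/local/lib/gawk}
+@end example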
+
+@node Additional Configuration Options
+@appendixsubsec Additional Configuration Options
+@cindex @command{gawk}, configuring, options
+@cindex configuration options@comma{} @command{gawk}
+
+There are several additional options you may use on the @command{configure}
+command line when compiling @command{gawk} from scratch, including:
+
+@table @code
+
+@cindex @option{--disable-extensions} configuration option
+@cindex configuration option, @code{--disable-extensions}
+@item --disable-extensions
+Disable configuring and building the sample extensions in the
+@file{extension} directory. This is useful for cross-compiling.
+The default action is to dynamically check if the extensions
+can be configured and compiled.
+
+@cindex @option{--disable-lint} configuration option
+@cindex configuration option, @code{--disable-lint}
+@item --disable-lint
+Disable all lint checking within @code{gawk}. The
+@option{--lint} and @option{--lint-old} options
+(@pxref{Options})
+are accepted, but silently do nothing.
+Similarly, setting the @code{LINT} variable
+(@pxref{User-modified})
+has no effect on the running @command{awk} program.
+
+When used with GCC's automatic dead-code-elimination, this option
+cuts almost 23K bytes off the size of the @command{gawk}
+executable on GNU/Linux x86_64 systems. Results on other systems and
+with other compilers are likely to vary.
+Using this option may bring you some slight performance improvement.
+
+Using this option will cause some of the tests in the test suite
+to fail. This option may be removed at a later date.
+
+@cindex @option{--disable-nls} configuration option
+@cindex configuration option, @code{--disable-nls}
+@item --disable-nls
+Disable all message-translation facilities.
+This is usually not desirable, but it may bring you some slight performance
+improvement.
+
+@cindex @option{--with-whiny-user-strftime} configuration option
+@cindex configuration option, @code{--with-whiny-user-strftime}
+@item --with-whiny-user-strftime
+Force use of the included version of the C @code{strftime()}
+function for deficient systems.
+@end table
+
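+For example, a cross-compilation build that skips the sample extensions
+and message translations might be configured as follows (adjust the
+options to suit your situation):
+
+@example
+sh ./configure --disable-extensions --disable-nls
+@end example
+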
+Use the command @samp{./configure --help} to see the full list of
+options supplied by @command{configure}.
+
+@node Configuration Philosophy
+@appendixsubsec The Configuration Process
+
+@cindex @command{gawk}, configuring
+This @value{SECTION} is of interest only if you know something about using the
+C language and Unix-like operating systems.
+
+The source code for @command{gawk} generally attempts to adhere to formal
+standards wherever possible. This means that @command{gawk} uses library
+routines that are specified by the ISO C standard and by the POSIX
+operating system interface standard.
+The @command{gawk} source code requires using an ISO C compiler (the 1990
+standard).
+
+Many Unix systems do not support all of either the ISO or the
+POSIX standards. The @file{missing_d} subdirectory in the @command{gawk}
+distribution contains replacement versions of those functions that are
+most likely to be missing.
+
+The @file{config.h} file that @command{configure} creates contains
+definitions that describe features of the particular operating system
+where you are attempting to compile @command{gawk}. The three things
+described by this file are: which header files are available (so that
+they can be correctly included), which (supposedly) standard functions
+are actually available in your C libraries, and various miscellaneous
+facts about your operating system. For example, there may not be an
+@code{st_blksize} element in the @code{stat} structure. In this case,
+@samp{HAVE_STRUCT_STAT_ST_BLKSIZE} is undefined.
+
+@cindex @code{custom.h} file
+It is possible for your C compiler to lie to @command{configure}. It may
+do so by not exiting with an error when a library function is not
+available. To get around this, edit the @file{custom.h} file.
+Use an @samp{#ifdef} that is appropriate for your system, and either
+@code{#define} any constants that @command{configure} should have defined but
+didn't, or @code{#undef} any constants that @command{configure} defined and
+should not have. The @file{custom.h} file is automatically included by
+the @file{config.h} file.
+
+It is also possible that the @command{configure} program generated by
+Autoconf will not work on your system in some other fashion.
+If you do have a problem, the @file{configure.ac} file is the input for
+Autoconf. You may be able to change this file and generate a
+new version of @command{configure} that works on your system
+(@DBPXREF{Bugs}
+for information on how to report problems in configuring @command{gawk}).
+The same mechanism may be used to send in updates to @file{configure.ac}
+and/or @file{custom.h}.
+
+@node Non-Unix Installation
+@appendixsec Installation on Other Operating Systems
+
+This @value{SECTION} describes how to install @command{gawk} on
+various non-Unix systems.
+
+@menu
+* PC Installation:: Installing and Compiling @command{gawk} on
+ MS-DOS and OS/2.
+* VMS Installation:: Installing @command{gawk} on VMS.
+@end menu
+
+@c Rewritten by Scott Deifik <scottd.mail@sbcglobal.net>
+@c and Darrel Hankerson <hankedr@mail.auburn.edu>
+
+@node PC Installation
+@appendixsubsec Installation on PC Operating Systems
+
+@cindex PC operating systems@comma{} @command{gawk} on, installing
+@cindex operating systems, PC@comma{} @command{gawk} on, installing
+This @value{SECTION} covers installation and usage of @command{gawk}
+on Intel architecture machines
+@ifclear FOR_PRINT
+running MS-DOS, any version of MS-Windows, or OS/2.
+@end ifclear
+@ifset FOR_PRINT
+running MS-DOS and any version of MS-Windows.
+@end ifset
+In this @value{SECTION}, the term ``Windows32''
+refers to any of Microsoft Windows-95/98/ME/NT/2000/XP/Vista/7/8.
+
+The limitations of MS-DOS (and MS-DOS shells under the other operating
+systems) have meant that various ``DOS extenders'' are often used with
+programs such as @command{gawk}. The varying capabilities of Microsoft
+Windows 3.1 and Windows32 can add to the confusion. For an overview
+of the considerations, refer to @file{README_d/README.pc} in
+the distribution.
+
+@menu
+* PC Binary Installation:: Installing a prepared distribution.
+* PC Compiling:: Compiling @command{gawk} for MS-DOS,
+ Windows32, and OS/2.
+* PC Testing:: Testing @command{gawk} on PC systems.
+* PC Using:: Running @command{gawk} on MS-DOS, Windows32
+ and OS/2.
+* Cygwin:: Building and running @command{gawk} for
+ Cygwin.
+* MSYS:: Using @command{gawk} In The MSYS Environment.
+@end menu
+
+@ifclear FOR_PRINT
+@node PC Binary Installation
+@appendixsubsubsec Installing a Prepared Distribution for PC Systems
+
+If you have received a binary distribution prepared by the MS-DOS
+maintainers, then @command{gawk} and the necessary support files appear
+under the @file{gnu} directory, with executables in @file{gnu/bin},
+libraries in @file{gnu/lib/awk}, and manual pages under @file{gnu/man}.
+This is designed for easy installation to a @file{/gnu} directory on your
+drive---however, the files can be installed anywhere provided @env{AWKPATH} is
+set properly. Regardless of the installation directory, the first line of
+@file{igawk.cmd} and @file{igawk.bat} (in @file{gnu/bin}) may need to be
+edited.
+
+The binary distribution contains a separate file describing the
+contents. In particular, it may include more than one version of the
+@command{gawk} executable.
+
+OS/2 (32 bit, EMX) binary distributions are prepared for the @file{/usr}
+directory of your preferred drive. Set @env{UNIXROOT} to your installation
+drive (e.g., @samp{e:}) if you want to install @command{gawk} onto a drive
+other than the hardcoded default @samp{c:}. Executables appear in @file{/usr/bin},
+libraries under @file{/usr/share/awk}, manual pages under @file{/usr/man},
+Texinfo documentation under @file{/usr/info}, and NLS files
+under @file{/usr/share/locale}.
+Note that the files can be installed anywhere provided @env{AWKPATH} is
+set properly.
+
+If you already have a file @file{/usr/info/dir} from another package
+@emph{do not overwrite it!} Instead, enter the following commands at your prompt
+(replacing @samp{x:} with your installation drive):
+
+@example
+install-info --info-dir=x:/usr/info x:/usr/info/gawk.info
+install-info --info-dir=x:/usr/info x:/usr/info/gawkinet.info
+@end example
+
+The binary distribution may contain a separate file containing additional
+or more detailed installation instructions.
+@end ifclear
+
+@node PC Compiling
+@appendixsubsubsec Compiling @command{gawk} for PC Operating Systems
+
+@ifclear FOR_PRINT
+@command{gawk} can be compiled for MS-DOS, Windows32, and OS/2 using the GNU
+development tools from DJ Delorie (DJGPP: MS-DOS only), MinGW (Windows32) or Eberhard
+Mattes (EMX: MS-DOS, Windows32 and OS/2).
+@end ifclear
+@ifset FOR_PRINT
+@command{gawk} can be compiled for MS-DOS and Windows32 using the GNU
+development tools from DJ Delorie (DJGPP: MS-DOS only) or MinGW (Windows32).
+@end ifset
+The file
+@file{README_d/README.pc} in the @command{gawk} distribution contains
+additional notes, and @file{pc/Makefile} contains important information on
+compilation options.
+
+@cindex compiling @command{gawk} for MS-DOS and MS-Windows
+To build @command{gawk} for MS-DOS and Windows32, copy the files in
+the @file{pc} directory (@emph{except} for @file{ChangeLog}) to the
+directory with the rest of the @command{gawk} sources, then invoke
+@command{make} with the appropriate target name as an argument to
+build @command{gawk}. The @file{Makefile} copied from the @file{pc}
+directory contains a configuration section with comments and may need
+to be edited in order to work with your @command{make} utility.
+
+The @file{Makefile} supports a number of targets for building various
+MS-DOS and Windows32 versions. A list of targets is printed if the
+@command{make} command is given without a target. As an example, to
+build @command{gawk} using the DJGPP tools, enter @samp{make djgpp}.
+(The DJGPP tools needed for the build may be found at
+@uref{ftp://ftp.delorie.com/pub/djgpp/current/v2gnu/}.) To build a
+native MS-Windows binary of @command{gawk} using the MinGW tools,
+type @samp{make mingw32}.
+
+@ifclear FOR_PRINT
+@cindex compiling @command{gawk} with EMX for OS/2
+The 32 bit EMX version of @command{gawk} works ``out of the box'' under OS/2.
+However, it is highly recommended to use GCC 2.95.3 for the compilation.
+In principle, it is possible to compile @command{gawk} the following way:
+
+@example
+$ @kbd{./configure}
+$ @kbd{make}
+@end example
+
+This is not recommended, though. To get an OMF executable you should
+use the following commands at your @command{sh} prompt:
+
+@example
+$ @kbd{CFLAGS="-O2 -Zomf -Zmt"}
+$ @kbd{export CFLAGS}
+$ @kbd{LDFLAGS="-s -Zcrtdll -Zlinker /exepack:2 -Zlinker /pm:vio -Zstack 0x6000"}
+$ @kbd{export LDFLAGS}
+$ @kbd{RANLIB="echo"}
+$ @kbd{export RANLIB}
+$ @kbd{./configure --prefix=c:/usr}
+$ @kbd{make AR=emxomfar}
+@end example
+
+These are just suggestions for use with GCC 2.x. You may use any other set of
+(self-consistent) environment variables and compiler flags.
+
+@ignore
+To get an FHS-compliant file hierarchy it is recommended to use the additional
+@command{configure} options @option{--infodir=c:/usr/share/info}, @option{--mandir=c:/usr/share/man}
+and @option{--libexecdir=c:/usr/lib}.
+@end ignore
+
+@ignore
+The internal @command{gettext} library tends to be problematic. It is therefore recommended
+to use either an external one (@option{--without-included-gettext}) or to disable
+NLS entirely (@option{--disable-nls}).
+@end ignore
+
+If you use GCC 2.95, it is recommended that you also use:
+
+@example
+$ @kbd{LIBS="-lgcc"}
+$ @kbd{export LIBS}
+@end example
+
+You can also get an @code{a.out} executable if you prefer:
+
+@example
+$ @kbd{CFLAGS="-O2 -Zmt"}
+$ @kbd{export CFLAGS}
+$ @kbd{LDFLAGS="-s -Zstack 0x6000"}
+$ @kbd{LIBS="-lgcc"}
+$ @kbd{unset RANLIB}
+@c $ ./configure --prefix=c:/usr --without-included-gettext
+$ @kbd{./configure --prefix=c:/usr}
+$ @kbd{make}
+@end example
+
+@quotation NOTE
+Compilation of @code{a.out} executables also works with GCC 3.2.
+Versions later than GCC 3.2 have not been tested successfully.
+@end quotation
+
+@samp{make install} works as expected with the EMX build.
+
+@quotation NOTE
+Ancient OS/2 ports of GNU @command{make} are not able to handle
+the Makefiles of this package. If you encounter any problems with
+@command{make}, try GNU Make 3.79.1 or later versions. You should
+find the latest version on
+@uref{ftp://hobbes.nmsu.edu/pub/os2/}.@footnote{As of November 2014,
+this site is still there, but the author could not find a package
+for GNU Make.}
+@end quotation
+@end ifclear
+
+@node PC Testing
+@appendixsubsubsec Testing @command{gawk} on PC Operating Systems
+
+Using @command{make} to run the standard tests and to install @command{gawk}
+requires additional Unix-like tools, including @command{sh}, @command{sed}, and
+@command{cp}. In order to run the tests, the @file{test/*.ok} files may need to
+be converted so that they have the usual MS-DOS-style end-of-line markers.
+Alternatively, run @command{make check CMP="diff -a"} to use GNU @command{diff}
+in text mode instead of @command{cmp} to compare the resulting files.
+
+@ifclear FOR_PRINT
+Most
+of the tests work properly with Stewartson's shell along with the
+companion utilities or appropriate GNU utilities. However, some editing of
+@file{test/Makefile} is required. It is recommended that you copy the file
+@file{pc/Makefile.tst} over the file @file{test/Makefile} as a
+replacement. Details can be found in @file{README_d/README.pc}
+and in the file @file{pc/Makefile.tst}.
+
+On OS/2, the @code{pid} test fails because @code{spawnl()} is used instead of
+@code{fork()}/@code{execl()} to start child processes.
+Also, the @code{mbfw1} and @code{mbprintf1} tests fail because the needed
+multibyte functionality is not available.
+@end ifclear
+
+@node PC Using
+@appendixsubsubsec Using @command{gawk} on PC Operating Systems
+@cindex operating systems, PC, @command{gawk} on
+@cindex PC operating systems, @command{gawk} on
+
+Under MS-DOS and MS-Windows, the Cygwin and MinGW environments support
+both the @samp{|&} operator and TCP/IP networking
+(@pxref{TCP/IP Networking}).
+@ifclear FOR_PRINT
+EMX (OS/2 only) supports at least the @samp{|&} operator.
+@end ifclear
+
+@cindex search paths
+@cindex search paths, for source files
+@cindex @command{gawk}, MS-DOS version of
+@cindex @command{gawk}, MS-Windows version of
+@cindex @code{;} (semicolon), @env{AWKPATH} variable and
+@cindex semicolon (@code{;}), @env{AWKPATH} variable and
+@cindex @env{AWKPATH} environment variable
+The MS-DOS and MS-Windows versions of @command{gawk} search for
+program files as described in @ref{AWKPATH Variable}. However,
+semicolons (rather than colons) separate elements in the @env{AWKPATH}
+variable. If @env{AWKPATH} is not set or is empty, then the default
+search path is @samp{@w{.;c:/lib/awk;c:/gnu/lib/awk}}.
+
+@ifclear FOR_PRINT
+@cindex @command{gawk}, OS/2 version of
+@cindex @code{UNIXROOT} variable, on OS/2 systems
+The search path for OS/2 (32 bit, EMX) is determined by the prefix directory
+(most likely @file{/usr} or @file{c:/usr}) that has been specified as an option of
+the @command{configure} script as is the case for the Unix versions.
+If @file{c:/usr} is the prefix directory then the default search path contains @file{.}
+and @file{c:/usr/share/awk}.
+Additionally, to support binary distributions of @command{gawk} for OS/2
+systems whose drive @samp{c:} might not support long @value{FN}s or might not exist
+at all, there is a special environment variable. If @env{UNIXROOT} specifies
+a drive then this specific drive is also searched for program files.
+E.g., if @env{UNIXROOT} is set to @file{e:} the complete default search path is
+@samp{@w{.;c:/usr/share/awk;e:/usr/share/awk}}.
+
+An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
+or @command{cmd.exe} under MS-Windows or OS/2) may be useful for @command{awk} programming.
+The DJGPP collection of tools includes an MS-DOS port of Bash,
+and several shells are available for OS/2, including @command{ksh}.
+@end ifclear
+@ifset FOR_PRINT
+An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
+or @command{cmd.exe} under MS-Windows) may be useful for @command{awk} programming.
+The DJGPP collection of tools includes an MS-DOS port of Bash.
+@end ifset
+
+@cindex common extensions, @code{BINMODE} variable
+@cindex extensions, common@comma{} @code{BINMODE} variable
+@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
+@cindex @code{BINMODE} variable
+@ifclear FOR_PRINT
+Under MS-Windows, OS/2 and MS-DOS,
+@end ifclear
+@ifset FOR_PRINT
+Under MS-Windows and MS-DOS,
+@end ifset
+@command{gawk} (and many other text programs) silently
+translates end-of-line @samp{\r\n} to @samp{\n} on input and @samp{\n}
+to @samp{\r\n} on output. A special @code{BINMODE} variable @value{COMMONEXT}
+allows control over these translations and is interpreted as follows:
+
+@itemize @value{BULLET}
+@item
+If @code{BINMODE} is @code{"r"} or one,
+then
+binary mode is set on read (i.e., no translations on reads).
+
+@item
+If @code{BINMODE} is @code{"w"} or two,
+then
+binary mode is set on write (i.e., no translations on writes).
+
+@item
+If @code{BINMODE} is @code{"rw"} or @code{"wr"} or three,
+binary mode is set for both read and write.
+
+@item
+@code{BINMODE=@var{non-null-string}} is
+the same as @samp{BINMODE=3} (i.e., no translations on
+reads or writes). However, @command{gawk} issues a warning
+message if the string is not one of @code{"rw"} or @code{"wr"}.
+@end itemize
+
+@noindent
+The modes for standard input and standard output are set one time
+only (after the
+command line is read, but before processing any of the @command{awk} program).
+Setting @code{BINMODE} for standard input or
+standard output is accomplished by using an
+appropriate @samp{-v BINMODE=@var{N}} option on the command line.
+@code{BINMODE} is set at the time a file or pipe is opened and cannot be
+changed mid-stream.
+
+The name @code{BINMODE} was chosen to match @command{mawk}
+(@pxref{Other Versions}).
+@command{mawk} and @command{gawk} handle @code{BINMODE} similarly; however,
+@command{mawk} adds a @samp{-W BINMODE=@var{N}} option and an environment
+variable that can set @code{BINMODE}, @code{RS}, and @code{ORS}. The
+files @file{binmode[1-3].awk} (under @file{gnu/lib/awk} in some of the
+prepared binary distributions) have been chosen to match @command{mawk}'s @samp{-W
+BINMODE=@var{N}} option. These can be changed or discarded; in particular,
+the setting of @code{RS} giving the fewest ``surprises'' is open to debate.
+@command{mawk} uses @samp{RS = "\r\n"} if binary mode is set on read, which is
+appropriate for files with the MS-DOS-style end-of-line.
+
+To illustrate, the following examples set binary mode on writes for standard
+output and other files, and set @code{ORS} as the ``usual'' MS-DOS-style
+end-of-line:
+
+@example
+gawk -v BINMODE=2 -v ORS="\r\n" @dots{}
+@end example
+
+@noindent
+or:
+
+@example
+gawk -v BINMODE=w -f binmode2.awk @dots{}
+@end example
+
+@noindent
+These give the same result as the @samp{-W BINMODE=2} option in
+@command{mawk}.
+The following changes the record separator to @code{"\r\n"} and sets binary
+mode on reads, but does not affect the mode on standard input:
+
+@example
+gawk -v RS="\r\n" -e "BEGIN @{ BINMODE = 1 @}" @dots{}
+@end example
+
+@noindent
+or:
+
+@example
+gawk -f binmode1.awk @dots{}
+@end example
+
+@noindent
+With proper quoting, in the first example the setting of @code{RS} can be
+moved into the @code{BEGIN} rule.
+
+@node Cygwin
+@appendixsubsubsec Using @command{gawk} In The Cygwin Environment
+@cindex compiling @command{gawk} for Cygwin
+
+@command{gawk} can be built and used ``out of the box'' under MS-Windows
+if you are using the @uref{http://www.cygwin.com, Cygwin environment}.
+This environment provides an excellent simulation of GNU/Linux, using the
+GNU tools, such as Bash, the GNU Compiler Collection (GCC), GNU Make,
+and other GNU programs. Compilation and installation for Cygwin is the
+same as for a Unix system:
+
+@example
+tar -xvpzf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+cd gawk-@value{VERSION}.@value{PATCHLEVEL}
+./configure
+make && make check
+@end example
+
+When compared to GNU/Linux on the same system, the @samp{configure}
+step on Cygwin takes considerably longer. However, it does finish,
+and then the @samp{make} proceeds as usual.
+
+@node MSYS
+@appendixsubsubsec Using @command{gawk} In The MSYS Environment
+
+In the MSYS environment under MS-Windows, @command{gawk} automatically
+uses binary mode for reading and writing files. Thus there is no
+need to use the @code{BINMODE} variable.
+
+This can cause problems with other Unix-like components that have
+been ported to MS-Windows and that expect @command{gawk} to do automatic
+translation of @code{"\r\n"}, because it won't.
+
+@node VMS Installation
+@appendixsubsec Compiling and Installing @command{gawk} on Vax/VMS and OpenVMS
+
+@c based on material from Pat Rankin <rankin@eql.caltech.edu>
+@c now rankin@pactechdata.com
+@c now r.pat.rankin@gmail.com
+
+@cindex @command{gawk}, VMS version of
+@cindex installation, VMS
+This @value{SUBSECTION} describes how to compile and install @command{gawk} under VMS.
+The older designation ``VMS'' is used throughout to refer to OpenVMS.
+
+@menu
+* VMS Compilation:: How to compile @command{gawk} under VMS.
+* VMS Dynamic Extensions:: Compiling @command{gawk} dynamic extensions on
+ VMS.
+* VMS Installation Details:: How to install @command{gawk} under VMS.
+* VMS Running:: How to run @command{gawk} under VMS.
+* VMS GNV:: The VMS GNV Project.
+* VMS Old Gawk:: An old version comes with some VMS systems.
+@end menu
+
+@node VMS Compilation
+@appendixsubsubsec Compiling @command{gawk} on VMS
+@cindex compiling @command{gawk} for VMS
+
+To compile @command{gawk} under VMS, there is a @code{DCL} command procedure
+that issues all the necessary @code{CC} and @code{LINK} commands. There is
+also a @file{Makefile} for use with the @code{MMS} and @code{MMK} utilities.
+From the source directory, use either:
+
+@example
+$ @kbd{@@[.vms]vmsbuild.com}
+@end example
+
+@noindent
+or:
+
+@example
+$ @kbd{MMS/DESCRIPTION=[.vms]descrip.mms gawk}
+@end example
+
+@noindent
+or:
+
+@example
+$ @kbd{MMK/DESCRIPTION=[.vms]descrip.mms gawk}
+@end example
+
+@command{MMK} is an open source, free, near-clone of @command{MMS} and
+can better handle ODS-5 volumes with upper- and lowercase @value{FN}s.
+@command{MMK} is available from @uref{https://github.com/endlesssoftware/mmk}.
+
+With ODS-5 volumes and extended parsing enabled, the case of the target
+parameter may need to be exact.
+
+@command{gawk} has been tested under VAX/VMS 7.3 and Alpha/VMS 7.3-1
+using Compaq C V6.4, and Alpha/VMS 7.3, Alpha/VMS 7.3-2, and IA64/VMS 8.3.
+The most recent builds used HP C V7.3 on Alpha VMS 8.3; both the
+Alpha and IA64 VMS 8.4 builds also used HP C V7.3.@footnote{The IA64 architecture
+is also known as ``Itanium.''}
+
+@DBXREF{VMS GNV} for information on building
+@command{gawk} as a PCSI kit that is compatible with the GNV product.
+
+@node VMS Dynamic Extensions
+@appendixsubsubsec Compiling @command{gawk} Dynamic Extensions on VMS
+
+The extensions that have been ported to VMS can be built using one of
+the following commands:
+
+@example
+$ @kbd{MMS/DESCRIPTION=[.vms]descrip.mms extensions}
+@end example
+
+@noindent
+or:
+
+@example
+$ @kbd{MMK/DESCRIPTION=[.vms]descrip.mms extensions}
+@end example
+
+@command{gawk} uses @code{AWKLIBPATH} as either an environment variable
+or a logical name to find the dynamic extensions.
+
+Dynamic extensions need to be compiled with the same compiler options for
+floating-point, pointer size, and symbol name handling as were used
+to compile @command{gawk} itself.
+Alpha and Itanium should use IEEE floating point. The pointer size is 32 bits,
+and the symbol name handling should be exact case with CRC shortening for
+symbols longer than 32 bits.
+
+For Alpha and Itanium:
+
+@example
+/name=(as_is,short)
+/float=ieee/ieee_mode=denorm_results
+@end example
+
+For VAX:
+
+@example
+/name=(as_is,short)
+@end example
+
+Compile time macros need to be defined before the first VMS-supplied
+header file is included, as follows:
+
+@example
+#if (__CRTL_VER >= 70200000) && !defined (__VAX)
+#define _LARGEFILE 1
+#endif
+
+#ifndef __VAX
+#ifdef __CRTL_VER
+#if __CRTL_VER >= 80200000
+#define _USE_STD_STAT 1
+#endif
+#endif
+#endif
+@end example
+
+If you are writing your own extensions to run on VMS, you must supply these
+definitions yourself. The @file{config.h} file created when building @command{gawk}
+on VMS supplies them for you; if you use that file, or a similar one, remember
+to include it before any VMS-supplied header files.
+
+@node VMS Installation Details
+@appendixsubsubsec Installing @command{gawk} on VMS
+
+To use @command{gawk}, all you need is a ``foreign'' command, which is a
+@code{DCL} symbol whose value begins with a dollar sign. For example:
+
+@example
+$ @kbd{GAWK :== $disk1:[gnubin]gawk}
+@end example
+
+@noindent
+Substitute the actual location of @command{gawk.exe} for
+@samp{$disk1:[gnubin]}. The symbol should be placed in the
+@file{login.com} of any user who wants to run @command{gawk},
+so that it is defined every time the user logs on.
+Alternatively, the symbol may be placed in the system-wide
+@file{sylogin.com} procedure, which allows all users
+to run @command{gawk}.
+
+If your @command{gawk} was installed by a PCSI kit into the
+@file{GNV$GNU:} directory tree, the program will be known as
+@file{GNV$GNU:[bin]gnv$gawk.exe} and the help file will be
+@file{GNV$GNU:[vms_help]gawk.hlp}.
+
+The PCSI kit also installs a @file{GNV$GNU:[vms_bin]gawk_verb.cld} file
+which can be used to add @command{gawk} and @command{awk} as DCL commands.
+
+For just the current process you can use:
+
+@example
+$ @kbd{set command gnv$gnu:[vms_bin]gawk_verb.cld}
+@end example
+
+Or the system manager can use @file{GNV$GNU:[vms_bin]gawk_verb.cld} to
+add @command{gawk} and @command{awk} to the system-wide @samp{DCLTABLES}.
+
+The DCL syntax is documented in the @file{gawk.hlp} file.
+
+Optionally, the @file{gawk.hlp} entry can be loaded into a VMS help library:
+
+@example
+$ @kbd{LIBRARY/HELP sys$help:helplib [.vms]gawk.hlp}
+@end example
+
+@noindent
+(You may want to substitute a site-specific help library rather than
+the standard VMS library @samp{HELPLIB}.) After loading the help text,
+the command:
+
+@example
+$ @kbd{HELP GAWK}
+@end example
+
+@noindent
+provides information about both the @command{gawk} implementation and the
+@command{awk} programming language.
+
+The logical name @samp{AWK_LIBRARY} can designate a default location
+for @command{awk} program files. For the @option{-f} option, if the specified
+@value{FN} has no device or directory path information in it, @command{gawk}
+looks in the current directory first, then in the directory specified
+by the translation of @samp{AWK_LIBRARY} if the file is not found.
+If, after searching in both directories, the file still is not found,
+@command{gawk} appends the suffix @samp{.awk} to the @value{FN} and retries
+the file search. If @samp{AWK_LIBRARY} has no definition, a default value
+of @samp{SYS$LIBRARY:} is used for it.
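+
+The following is a minimal C sketch of the search order just described,
+assuming the @value{FN} contains no device or directory information; the
+function and variable names are hypothetical and this is not
+@command{gawk}'s actual code:
+
+@example
+#include <stdio.h>
+#include <unistd.h>
+
+/* find_awk_file --- illustrate the VMS -f search order; not gawk's code */
+
+static const char *
+find_awk_file(const char *name, char *buf, size_t bufsize)
+@{
+        /* Try the bare name first, then retry with the .awk suffix. */
+        static const char *suffixes[] = @{ "", ".awk" @};
+        /* Look in the current directory, then in AWK_LIBRARY. */
+        static const char *prefixes[] = @{ "SYS$DISK:[]", "AWK_LIBRARY:" @};
+        int s, p;
+
+        for (s = 0; s < 2; s++) @{
+                for (p = 0; p < 2; p++) @{
+                        snprintf(buf, bufsize, "%s%s%s",
+                                 prefixes[p], name, suffixes[s]);
+                        if (access(buf, R_OK) == 0)
+                                return buf;
+                @}
+        @}
+        return NULL;    /* not found */
+@}
+@end example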
+
+@node VMS Running
+@appendixsubsubsec Running @command{gawk} on VMS
+
+Command-line parsing and quoting conventions are significantly different
+on VMS, so examples in this @value{DOCUMENT} or from other sources often need minor
+changes. They @emph{are} minor though, and all @command{awk} programs
+should run correctly.
+
+Here are a couple of trivial tests:
+
+@example
+$ @kbd{gawk -- "BEGIN @{print ""Hello, World!""@}"}
+$ @kbd{gawk -"W" version}
+! could also be -"W version" or "-W version"
+@end example
+
+@noindent
+Note that uppercase and mixed-case text must be quoted.
+
+The VMS port of @command{gawk} includes a @code{DCL}-style interface in addition
+to the original shell-style interface (see the help entry for details).
+One side effect of dual command-line parsing is that if there is only a
+single parameter (as in the quoted string program), the command
+becomes ambiguous. To work around this, the normally optional @option{--}
+flag is required to force Unix-style parsing rather than @code{DCL} parsing. If any
+other dash-type options (or multiple parameters such as @value{DF}s to
+process) are present, there is no ambiguity and @option{--} can be omitted.
+
+@cindex exit status, of VMS
+The @code{exit} value is a Unix-style value and is encoded into a VMS exit
+status value when the program exits.
+
+The VMS severity bits are set based on the @code{exit} value.
+An @code{exit} value of 1 indicates a failure, and VMS sets the @code{ERROR} status.
+A value of 2 indicates a fatal error, and VMS sets the @code{FATAL} status.
+All other values get the @code{SUCCESS} status. The exit value is
+encoded to comply with VMS coding standards: the status contains the
+@code{C_FACILITY_NO} of @code{0x350000}, plus the constant @code{0xA000},
+plus the exit value shifted left by 3 bits to make room for the severity codes.
+
+To extract the actual @command{gawk} exit code from the VMS status use:
+
+@example
+unix_status = (vms_status .and. &x7f8) / 8
+@end example
+
+@noindent
+A C program that uses @code{exec()} to call @command{gawk} will get the original
+Unix-style exit value.
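+
+Here is a minimal C sketch of the encoding and decoding described above;
+the macro and function names are hypothetical and are not part of
+@command{gawk}'s source code:
+
+@example
+/* Facility value: C_FACILITY_NO 0x350000 plus the constant 0xA000. */
+#define GAWK_FACILITY   0x35A000
+
+/* encode_status --- build a VMS status from a Unix-style exit value */
+
+static unsigned long
+encode_status(int unix_code, unsigned int severity)
+@{
+        /* The exit value occupies bits 3--10; the low three bits
+           hold the VMS severity. */
+        return GAWK_FACILITY | ((unsigned long) unix_code << 3) | severity;
+@}
+
+/* decode_status --- recover the Unix-style exit value */
+
+static int
+decode_status(unsigned long vms_status)
+@{
+        return (int) ((vms_status & 0x7F8) >> 3);
+@}
+@end example
+
+@noindent
+The decoding performs the same computation as the DCL-style expression
+shown earlier.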
+
+Older versions of @command{gawk} for VMS mapped a Unix exit code of 0 to a VMS
+status of 1, a failure to 2, a fatal error to 4, and passed all other values
+through unchanged.
+This violated the VMS exit status coding requirements.
+
+@cindex floating-point, VAX/VMS
+VAX/VMS floating point uses unbiased rounding. @xref{Round Function}.
+
+VMS reports time values in GMT unless one of the @code{SYS$TIMEZONE_RULE}
+or @code{TZ} logical names is set. Older versions of VMS, such as VAX/VMS
+7.3, do not set these logical names.
+
+@c @cindex directory search
+@c @cindex path, search
+@cindex search paths
+@cindex search paths, for source files
+The default search path, when looking for @command{awk} program files specified
+by the @option{-f} option, is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical
+name @env{AWKPATH} can be used to override this default. The format
+of @env{AWKPATH} is a comma-separated list of directory specifications.
+When defining it, the value should be quoted so that it retains a single
+translation and not a multitranslation @code{RMS} searchlist.
+
+@node VMS GNV
+@appendixsubsubsec The VMS GNV Project
+
+The VMS GNV package provides a build environment similar to POSIX with ports
+of a collection of open source tools. The @command{gawk} found in the GNV
+base kit is an older port. Currently the GNV project is being reorganized
+to supply individual PCSI packages for each component.
+See @w{@uref{https://sourceforge.net/p/gnv/wiki/InstallingGNVPackages/}.}
+
+The normal build procedure for @command{gawk} produces a program that
+is suitable for use with GNV.
+
+The file @file{vms/gawk_build_steps.txt} in the distribution documents
+the procedure for building a VMS PCSI kit that is compatible with GNV.
+
+@ignore
+@c The VMS POSIX product, also known as POSIX for OpenVMS, is long defunct
+@c and building gawk for it has not been tested in many years, but these
+@c old instructions might still work if anyone is still using it.
+
+@node VMS POSIX
+@appendixsubsubsec Building and Using @command{gawk} on VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library. The source tree should be unpacked
+into a container file subsystem rather than into the ordinary VMS filesystem.
+Make sure that the two scripts, @file{configure} and
+@file{vms/posix-cc.sh}, are executable; use @samp{chmod +x} on them if
+necessary. Then execute the following two commands:
+
+@example
+psx> @kbd{CC=vms/posix-cc.sh configure}
+psx> @kbd{make CC=c89 gawk}
+@end example
+
+@noindent
+The first command constructs files @file{config.h} and @file{Makefile} out
+of templates, using a script to make the C compiler fit @command{configure}'s
+expectations. The second command compiles and links @command{gawk} using
+the C compiler directly; ignore any warnings from @command{make} about being
+unable to redefine @code{CC}. @command{configure} takes a very long
+time to execute, but at least it provides incremental feedback as it runs.
+
+This has been tested with VAX/VMS V6.2, VMS POSIX V2.0, and DEC C V5.2.
+
+Once built, @command{gawk} works like any other shell utility. Unlike
+the normal VMS port of @command{gawk}, no special command-line manipulation is
+needed in the VMS POSIX environment.
+@end ignore
+
+@node VMS Old Gawk
+@appendixsubsubsec Some VMS Systems Have An Old Version of @command{gawk}
+
+@c Thanks to "gerard labadie" <gerard.labadie@gmail.com>
+
+Some versions of VMS have an old version of @command{gawk}. To access it,
+define a symbol, as follows:
+
+@example
+$ @kbd{gawk :== $sys$common:[syshlp.examples.tcpip.snmp]gawk.exe}
+@end example
+
+This is apparently @value{PVERSION} 2.15.6, which is extremely old. We
+recommend compiling and using the current version.
+
+
+@node Bugs
+@appendixsec Reporting Problems and Bugs
+@cindex archaeologists
+@quotation
+@i{There is nothing more dangerous than a bored archaeologist.}
+@author Douglas Adams, @cite{The Hitchhiker's Guide to the Galaxy}
+@end quotation
+@c the radio show, not the book. :-)
+
+@cindex debugging @command{gawk}, bug reports
+@cindex troubleshooting, @command{gawk}, bug reports
+If you have problems with @command{gawk} or think that you have found a bug,
+report it to the developers; we cannot promise to do anything
+but we might well want to fix it.
+
+Before reporting a bug, make sure you have really found a genuine bug.
+Carefully reread the documentation and see if it says you can do
+what you're trying to do. If it's not clear whether you should be able
+to do something or not, report that too; it's a bug in the documentation!
+
+Before reporting a bug or trying to fix it yourself, try to isolate it
+to the smallest possible @command{awk} program and input @value{DF} that
+reproduces the problem. Then send us the program and @value{DF},
+some idea of what kind of Unix system you're using,
+the compiler you used to compile @command{gawk}, and the exact results
+@command{gawk} gave you. Also say what you expected to occur; this helps
+us decide whether the problem is really in the documentation.
+
+Make sure to include the version number of @command{gawk} you are using.
+You can get this information with the command @samp{gawk --version}.
+
+@cindex @code{bug-gawk@@gnu.org} bug reporting address
+@cindex email address for bug reports, @code{bug-gawk@@gnu.org}
+@cindex bug reports, email address, @code{bug-gawk@@gnu.org}
+Once you have a precise problem description, send email to
+@EMAIL{bug-gawk@@gnu.org,bug-gawk at gnu dot org}.
+
+The @command{gawk} maintainers subscribe to this address and
+thus they will receive your bug report.
+Although you can send mail to the maintainers directly,
+the bug reporting address is preferred because the
+email list is archived at the GNU Project.
+@emph{All email must be in English. This is the only language
+understood in common by all the maintainers.}
+
+@cindex @code{comp.lang.awk} newsgroup
+@quotation CAUTION
+Do @emph{not} try to report bugs in @command{gawk} by
+posting to the Usenet/Internet newsgroup @code{comp.lang.awk}.
+The @command{gawk} developers do occasionally read this newsgroup,
+but there is no guarantee that we will see your posting. The steps described
+here are the only officially recognized way for reporting bugs.
+Really.
+@end quotation
+
+@quotation NOTE
+Many distributions of GNU/Linux and the various BSD-based operating systems
+have their own bug reporting systems. If you report a bug using your distribution's
+bug reporting system, you should also send a copy to
+@EMAIL{bug-gawk@@gnu.org,bug-gawk at gnu dot org}.
+
+This is for two reasons. First, although some distributions forward
+bug reports ``upstream'' to the GNU mailing list, many don't, so there is a good
+chance that the @command{gawk} maintainers won't even see the bug report! Second,
+mail to the GNU list is archived, and having everything at the GNU project
+keeps things self-contained and not dependent on other organizations.
+@end quotation
+
+Non-bug suggestions are always welcome as well. If you have questions
+about things that are unclear in the documentation or are just obscure
+features, ask on the bug list; we will try to help you out if we can.
+
+If you find bugs in one of the non-Unix ports of @command{gawk},
+send an email to the bug list, with a copy to the
+person who maintains that port. They are named in the following list,
+as well as in the @file{README} file in the @command{gawk} distribution.
+Information in the @file{README} file should be considered authoritative
+if it conflicts with this @value{DOCUMENT}.
+
+The people maintaining the various @command{gawk} ports are:
+
+@c put the index entries outside the table, for docbook
+@cindex Buening, Andreas
+@cindex Deifik, Scott
+@cindex Malmberg, John
+@cindex Pitts, Dave
+@cindex Robbins, Arnold
+@cindex Zaretskii, Eli
+@multitable {MS-Windows with MinGW} {123456789012345678901234567890123456789001234567890}
+@item Unix and POSIX systems @tab Arnold Robbins, @EMAIL{arnold@@skeeve.com,arnold at skeeve dot com}.
+
+@item MS-DOS with DJGPP @tab Scott Deifik, @EMAIL{scottd.mail@@sbcglobal.net,scottd dot mail at sbcglobal dot net}.
+
+@item MS-Windows with MinGW @tab Eli Zaretskii, @EMAIL{eliz@@gnu.org,eliz at gnu dot org}.
+
+@c Leave this in the print version on purpose.
+@c OS/2 is not mentioned anywhere else in the print version though.
+@item OS/2 @tab Andreas Buening, @EMAIL{andreas.buening@@nexgo.de,andreas dot buening at nexgo dot de}.
+
+@item VMS @tab John Malmberg, @EMAIL{wb8tyw@@qsl.net,wb8tyw at qsl.net}.
+
+@item z/OS (OS/390) @tab Dave Pitts, @EMAIL{dpitts@@cozx.com,dpitts at cozx dot com}.
+@end multitable
+
+If your bug is also reproducible under Unix, send a copy of your
+report to the @EMAIL{bug-gawk@@gnu.org,bug-gawk at gnu dot org} email list as well.
+
+@node Other Versions
+@appendixsec Other Freely Available @command{awk} Implementations
+@cindex @command{awk}, implementations
+@ignore
+From: emory!amc.com!brennan (Michael Brennan)
+Subject: C++ comments in awk programs
+To: arnold@gnu.ai.mit.edu (Arnold Robbins)
+Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT)
+
+@end ignore
+@cindex Brennan, Michael
+@ifnotdocbook
+@quotation
+@i{It's kind of fun to put comments like this in your awk code.}@*
+@ @ @ @ @ @ @code{// Do C++ comments work? answer: yes! of course}
+@author Michael Brennan
+@end quotation
+@end ifnotdocbook
+
+@docbook
+<blockquote><attribution>Michael Brennan</attribution>
+<literallayout><emphasis>It's kind of fun to put comments like this in your awk code.</emphasis>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<literal>// Do C++ comments work? answer: yes! of course</literal></literallayout>
+</blockquote>
+@end docbook
+
+There are a number of other freely available @command{awk} implementations.
+This @value{SECTION} briefly describes where to get them:
+
+@table @asis
+@cindex Kernighan, Brian
+@cindex source code, Brian Kernighan's @command{awk}
+@cindex @command{awk}, versions of, See Also Brian Kernighan's @command{awk}
+@cindex Brian Kernighan's @command{awk}, source code
+@item Unix @command{awk}
+Brian Kernighan, one of the original designers of Unix @command{awk},
+has made his implementation of
+@command{awk} freely available.
+You can retrieve this version via
+@uref{http://www.cs.princeton.edu/~bwk, his home page}.
+It is available in several archive formats:
+
+@table @asis
+@item Shell archive
+@uref{http://www.cs.princeton.edu/~bwk/btl.mirror/awk.shar}
+
+@item Compressed @command{tar} file
+@uref{http://www.cs.princeton.edu/~bwk/btl.mirror/awk.tar.gz}
+
+@item Zip file
+@uref{http://www.cs.princeton.edu/~bwk/btl.mirror/awk.zip}
+@end table
+
+@cindex @command{git} utility
+You can also retrieve it from GitHub:
+
+@example
+git clone git://github.com/onetrueawk/awk bwkawk
+@end example
+
+@noindent
+This command creates a copy of the @uref{http://git-scm.com, Git}
+repository in a directory named @file{bwkawk}. If you leave that argument
+off the @command{git} command line, the repository copy is created in a
+directory named @file{awk}.
+
+This version requires an ISO C (1990 standard) compiler; the C compiler
+from GCC (the GNU Compiler Collection) works quite nicely.
+
+@DBXREF{Common Extensions}
+for a list of extensions in this @command{awk} that are not in POSIX @command{awk}.
+
+As a side note, Dan Bornstein has created a Git repository tracking
+all the versions of BWK @command{awk} that he could find. It's
+available at @uref{git://github.com/danfuzz/one-true-awk}.
+
+@cindex Brennan, Michael
+@cindex @command{mawk} utility
+@cindex source code, @command{mawk}
+@item @command{mawk}
+Michael Brennan wrote an independent implementation of @command{awk},
+called @command{mawk}. It is available under the
+@ifclear FOR_PRINT
+GPL (@pxref{Copying}),
+@end ifclear
+@ifset FOR_PRINT
+GPL,
+@end ifset
+just as @command{gawk} is.
+
+The original distribution site for the @command{mawk} source code
+no longer has it. A copy is available at
+@uref{http://www.skeeve.com/gawk/mawk1.3.3.tar.gz}.
+
+In 2009, Thomas Dickey took on @command{mawk} maintenance.
+Basic information is available on
+@uref{http://www.invisible-island.net/mawk, the project's web page}.
+The download URL is
+@url{http://invisible-island.net/datafiles/release/mawk.tar.gz}.
+
+Once you have it,
+@command{gunzip} may be used to decompress this file. Installation
+is similar to @command{gawk}'s
+(@pxref{Unix Installation}).
+
+@DBXREF{Common Extensions}
+for a list of extensions in @command{mawk} that are not in POSIX @command{awk}.
+
+@cindex Sumner, Andrew
+@cindex @command{awka} compiler for @command{awk}
+@cindex source code, @command{awka}
+@item @command{awka}
+Written by Andrew Sumner,
+@command{awka} translates @command{awk} programs into C, compiles them,
+and links them with a library of functions that provides the core
+@command{awk} functionality.
+It also has a number of extensions.
+
+The @command{awk} translator is released under the GPL, and the library
+is under the LGPL.
+
+To get @command{awka}, go to @url{http://sourceforge.net/projects/awka}.
+@c You can reach Andrew Sumner at @email{andrew@@zbcom.net}.
+@c andrewsumner@@yahoo.net
+
+The project seems to be frozen; no new code changes have been made
+since approximately 2003.
+
+@cindex Beebe, Nelson H.F.@:
+@cindex @command{pawk} (profiling version of Brian Kernighan's @command{awk})
+@cindex source code, @command{pawk}
+@item @command{pawk}
+Nelson H.F.@: Beebe at the University of Utah has modified
+BWK @command{awk} to provide timing and profiling information.
+It is different from @command{gawk} with the @option{--profile} option
+(@pxref{Profiling}),
+in that it uses CPU-based profiling, not line-count
+profiling. You may find it at either
+@uref{ftp://ftp.math.utah.edu/pub/pawk/pawk-20030606.tar.gz}
+or
+@uref{http://www.math.utah.edu/pub/pawk/pawk-20030606.tar.gz}.
+
+@item Busybox Awk
+@cindex Busybox Awk
+@cindex source code, Busybox Awk
+Busybox is a GPL-licensed program providing small versions of many
+applications within a single executable. It is aimed at embedded systems.
+It includes a full implementation of POSIX @command{awk}. When building
+it, be careful not to do @samp{make install} as it will overwrite
+copies of other applications in your @file{/usr/local/bin}. For more
+information, see the @uref{http://busybox.net, project's home page}.
+
+@cindex OpenSolaris
+@cindex Solaris, POSIX-compliant @command{awk}
+@cindex source code, Solaris @command{awk}
+@item The OpenSolaris POSIX @command{awk}
+The versions of @command{awk} in @file{/usr/xpg4/bin} and
+@file{/usr/xpg6/bin} on Solaris are more-or-less POSIX-compliant.
+They are based on the @command{awk} from Mortice Kern Systems for PCs.
+We were able to make this code compile and work under GNU/Linux
+with 1--2 hours of work. Making it more generally portable (using
+GNU Autoconf and/or Automake) would take more work, and this
+has not been done, at least to our knowledge.
+
+@cindex Illumos
+@cindex Illumos, POSIX-compliant @command{awk}
+@cindex source code, Illumos @command{awk}
+The source code used to be available from the OpenSolaris website.
+However, that project was ended and the website shut down. Fortunately, the
+@uref{http://wiki.illumos.org/display/illumos/illumos+Home, Illumos project}
+makes this implementation available. You can view the files one at a time from
+@uref{https://github.com/joyent/illumos-joyent/blob/master/usr/src/cmd/awk_xpg4}.
+
+@cindex @command{jawk}
+@cindex Java implementation of @command{awk}
+@cindex source code, @command{jawk}
+@item @command{jawk}
+This is an interpreter for @command{awk} written in Java. It claims
+to be a full interpreter, although because it uses Java facilities
+for I/O and for regexp matching, the language it supports is different
+from POSIX @command{awk}. More information is available on the
+@uref{http://jawk.sourceforge.net, project's home page}.
+
+@item Libmawk
+@cindex libmawk
+@cindex source code, libmawk
+This is an embeddable @command{awk} interpreter derived from
+@command{mawk}. For more information, see
+@uref{http://repo.hu/projects/libmawk/}.
+
+@item @code{pawk}
+@cindex source code, @command{pawk} (Python version)
+@cindex @code{pawk}, @command{awk}-like facilities for Python
+This is a Python module that claims to bring @command{awk}-like
+features to Python. See @uref{https://github.com/alecthomas/pawk}
+for more information. (This is not related to Nelson Beebe's
+modified version of BWK @command{awk}, described earlier.)
+
+@item @w{QSE Awk}
+@cindex QSE Awk
+@cindex source code, QSE Awk
+This is an embeddable @command{awk} interpreter. For more information,
+see @uref{http://code.google.com/p/qse/} and @uref{http://awk.info/?tools/qse}.
+
+@item @command{QTawk}
+@cindex QuikTrim Awk
+@cindex source code, QuikTrim Awk
+This is an independent implementation of @command{awk} distributed
+under the GPL. It has a large number of extensions over standard
+@command{awk} and may not be 100% syntactically compatible with it.
+See @uref{http://www.quiktrim.org/QTawk.html} for more information,
+including the manual and a download link.
+
+The project may also be frozen; no new code changes have been made
+since approximately 2008.
+
+@item Other versions
+See also the ``Versions and implementations'' section of the
+@uref{http://en.wikipedia.org/wiki/Awk_language#Versions_and_implementations,
+Wikipedia article} for information on additional versions.
+
+@end table
+
+@node Installation summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} distribution is available from the GNU project's main
+distribution site, @code{ftp.gnu.org}. The canonical build recipe is:
+
+@example
+wget http://ftp.gnu.org/gnu/gawk/gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+tar -xvpzf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+cd gawk-@value{VERSION}.@value{PATCHLEVEL}
+./configure && make && make check
+@end example
+
+@item
+@command{gawk} may be built on non-POSIX systems as well. The currently
+supported systems are MS-Windows using DJGPP, MSYS, MinGW, and Cygwin,
+@ifclear FOR_PRINT
+OS/2 using EMX,
+@end ifclear
+and both Vax/VMS and OpenVMS.
+Instructions for each system are included in this @value{CHAPTER}.
+
+@item
+Bug reports should be sent via email to @email{bug-gawk@@gnu.org}.
+Bug reports should be in English, and should include the version of @command{gawk},
+how it was compiled, and a short program and @value{DF} which demonstrate
+the problem.
+
+@item
+There are a number of other freely available @command{awk}
+implementations. Many are POSIX compliant; others are less so.
+
+@end itemize
+
+
+@ifclear FOR_PRINT
+@node Notes
+@appendix Implementation Notes
+@cindex @command{gawk}, implementation issues
+@cindex implementation issues, @command{gawk}
+
+This appendix contains information mainly of interest to implementers and
+maintainers of @command{gawk}. Everything in it applies specifically to
+@command{gawk} and not to other implementations.
+
+@menu
+* Compatibility Mode:: How to disable certain @command{gawk}
+ extensions.
+* Additions:: Making Additions To @command{gawk}.
+* Future Extensions:: New features that may be implemented one day.
+* Implementation Limitations:: Some limitations of the implementation.
+* Extension Design:: Design notes about the extension API.
+* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
+@end menu
+
+@node Compatibility Mode
+@appendixsec Downward Compatibility and Debugging
+@cindex @command{gawk}, implementation issues, downward compatibility
+@cindex @command{gawk}, implementation issues, debugging
+@cindex troubleshooting, @command{gawk}
+@cindex implementation issues@comma{} @command{gawk}, debugging
+
+@xref{POSIX/GNU},
+for a summary of the GNU extensions to the @command{awk} language and program.
+All of these features can be turned off by invoking @command{gawk} with the
+@option{--traditional} option or with the @option{--posix} option.
+
+If @command{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @code
+@item -Y
+@itemx --parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @command{gawk} developers
+and not for the casual user. It probably has not even been compiled into
+your version of @command{gawk}, since it slows down execution.
+
+@node Additions
+@appendixsec Making Additions to @command{gawk}
+
+If you find that you want to enhance @command{gawk} in a significant
+fashion, you are perfectly free to do so. That is the point of having
+free software; the source code is available and you are free to change
+it as you want (@pxref{Copying}).
+
+This @value{SECTION} discusses the ways you might want to change @command{gawk}
+as well as any considerations you should bear in mind.
+
+@menu
+* Accessing The Source:: Accessing the Git repository.
+* Adding Code:: Adding code to the main body of
+ @command{gawk}.
+* New Ports:: Porting @command{gawk} to a new operating
+ system.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
+@end menu
+
+@node Accessing The Source
+@appendixsubsec Accessing The @command{gawk} Git Repository
+
+As @command{gawk} is Free Software, the source code is always available.
+@DBREF{Gawk Distribution} describes how to get and build the formal,
+released versions of @command{gawk}.
+
+@cindex @command{git} utility
+However, if you want to modify @command{gawk} and contribute back your
+changes, you will probably wish to work with the development version.
+To do so, you will need to access the @command{gawk} source code
+repository. The code is maintained using the
+@uref{http://git-scm.com, Git distributed version control system}.
+You will need to install it if your system doesn't have it.
+Once you have done so, use the command:
+
+@example
+git clone git://git.savannah.gnu.org/gawk.git
+@end example
+
+@noindent
+This clones the @command{gawk} repository. If you are behind a
+firewall that does not allow you to use the Git native protocol, you
+can still access the repository using:
+
+@example
+git clone http://git.savannah.gnu.org/r/gawk.git
+@end example
+
+Once you have made changes, you can use @samp{git diff} to produce a
+patch, and send that to the @command{gawk} maintainer; see @ref{Bugs},
+for how to do that.
+
+Once upon a time there was a Git--CVS gateway for use by people who could
+not install Git. However, this gateway no longer works, so you may have
+better luck using a more modern version control system like Bazaar,
+which has a Git plug-in for working with Git repositories.
+
+@node Adding Code
+@appendixsubsec Adding New Features
+
+@cindex adding, features to @command{gawk}
+@cindex features, adding to @command{gawk}
+@cindex @command{gawk}, features, adding
+You are free to add any new features you like to @command{gawk}.
+However, if you want your changes to be incorporated into the @command{gawk}
+distribution, there are several steps that you need to take in order to
+make it possible to include them:
+
+@enumerate 1
+@item
+Before building the new feature into @command{gawk} itself,
+consider writing it as an extension
+(@pxref{Dynamic Extensions}).
+If that's not possible, continue with the rest of the steps in this list.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your changes, you must either place
+those changes in the public domain and submit a signed statement to that
+effect, or assign the copyright in your changes to the FSF.
+Both of these actions are easy to do and @emph{many} people have done so
+already. If you have questions, please contact me
+(@pxref{Bugs}),
+or @EMAIL{assign@@gnu.org,assign at gnu dot org}.
+
+@item
+Get the latest version.
+It is much easier for me to integrate changes if they are relative to
+the most recent distributed version of @command{gawk}, or better yet,
+relative to the latest code in the Git repository. If your version of
+@command{gawk} is very old, I may not be able to integrate your changes at all.
+(@xref{Getting},
+for information on getting the latest version of @command{gawk}.)
+
+@item
+@ifnotinfo
+Follow the @uref{http://www.gnu.org/prep/standards/, @cite{GNU Coding Standards}}.
+@end ifnotinfo
+@ifinfo
+See @inforef{Top, , Version, standards, GNU Coding Standards}.
+@end ifinfo
+This document describes how GNU software should be written. If you haven't
+read it, please do so, preferably @emph{before} starting to modify @command{gawk}.
+(The @cite{GNU Coding Standards} are available from
+the GNU Project's
+@uref{http://www.gnu.org/prep/standards_toc.html, website}.
+Texinfo, Info, and DVI versions are also available.)
+
+@cindex @command{gawk}, coding style in
+@item
+Use the @command{gawk} coding style.
+The C code for @command{gawk} follows the instructions in the
+@cite{GNU Coding Standards}, with minor exceptions. The code is formatted
+using the traditional ``K&R'' style, particularly as regards the placement
+of braces and the use of TABs. In brief, the coding rules for @command{gawk}
+are as follows (a short example illustrating several of them follows the list):
+
+@itemize @value{BULLET}
+@item
+Use ANSI/ISO style (prototype) function headers when defining functions.
+
+@item
+Put the name of the function at the beginning of its own line.
+
+@item
+Put the return type of the function, even if it is @code{int}, on the
+line above the line with the name and arguments of the function.
+
+@item
+Put spaces around parentheses used in control structures
+(@code{if}, @code{while}, @code{for}, @code{do}, @code{switch},
+and @code{return}).
+
+@item
+Do not put spaces in front of parentheses used in function calls.
+
+@item
+Put spaces around all C operators and after commas in function calls.
+
+@item
+Do not use the comma operator to produce multiple side effects, except
+in @code{for} loop initialization and increment parts, and in macro bodies.
+
+@item
+Use real TABs for indenting, not spaces.
+
+@item
+Use the ``K&R'' brace layout style.
+
+@item
+Use comparisons against @code{NULL} and @code{'\0'} in the conditions of
+@code{if}, @code{while}, and @code{for} statements, as well as in the @code{case}s
+of @code{switch} statements, instead of just the
+plain pointer or character value.
+
+@item
+Use @code{true} and @code{false} for @code{bool} values,
+the @code{NULL} symbolic constant for pointer values,
+and the character constant @code{'\0'} where appropriate, instead of @code{1}
+and @code{0}.
+
+@item
+Provide one-line descriptive comments for each function.
+
+@item
+Do not use the @code{alloca()} function for allocating memory off the
+stack. Its use causes more portability trouble than is worth the minor
+benefit of not having to free the storage. Instead, use @code{malloc()}
+and @code{free()}.
+
+@item
+Do not use comparisons of the form @samp{! strcmp(a, b)} or similar.
+As Henry Spencer once said, ``@code{strcmp()} is not a boolean!''
+Instead, use @samp{strcmp(a, b) == 0}.
+
+@item
+If adding new bit flag values, use explicit hexadecimal constants
+(@code{0x001}, @code{0x002}, @code{0x004}, and so on) instead of
+shifting one left by successive amounts (@samp{(1<<0)}, @samp{(1<<1)},
+and so on).
+@end itemize
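+
+Here is a short, hypothetical function (not taken from the @command{gawk}
+source) that illustrates several of these rules, including an ANSI-style
+header, the return type and function name on separate lines, ``K&R''
+braces, spaces in control structures, a one-line descriptive comment,
+and explicit comparisons against @code{NULL} and @code{'\0'}:
+
+@example
+/* count_fields --- count the colon-separated fields in a string */
+
+static int
+count_fields(const char *str)
+@{
+        int count = 1;
+
+        if (str == NULL || *str == '\0')
+                return 0;
+
+        while (*str != '\0') @{
+                if (*str == ':')
+                        count++;
+                str++;
+        @}
+
+        return count;
+@}
+@end example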
+
+@quotation NOTE
+If I have to reformat your code to follow the coding style used in
+@command{gawk}, I may not bother to integrate your changes at all.
+@end quotation
+
+@cindex Texinfo
+@item
+Update the documentation.
+Along with your new code, please supply new sections and/or chapters
+for this @value{DOCUMENT}. If at all possible, please use real
+Texinfo, instead of just supplying unformatted ASCII text (although
+even that is better than no documentation at all).
+Conventions to be followed in @cite{@value{TITLE}} are provided
+after the @samp{@@bye} at the end of the Texinfo source file.
+If possible, please update the @command{man} page as well.
+
+You will also have to sign paperwork for your documentation changes.
+
+@cindex @command{git} utility
+@item
+Submit changes as unified diffs.
+Use @samp{diff -u -r -N} to compare
+the original @command{gawk} source tree with your version.
+I recommend using the GNU version of @command{diff}, or best of all,
+@samp{git diff} or @samp{git format-patch}.
+Send the output produced by @command{diff} to me when you
+submit your changes.
+(@xref{Bugs}, for the electronic mail
+information.)
+
+Using this format makes it easy for me to apply your changes to the
+master version of the @command{gawk} source code (using @code{patch}).
+If I have to apply the changes manually, using a text editor, I may
+not do so, particularly if there are lots of changes.
+
+@item
+Include an entry for the @file{ChangeLog} file with your submission.
+This helps further minimize the amount of work I have to do,
+making it easier for me to accept patches.
+It is simplest if you just make this part of your diff.
+@end enumerate
+
+Although this sounds like a lot of work, please remember that while you
+may write the new code, I have to maintain it and support it. If it
+isn't possible for me to do that with a minimum of extra work, then I
+probably will not.
+
+@node New Ports
+@appendixsubsec Porting @command{gawk} to a New Operating System
+@cindex portability, @command{gawk}
+@cindex operating systems, porting @command{gawk} to
+
+@cindex porting @command{gawk}
+If you want to port @command{gawk} to a new operating system, there are
+several steps:
+
+@enumerate 1
+@item
+Follow the guidelines in
+@ifinfo
+@ref{Adding Code},
+@end ifinfo
+@ifnotinfo
+the previous @value{SECTION}
+@end ifnotinfo
+concerning coding style, submission of diffs, and so on.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your code, you must either place
+your code in the public domain and submit a signed statement to that
+effect, or assign the copyright in your code to the FSF.
+Both of these actions are easy to do and @emph{many} people have done so
+already. If you have questions, please contact me, or
+@email{gnu@@gnu.org}.
+
+@item
+When doing a port, bear in mind that your code must coexist peacefully
+with the rest of @command{gawk} and the other ports. Avoid gratuitous
+changes to the system-independent parts of the code. If at all possible,
+avoid sprinkling @samp{#ifdef}s just for your port throughout the
+code.
+
+If the changes needed for a particular system affect too much of the
+code, I probably will not accept them. In such a case, you can, of course,
+distribute your changes on your own, as long as you comply
+with the GPL
+(@pxref{Copying}).
+
+@item
+A number of the files that come with @command{gawk} are maintained by other
+people. Thus, you should not change them
+unless it is for a very good reason; i.e., changes are not out of the
+question, but changes to these files are scrutinized extra carefully.
+The files are
+@file{dfa.c},
+@file{dfa.h},
+@file{getopt.c},
+@file{getopt.h},
+@file{getopt1.c},
+@file{getopt_int.h},
+@file{gettext.h},
+@file{regcomp.c},
+@file{regex.c},
+@file{regex.h},
+@file{regex_internal.c},
+@file{regex_internal.h},
+and
+@file{regexec.c}.
+
+@item
+A number of other files are provided by the GNU
+Autotools (Autoconf, Automake, and GNU @command{gettext}).
+You should not change them either, unless it is for a very
+good reason. The files are
+@file{ABOUT-NLS},
+@file{config.guess},
+@file{config.rpath},
+@file{config.sub},
+@file{depcomp},
+@file{INSTALL},
+@file{install-sh},
+@file{missing},
+@file{mkinstalldirs},
+@file{xalloc.h},
+and
+@file{ylwrap}.
+
+@item
+Be willing to continue to maintain the port.
+Non-Unix operating systems are supported by volunteers who maintain
+the code needed to compile and run @command{gawk} on their systems. If no-one
+volunteers to maintain a port, it becomes unsupported and it may
+be necessary to remove it from the distribution.
+
+@item
+Supply an appropriate @file{gawkmisc.???} file.
+Each port has its own @file{gawkmisc.???} that implements certain
+operating system specific functions. This is cleaner than a plethora of
+@samp{#ifdef}s scattered throughout the code. The @file{gawkmisc.c} in
+the main source directory includes the appropriate
+@file{gawkmisc.???} file from each subdirectory.
+Be sure to update it as well.
+
+Each port's @file{gawkmisc.???} file has a suffix reminiscent of the machine
+or operating system for the port---for example, @file{pc/gawkmisc.pc} and
+@file{vms/gawkmisc.vms}. The use of separate suffixes, instead of plain
+@file{gawkmisc.c}, makes it possible to move files from a port's subdirectory
+into the main subdirectory, without accidentally destroying the real
+@file{gawkmisc.c} file. (Currently, this is only an issue for the
+PC operating system ports.)
+
+@item
+Supply a @file{Makefile} as well as any other C source and header files that are
+necessary for your operating system. All your code should be in a
+separate subdirectory, with a name that is the same as, or reminiscent
+of, either your operating system or the computer system. If possible,
+try to structure things so that it is not necessary to move files out
+of the subdirectory into the main source directory. If that is not
+possible, then be sure to avoid using names for your files that
+duplicate the names of files in the main source directory.
+
+@item
+Update the documentation.
+Please write a section (or sections) for this @value{DOCUMENT} describing the
+installation and compilation steps needed to compile and/or install
+@command{gawk} for your system.
+@end enumerate
+
+Following these steps makes it much easier to integrate your changes
+into @command{gawk} and have them coexist happily with other
+operating systems' code that is already there.
+
+In the code that you supply and maintain, feel free to use a
+coding style and brace layout that suits your taste.
+
+@node Derived Files
+@appendixsubsec Why Generated Files Are Kept In Git
+
+@cindex Git, use of for @command{gawk} source code
+@c From emails written March 22, 2012, to the gawk developers list.
+
+If you look at the @command{gawk} source in the Git
+repository, you will notice that it includes files that are automatically
+generated by GNU infrastructure tools, such as @file{Makefile.in} from
+Automake and even @file{configure} from Autoconf.
+
+This is different from many Free Software projects that do not store
+the derived files, because that keeps the repository less cluttered,
+and it is easier to see the substantive changes when comparing versions
+and trying to understand what changed between commits.
+
+However, there are several reasons why the @command{gawk} maintainer
+likes to have everything in the repository.
+
+First, because it is then easy to reproduce any given version completely,
+without relying upon the availability of (older, likely obsolete, and
+maybe even impossible to find) other tools.
+
+As an extreme example, if you ever even think about trying to compile,
+oh, say, the V7 @command{awk}, you will discover that not only do you
+have to bootstrap the V7 @command{yacc} to do so, but you also need the
+V7 @command{lex}. And the latter is pretty much impossible to bring up
+on a modern GNU/Linux system.@footnote{We tried. It was painful.}
+
+(Or, let's say @command{gawk} 1.2 required @command{bison} whatever-it-was
+in 1989 and that there was no @file{awkgram.c} file in the repository. Is
+there a guarantee that we could find that @command{bison} version? Or that
+@emph{it} would build?)
+
+If the repository has all the generated files, then it's easy to just check
+them out and build. (Or @emph{easier}, depending upon how far back we go.)
+
+And that brings us to the second (and stronger) reason why all the files
+really need to be in Git. It boils down to who you cater
+to---the @command{gawk} developer(s), or the user who just wants to check
+out a version and try it out?
+
+The @command{gawk} maintainer
+wants it to be possible for any interested @command{awk} user in the
+world to just clone the repository, check out the branch of interest and
+build it. Without their having to have the correct version(s) of the
+autotools.@footnote{There is one GNU program that is (in our opinion)
+severely difficult to bootstrap from the Git repository. For
+example, on the author's old (but still working) PowerPC Macintosh with
+Mac OS X 10.5, it was necessary to bootstrap a ton of software, starting
+with Git itself, in order to try to work with the latest code.
+It's not pleasant, and especially on older systems, it's a big waste
+of time.
+
+Starting with the latest tarball was no picnic either. The maintainers
+had dropped @file{.gz} and @file{.bz2} files and distributed only
+@file{.tar.xz} files. It was necessary to bootstrap @command{xz} first!}
+That is the point of the @file{bootstrap.sh} file. It touches the
+various other files in the right order such that
+
+@example
+# The canonical incantation for building GNU software:
+./bootstrap.sh && ./configure && make
+@end example
+
+@noindent
+will @emph{just work}.
+
+This is extremely important for the @code{master} and
+@code{gawk-@var{X}.@var{Y}-stable} branches.
+
+Further, the @command{gawk} maintainer would argue that it's also
+important for the @command{gawk} developers. When he tried to check out
+the @code{xgawk} branch@footnote{A branch (since removed) created by one of the other
+developers that did not include the generated files.} to build it, he
+couldn't. (No @file{ltmain.sh} file, and he had no idea how to create it,
+and that was not the only problem.)
+
+He felt @emph{extremely} frustrated. With respect to that branch,
+the maintainer is no different than Jane User who wants to try to build
+@code{gawk-4.1-stable} or @code{master} from the repository.
+
+Thus, the maintainer thinks that it's not just important, but critical,
+that for any given branch, the above incantation @emph{just works}.
+
+@c Added 9/2014:
+A third reason to have all the files is that without them, using @samp{git
+bisect} to try to find the commit that introduced a bug is exceedingly
+difficult. The maintainer tried to do that on another project that
+requires running bootstrapping scripts just to create @command{configure}
+and so on; it was really painful. When the repository is self-contained,
+using @command{git bisect} in it is very easy.
+
+@c So - that's my reasoning and philosophy.
+
+What are some of the consequences and/or actions to take?
+
+@enumerate 1
+@item
+We don't mind that there are differing files in the different branches
+as a result of different versions of the autotools.
+
+@enumerate A
+@item
+It's the maintainer's job to merge them and he will deal with it.
+
+@item
+He is really good at @samp{git diff x y > /tmp/diff1 ; gvim /tmp/diff1} to
+remove the diffs that aren't of interest in order to review code.
+@end enumerate
+
+@item
+It would certainly help if everyone used the same versions of the GNU tools
+as he does, which in general are the latest released versions of
+Automake,
+Autoconf,
+@command{bison},
+and
+GNU @command{gettext}.
+
+@ignore
+If it would help if I sent out an ``I just upgraded to version x.y
+of tool Z'' kind of message to this list, I can do that. Up until
+now it hasn't been a real issue since I'm the only one who's been
+dorking with the configuration machinery.
+@end ignore
+
+@c @enumerate A
+@c @item
+Installing from source is quite easy. It's how the maintainer worked for years
+(and still works).
+He had @file{/usr/local/bin} at the front of his @env{PATH} and just did:
+
+@example
+wget http://ftp.gnu.org/gnu/@var{package}/@var{package}-@var{x}.@var{y}.@var{z}.tar.gz
+tar -xpzvf @var{package}-@var{x}.@var{y}.@var{z}.tar.gz
+cd @var{package}-@var{x}.@var{y}.@var{z}
+./configure && make && make check
+make install # as root
+@end example
+
+@c @item
+@ignore
+These days the maintainer uses Ubuntu 12.04 which is medium current, but
+he is already doing the above for Automake, Autoconf, and @command{bison}.
+@end ignore
+
+@ignore
+(C. Rant: Recent Linux versions with GNOME 3 really suck. What
+ are all those people thinking? Fedora 15 was such a bust it drove
+ me to Ubuntu, but Ubuntu 11.04 and 11.10 are totally unusable from
+ a UI perspective. Bleah.)
+@end ignore
+@c @end enumerate
+
+@ignore
+@item
+If someone still feels really strongly about all this, then perhaps they
+can have two branches, one for their development with just the clean
+changes, and one that is buildable (xgawk and xgawk-buildable, maybe).
+Or, as I suggested in another mail, make commits in pairs, the first with
+the "real" changes and the second with "everything else needed for
+ building".
+@end ignore
+@end enumerate
+
+Most of the above was originally written by the maintainer to other
+@command{gawk} developers. It raised the objection from one of
+the developers ``@dots{} that anybody pulling down the source from
+Git is not an end user.''
+
+However, this is not true. There are ``power @command{awk} users''
+who can build @command{gawk} (using the magic incantation shown previously)
+but who can't program in C. Thus, the major branches should be
+kept buildable all the time.
+
+It was then suggested that there be a @command{cron} job to create
+nightly tarballs of ``the source.'' Here, the problem is that there
+are multiple source trees, corresponding to the various branches! So,
+nightly tarballs aren't the answer, especially as the repository can go
+for weeks without significant change being introduced.
+
+Fortunately, the Git server can meet this need. For any given
+branch named @var{branchname}, use:
+
+@example
+wget http://git.savannah.gnu.org/cgit/gawk.git/snapshot/gawk-@var{branchname}.tar.gz
+@end example
+
+@noindent
+to retrieve a snapshot of the given branch.
+
+@node Future Extensions
+@appendixsec Probable Future Extensions
+@ignore
+From emory!scalpel.netlabs.com!lwall Tue Oct 31 12:43:17 1995
+Return-Path: <emory!scalpel.netlabs.com!lwall>
+Message-Id: <9510311732.AA28472@scalpel.netlabs.com>
+To: arnold@skeeve.atl.ga.us (Arnold D. Robbins)
+Subject: Re: May I quote you?
+In-Reply-To: Your message of "Tue, 31 Oct 95 09:11:00 EST."
+ <m0tAHPQ-00014MC@skeeve.atl.ga.us>
+Date: Tue, 31 Oct 95 09:32:46 -0800
+From: Larry Wall <emory!scalpel.netlabs.com!lwall>
+
+: Greetings. I am working on the release of gawk 3.0. Part of it will be a
+: thoroughly updated manual. One of the sections deals with planned future
+: extensions and enhancements. I have the following at the beginning
+: of it:
+:
+: @cindex PERL
+: @cindex Wall, Larry
+: @display
+: @i{AWK is a language similar to PERL, only considerably more elegant.} @*
+: Arnold Robbins
+: @sp 1
+: @i{Hey!} @*
+: Larry Wall
+: @end display
+:
+: Before I actually release this for publication, I wanted to get your
+: permission to quote you. (Hopefully, in the spirit of much of GNU, the
+: implied humor is visible... :-)
+
+I think that would be fine.
+
+Larry
+@end ignore
+@cindex Perl
+@cindex Wall, Larry
+@cindex Robbins, Arnold
+@quotation
+@i{AWK is a language similar to PERL, only considerably more elegant.}
+@author Arnold Robbins
+@end quotation
+
+@quotation
+@i{Hey!}
+@author Larry Wall
+@end quotation
+
+The @file{TODO} file in the @code{master} branch of the @command{gawk}
+Git repository lists possible future enhancements. Some of these relate
+to the source code, and others to possible new features. Please see
+that file for the list.
+@xref{Additions},
+if you are interested in tackling any of the projects listed there.
+
+@node Implementation Limitations
+@appendixsec Some Limitations of the Implementation
+
+The following table describes limits of @command{gawk} on a Unix-like
+system (although the limits can vary even there). Other systems may have
+different limits.
+
+@multitable @columnfractions .40 .60
+@headitem Item @tab Limit
+@item Characters in a character class @tab 2^(number of bits per byte)
+@item Length of input record @tab @code{MAX_INT}
+@item Length of output record @tab Unlimited
+@item Length of source line @tab Unlimited
+@item Number of fields in a record @tab @code{MAX_LONG}
+@item Number of file redirections @tab Unlimited
+@item Number of input records in one file @tab @code{MAX_LONG}
+@item Number of input records total @tab @code{MAX_LONG}
+@item Number of pipe redirections @tab min(number of processes per user, number of open files)
+@item Numeric values @tab Double-precision floating point (if not using MPFR)
+@item Size of a field @tab @code{MAX_INT}
+@item Size of a literal string @tab @code{MAX_INT}
+@item Size of a printf string @tab @code{MAX_INT}
+@end multitable
+
+@node Extension Design
+@appendixsec Extension API Design
+
+This @value{SECTION} documents the design of the extension API,
+including a discussion of some of the history and problems that needed
+to be solved.
+
+The first version of extensions for @command{gawk} was developed in
+the mid-1990s and released with @command{gawk} 3.1 in the late 1990s.
+The basic mechanisms and design remained unchanged for close to 15 years,
+until 2012.
+
+The old extension mechanism used data types and functions from
+@command{gawk} itself, with a ``clever hack'' to install extension
+functions.
+
+@command{gawk} included some sample extensions, of which a few were
+really useful. However, it was clear from the outset that the extension
+mechanism was bolted onto the side and was not really well thought out.
+
+@menu
+* Old Extension Problems:: Problems with the old mechanism.
+* Extension New Mechanism Goals:: Goals for the new mechanism.
+* Extension Other Design Decisions:: Some other design decisions.
+* Extension Future Growth:: Some room for future growth.
+@end menu
+
+@node Old Extension Problems
+@appendixsubsec Problems With The Old Mechanism
+
+The old extension mechanism had several problems:
+
+@itemize @value{BULLET}
+@item
+It depended heavily upon @command{gawk} internals. Any time the
+@code{NODE} structure@footnote{A critical central data structure
+inside @command{gawk}.} changed, an extension would have to be
+recompiled. Furthermore, to really write extensions required understanding
+something about @command{gawk}'s internal functions. There was some
+documentation in this @value{DOCUMENT}, but it was quite minimal.
+
+@item
+Being able to call into @command{gawk} from an extension required linker
+facilities that are common on Unix-derived systems but that did
+not work on MS-Windows systems; users wanting extensions on MS-Windows
+had to statically link them into @command{gawk}, even though MS-Windows supports
+dynamic loading of shared objects.
+
+@item
+The API would change occasionally as @command{gawk} changed; no compatibility
+between versions was ever offered or planned for.
+@end itemize
+
+Despite the drawbacks, the @command{xgawk} project developers forked
+@command{gawk} and developed several significant extensions. They also
+enhanced @command{gawk}'s facilities relating to file inclusion and
+shared object access.
+
+A new API was desired for a long time, but only in 2012 did the
+@command{gawk} maintainer and the @command{xgawk} developers finally
+start working on it together. More information about the @command{xgawk}
+project is provided in @ref{gawkextlib}.
+
+@node Extension New Mechanism Goals
+@appendixsubsec Goals For A New Mechanism
+
+Some goals for the new API were:
+
+@itemize @value{BULLET}
+@item
+The API should be independent of @command{gawk} internals. Changes in
+@command{gawk} internals should not be visible to the writer of an
+extension function.
+
+@item
+The API should provide @emph{binary} compatibility across @command{gawk}
+releases as long as the API itself does not change.
+
+@item
+The API should enable extensions written in C or C++ to have roughly the
+same ``appearance'' to @command{awk}-level code as @command{awk}
+functions do. This means that extensions should have:
+
+@itemize @value{MINUS}
+@item
+The ability to access function parameters.
+
+@item
+The ability to turn an undefined parameter into an array (call by reference).
+
+@item
+The ability to create, access and update global variables.
+
+@item
+Easy access to all the elements of an array at once (``array flattening'')
+in order to loop over all the elements in an easy fashion for C code.
+
+@item
+The ability to create arrays (including @command{gawk}'s true
+arrays of arrays).
+@end itemize
+@end itemize
+
+Some additional important goals were:
+
+@itemize @value{BULLET}
+@item
+The API should use only features in ISO C 90, so that extensions
+can be written using the widest range of C and C++ compilers. The header
+should include the appropriate @samp{#ifdef __cplusplus} and @samp{extern "C"}
+magic so that a C++ compiler could be used. (If using C++, the runtime
+system has to be smart enough to call any constructors and destructors,
+as @command{gawk} is a C program. As of this writing, this has not been
+tested.)
+
+@item
+The API mechanism should not require access to @command{gawk}'s
+symbols@footnote{The @dfn{symbols} are the variables and functions
+defined inside @command{gawk}. Access to these symbols by code
+external to @command{gawk} loaded dynamically at runtime is
+problematic on MS-Windows.} by the compile-time or dynamic linker,
+in order to enable creation of extensions that also work on MS-Windows.
+@end itemize
+
+During development, it became clear that there were other features
+that should be available to extensions, which were also subsequently
+provided:
+
+@itemize @value{BULLET}
+@item
+Extensions should have the ability to hook into @command{gawk}'s
+I/O redirection mechanism. In particular, the @command{xgawk}
+developers provided a so-called ``open hook'' to take over reading
+records. During development, this was generalized to allow
+extensions to hook into input processing, output processing, and
+two-way I/O.
+
+@item
+An extension should be able to provide a ``call back'' function
+to perform cleanup actions when @command{gawk} exits.
+
+@item
+An extension should be able to provide a version string so that
+@command{gawk}'s @option{--version} option can provide information
+about extensions as well.
+@end itemize
+
+The requirement to avoid access to @command{gawk}'s symbols is, at first
+glance, a difficult one to meet.
+
+One design, apparently used by Perl and Ruby and maybe others, would
+be to make the mainline @command{gawk} code into a library, with the
+@command{gawk} utility a small C @code{main()} function linked against
+the library.
+
+This seemed like the tail wagging the dog, complicating build and
+installation and making a simple copy of the @command{gawk} executable
+from one system to another (or one place to another on the same
+system!) into a chancy operation.
+
+Pat Rankin suggested the solution that was adopted.
+@xref{Extension Mechanism Outline}, for the details.
+
+@node Extension Other Design Decisions
+@appendixsubsec Other Design Decisions
+
+As an arbitrary design decision, extensions can read the values of
+predefined variables and arrays (such as @code{ARGV} and @code{FS}), but cannot
+change them, with the exception of @code{PROCINFO}.
+
+The reason for this is to prevent an extension function from affecting
+the flow of an @command{awk} program outside its control. While a real
+@command{awk} function can do what it likes, that is at the discretion
+of the programmer. An extension function should provide a service or
+make a C API available for use within @command{awk}, and not mess with
+@code{FS} or @code{ARGC} and @code{ARGV}.
+
+In addition, it becomes easy to start down a slippery slope. How
+much access to @command{gawk} facilities do extensions need?
+Do they need @code{getline}? What about calling @code{gsub()} or
+compiling regular expressions? What about calling into @command{awk}
+functions? (@emph{That} would be messy.)
+
+In order to avoid these issues, the @command{gawk} developers chose
+to start with the simplest, most basic features that are still truly useful.
+
+Another decision is that although @command{gawk} provides nice things like
+MPFR, and arrays indexed internally by integers, these features are not
+being brought out to the API in order to keep things simple and close to
+traditional @command{awk} semantics. (In fact, arrays indexed internally
+by integers are so transparent that they aren't even documented!)
+
+Additionally, all functions in the API check that their pointer
+input parameters are not @code{NULL}. If they are, they return an error.
+(It is a good idea for extension code to verify that
+pointers received from @command{gawk} are not @code{NULL}.
+Such a thing should not happen, but the @command{gawk} developers
+are only human, and they have been known to occasionally make
+mistakes.)
+
+With time, the API will undoubtedly evolve; the @command{gawk} developers
+expect this to be driven by user needs. For now, the current API seems
+to provide a minimal yet powerful set of features for creating extensions.
+
+@node Extension Future Growth
+@appendixsubsec Room For Future Growth
+
+The API can later be expanded, in two ways:
+
+@itemize @value{BULLET}
+@item
+@command{gawk} passes an ``extension id'' into the extension when it
+first loads the extension. The extension then passes this id back
+to @command{gawk} with each function call. This mechanism allows
+@command{gawk} to identify the extension calling into it, should it need
+to know.
+
+@item
+Similarly, the extension passes a ``name space'' into @command{gawk}
+when it registers each extension function. This accommodates a possible
+future mechanism for grouping extension functions and avoiding name
+conflicts.
+@end itemize
+
+Of course, as of this writing, no decisions have been made with respect
+to any of the above.
+
+@node Old Extension Mechanism
+@appendixsec Compatibility For Old Extensions
+
+@ref{Dynamic Extensions}, describes the supported API and mechanisms
+for writing extensions for @command{gawk}. This API was introduced
+in @value{PVERSION} 4.1. However, for many years @command{gawk}
+provided an extension mechanism that required knowledge of @command{gawk}
+internals and that was not as well designed.
+
+In order to provide a transition period, @command{gawk} @value{PVERSION} 4.1
+continues to support the original extension mechanism.
+This will be true for the life of exactly one major release. This support
+will be withdrawn, and removed from the source code, at the next major
+release.
+
+Briefly, original-style extensions should be compiled by including the
+@file{awk.h} header file in the extension source code. Additionally,
+you must define the identifier @samp{GAWK} when building (use
+@samp{-DGAWK} with Unix-style compilers). Otherwise, the definitions
+in @file{gawkapi.h} will cause conflicts with those in @file{awk.h}
+and your extension will not compile.
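+
+For example, on a GNU/Linux system the build command might look
+roughly like the following sketch.  (The @file{myext.c} @value{FN} is
+just a placeholder, and you will also need @option{-I} options
+pointing at the @command{gawk} source tree, since @file{awk.h} is not
+an installed header file.)
+
+@example
+$ @kbd{gcc -fPIC -shared -DGAWK -I/path/to/gawk-source -o myext.so myext.c}
+@end example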
+
+Just as in previous versions, you load an old-style extension with the
+@code{extension()} built-in function (which is not otherwise documented).
+This function in turn finds and loads the shared object file containing
+the extension and calls its @code{dl_load()} C routine.
+
+Because original-style and new-style extensions use different initialization
+routines (@code{dl_load()} versus @code{dlload()}), they may safely
+be installed in the same directory (to be found by @env{AWKLIBPATH})
+without conflict.
+
+The @command{gawk} development team strongly recommends that you
+convert any old extensions that you may have to use the new API
+described in @ref{Dynamic Extensions}.
+
+@node Notes summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+@command{gawk}'s extensions can be disabled with either the
+@option{--traditional} option or the @option{--posix} option.
+The @option{--parsedebug} option is available if @command{gawk} is
+compiled with @samp{-DDEBUG}.
+
+@item
+The source code for @command{gawk} is maintained in a publicly
+accessible Git repository. Anyone may check it out and view the source.
+
+@item
+Contributions to @command{gawk} are welcome. Following the steps
+outlined in this @value{CHAPTER} will make it easier to integrate
+your contributions into the code base.
+This applies both to new feature contributions and to ports to
+additional operating systems.
+
+@item
+@command{gawk} has some limits---generally those that are imposed by
+the machine architecture.
+
+@item
+The extension API design was intended to solve a number of problems
+with the previous extension mechanism, enable features needed by
+the @command{xgawk} project, and provide binary compatibility going forward.
+
+@item
+The previous extension mechanism is still supported in @value{PVERSION} 4.1
+of @command{gawk}, but it @emph{will} be removed in the next major release.
+
+@end itemize
+
+
+@node Basic Concepts
+@appendix Basic Programming Concepts
+@cindex programming, concepts
+
+This @value{APPENDIX} attempts to define some of the basic concepts
+and terms that are used throughout the rest of this @value{DOCUMENT}.
+As this @value{DOCUMENT} is specifically about @command{awk},
+and not about computer programming in general, the coverage here
+is by necessity fairly cursory and simplistic.
+(If you need more background, there are many
+other introductory texts that you should refer to instead.)
+
+@menu
+* Basic High Level:: The high level view.
+* Basic Data Typing:: A very quick intro to data types.
+@end menu
+
+@node Basic High Level
+@appendixsec What a Program Does
+
+@cindex processing data
+At the most basic level, the job of a program is to process
+some input data and produce results.
+@ifnotdocbook
+See @ref{figure-general-flow}.
+@end ifnotdocbook
+@ifdocbook
+See @inlineraw{docbook, <xref linkend="figure-general-flow"/>}.
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-general-flow
+@caption{General Program Flow}
+@ifinfo
+@center @image{general-program, , , General program flow, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{general-program, , , General program flow}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-general-flow" float="0">
+<title>General Program Flow</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="general-program.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+@cindex compiled programs
+@cindex interpreted programs
+The ``program'' in the figure can be either a compiled
+program@footnote{Compiled programs are typically written
+in lower-level languages such as C, C++, or Ada,
+and then translated, or @dfn{compiled}, into a form that
+the computer can execute directly.}
+(such as @command{ls}),
+or it may be @dfn{interpreted}. In the latter case, a machine-executable
+program such as @command{awk} reads your program, and then uses the
+instructions in your program to process the data.
+
+@cindex programming, basic steps
+When you write a program, it usually consists
+of the following, very basic set of steps,
+@ifnotdocbook
+as shown in @ref{figure-process-flow}:
+@end ifnotdocbook
+@ifdocbook
+as shown in @inlineraw{docbook, <xref linkend="figure-process-flow"/>}:
+@end ifdocbook
+
+@ifnotdocbook
+@float Figure,figure-process-flow
+@caption{Basic Program Steps}
+@ifinfo
+@center @image{process-flow, , , Basic Program Stages, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{process-flow, , , Basic Program Stages}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-process-flow" float="0">
+<title>Basic Program Stages</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="process-flow.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
+
+@table @asis
+@item Initialization
+These are the things you do before actually starting to process
+data, such as checking arguments, initializing any data you need
+to work with, and so on.
+This step corresponds to @command{awk}'s @code{BEGIN} rule
+(@pxref{BEGIN/END}).
+
+If you were baking a cake, this might consist of laying out all the
+mixing bowls and the baking pan, and making sure you have all the
+ingredients that you need.
+
+@item Processing
+This is where the actual work is done. Your program reads data,
+one logical chunk at a time, and processes it as appropriate.
+
+In most programming languages, you have to manually manage the reading
+of data, checking to see if there is more each time you read a chunk.
+@command{awk}'s pattern-action paradigm
+(@pxref{Getting Started})
+handles the mechanics of this for you.
+
+In baking a cake, the processing corresponds to the actual labor:
+breaking eggs, mixing the flour, water, and other ingredients, and then putting the cake
+into the oven.
+
+@item Clean Up
+Once you've processed all the data, you may have things you need to
+do before exiting.
+This step corresponds to @command{awk}'s @code{END} rule
+(@pxref{BEGIN/END}).
+
+After the cake comes out of the oven, you still have to wrap it in
+plastic wrap to keep anyone from tasting it, as well as wash
+the mixing bowls and utensils.
+@end table
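+
+In @command{awk} terms, the three stages might look something like the
+following sketch (summing the first field of every record is, of
+course, just an invented task for illustration):
+
+@example
+BEGIN @{ total = 0 @}                 # initialization
+@{ total += $1 @}                     # processing, once per record
+END @{ print "total is", total @}     # clean up
+@end example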
+
+@cindex algorithms
+An @dfn{algorithm} is a detailed set of instructions necessary to accomplish
+a task, or process data. It is much the same as a recipe for baking
+a cake. Programs implement algorithms. Often, it is up to you to design
+the algorithm and implement it, simultaneously.
+
+@cindex records
+@cindex fields
+The ``logical chunks'' we talked about previously are called @dfn{records},
+similar to the records a company keeps on employees, a school keeps for
+students, or a doctor keeps for patients.
+Each record has many component parts, such as first and last names,
+date of birth, address, and so on. The component parts are referred
+to as the @dfn{fields} of the record.
+
+The act of reading data is termed @dfn{input}, and that of
+generating results, not too surprisingly, is termed @dfn{output}.
+They are often referred to together as ``input/output,''
+and even more often, as ``I/O'' for short.
+(You will also see ``input'' and ``output'' used as verbs.)
+
+@cindex data-driven languages
+@cindex languages@comma{} data-driven
+@command{awk} manages the reading of data for you, as well as the
+breaking it up into records and fields. Your program's job is to
+tell @command{awk} what to do with the data. You do this by describing
+@dfn{patterns} in the data to look for, and @dfn{actions} to execute
+when those patterns are seen. This @dfn{data-driven} nature of
+@command{awk} programs usually makes them both easier to write
+and easier to read.
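+
+For example, the following one-line program (the @value{FN} and the
+string @samp{gold} are invented for the example) prints the second
+field of every input record that contains the string @samp{gold}:
+
+@example
+$ @kbd{awk '/gold/ @{ print $2 @}' inventory.txt}
+@end example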
+
+@node Basic Data Typing
+@appendixsec Data Values in a Computer
+
+@cindex variables
+In a program,
+you keep track of information and values in things called @dfn{variables}.
+A variable is just a name for a given value, such as @code{first_name},
+@code{last_name}, @code{address}, and so on.
+@command{awk} has several predefined variables, and it has
+special names to refer to the current input record
+and the fields of the record.
+You may also group multiple
+associated values under one name, as an array.
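+
+A minimal sketch (all of the names and values here are made up):
+
+@example
+BEGIN @{
+    first_name = "Amelia"          # a string value
+    answer = 42                    # a numeric value
+    phone["Amelia"] = "555-0123"   # one element of an array
+@}
+@end example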
+
+@cindex values, numeric
+@cindex values, string
+@cindex scalar values
+Data, particularly in @command{awk}, consists of either numeric
+values, such as 42 or 3.1415927, or string values.
+String values are essentially anything that's not a number, such as a name.
+Strings are sometimes referred to as @dfn{character data}, since they
+store the individual characters that comprise them.
+Individual values, whether numeric or string, are
+referred to as @dfn{scalar} values.
+Groups of values, such as arrays, are not scalars.
+
+@ref{Computer Arithmetic}, provided a basic introduction to numeric
+types (integer and floating-point) and how they are used in a computer.
+Please review that information, including a number of caveats that
+were presented.
+
+@cindex null strings
+While you are probably used to the idea of a number that stands for nothing (i.e., zero),
+it takes a bit more getting used to the idea of zero-length character data.
+Nevertheless, such a thing exists.
+It is called the @dfn{null string}.
+The null string is character data that has no value.
+In other words, it is empty. It is written in @command{awk} programs
+like this: @code{""}.
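+
+For example, this sketch (the variable name is arbitrary) assigns the
+null string to a variable and then tests whether the variable is empty:
+
+@example
+BEGIN @{
+    message = ""                # the null string
+    if (message == "")
+        print "message is empty"
+@}
+@end example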
+
+Humans are used to working in decimal; i.e., base 10. In base 10,
+numbers go from 0 to 9, and then ``roll over'' into the next
+column. (Remember grade school? 42 = 4 x 10 + 2.)
+
+There are other number bases though. Computers commonly use base 2
+or @dfn{binary}, base 8 or @dfn{octal}, and base 16 or @dfn{hexadecimal}.
+In binary, each column represents two times the value in the column to
+its right. Each column may contain either a 0 or a 1.
+Thus, binary 1010 represents (1 x 8) + (0 x 4) + (1 x 2)
++ (0 x 1), or decimal 10.
+Octal and hexadecimal are discussed more in
+@ref{Nondecimal-numbers}.
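+
+For example, @command{gawk} (although not every version of
+@command{awk}) lets you write octal and hexadecimal constants directly
+in your program text, so you can check such arithmetic for yourself:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print 0x12, 013 @}'}
+@print{} 18 11
+@end example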
+
+At the very lowest level, computers store values as groups of binary digits,
+or @dfn{bits}. Modern computers collect bits into groups of eight, called @dfn{bytes}.
+Advanced applications sometimes have to manipulate bits directly,
+and @command{gawk} provides functions for doing so.
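+
+For example, @command{gawk}'s @code{and()} and @code{lshift()}
+functions (@pxref{Bitwise Functions}) treat their numeric operands as
+groups of bits:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print and(0x12, 0x0f), lshift(1, 3) @}'}
+@print{} 2 8
+@end example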
+
+Programs are written in programming languages.
+Hundreds, if not thousands, of programming languages exist.
+One of the most popular is the C programming language.
+The C language had a very strong influence on the design of
+the @command{awk} language.
+
+@cindex Kernighan, Brian
+@cindex Ritchie, Dennis
+There have been several versions of C. The first is often referred to
+as ``K&R'' C, after the initials of Brian Kernighan and Dennis Ritchie,
+the authors of the first book on C. (Dennis Ritchie created the language,
+and Brian Kernighan was one of the creators of @command{awk}.)
+
+In the mid-1980s, an effort began to produce an international standard
+for C. This work culminated in 1989, with the production of the ANSI
+standard for C. This standard became an ISO standard in 1990.
+In 1999, a revised ISO C standard was approved and released.
+Where it makes sense, POSIX @command{awk} is compatible with 1999 ISO C.
+
+
+@node Glossary
+@unnumbered Glossary
+
+@table @asis
+@item Action
+A series of @command{awk} statements attached to a rule. If the rule's
+pattern matches an input record, @command{awk} executes the
+rule's action. Actions are always enclosed in braces.
+(@xref{Action Overview}.)
+
+@cindex Ada programming language
+@cindex programming languages, Ada
+@item Ada
+A programming language originally defined by the U.S.@: Department of
+Defense for embedded programming. It was designed to enforce good
+Software Engineering practices.
+
+@cindex Spencer, Henry
+@cindex @command{sed} utility
+@cindex amazing @command{awk} assembler (@command{aaa})
+@item Amazing @command{awk} Assembler
+Henry Spencer at the University of Toronto wrote a retargetable assembler
+completely as @command{sed} and @command{awk} scripts. It is thousands
+of lines long, including machine descriptions for several eight-bit
+microcomputers. It is a good example of a program that would have been
+better written in another language.
+You can get it from @uref{http://awk.info/?awk100/aaa}.
+
+@cindex amazingly workable formatter (@command{awf})
+@cindex @command{awf} (amazingly workable formatter) program
+@item Amazingly Workable Formatter (@command{awf})
+Henry Spencer at the University of Toronto wrote a formatter that accepts
+a large subset of the @samp{nroff -ms} and @samp{nroff -man} formatting
+commands, using @command{awk} and @command{sh}.
+It is available
+from @uref{http://awk.info/?tools/awf}.
+
+@item Anchor
+The regexp metacharacters @samp{^} and @samp{$}, which force the match
+to the beginning or end of the string, respectively.
+
+@cindex ANSI
+@item ANSI
+The American National Standards Institute. This organization produces
+many standards, among them the standards for the C and C++ programming
+languages.
+These standards often become international standards as well. See also
+``ISO.''
+
+@item Array
+A grouping of multiple values under the same name.
+Most languages just provide sequential arrays.
+@command{awk} provides associative arrays.
+
+@item Assertion
+A statement in a program that a condition is true at this point in the program.
+Useful for reasoning about how a program is supposed to behave.
+
+@item Assignment
+An @command{awk} expression that changes the value of some @command{awk}
+variable or data object. An object that you can assign to is called an
+@dfn{lvalue}. The assigned values are called @dfn{rvalues}.
+@xref{Assignment Ops}.
+
+@item Associative Array
+Arrays in which the indices may be numbers or strings, not just
+sequential integers in a fixed range.
+
+@item @command{awk} Language
+The language in which @command{awk} programs are written.
+
+@item @command{awk} Program
+An @command{awk} program consists of a series of @dfn{patterns} and
+@dfn{actions}, collectively known as @dfn{rules}. For each input record
+given to the program, the program's rules are all processed in turn.
+@command{awk} programs may also contain function definitions.
+
+@item @command{awk} Script
+Another name for an @command{awk} program.
+
+@item Bash
+The GNU version of the standard shell
+@ifnotinfo
+(the @b{B}ourne-@b{A}gain @b{SH}ell).
+@end ifnotinfo
+@ifinfo
+(the Bourne-Again SHell).
+@end ifinfo
+See also ``Bourne Shell.''
+
+@item Bit
+Short for ``Binary Digit.''
+All values in computer memory ultimately reduce to binary digits: values
+that are either zero or one.
+Groups of bits may be interpreted differently---as integers,
+floating-point numbers, character data, addresses of other
+memory objects, or other data.
+@command{awk} lets you work with floating-point numbers and strings.
+@command{gawk} lets you manipulate bit values with the built-in
+functions described in
+@ref{Bitwise Functions}.
+
+Computers are often defined by how many bits they use to represent integer
+values. Typical systems are 32-bit systems, but 64-bit systems are
+becoming increasingly popular, and 16-bit systems have essentially
+disappeared.
+
+@item Boolean Expression
+Named after the English mathematician Boole. See also ``Logical Expression.''
+
+@item Bourne Shell
+The standard shell (@file{/bin/sh}) on Unix and Unix-like systems,
+originally written by Steven R.@: Bourne at Bell Laboratories.
+Many shells (Bash, @command{ksh}, @command{pdksh}, @command{zsh}) are
+generally upwardly compatible with the Bourne shell.
+
+@item Braces
+The characters @samp{@{} and @samp{@}}. Braces are used in
+@command{awk} for delimiting actions, compound statements, and function
+bodies.
+
+@item Built-in Function
+The @command{awk} language provides built-in functions that perform various
+numerical, I/O-related, and string computations. Examples are
+@code{sqrt()} (for the square root of a number) and @code{substr()} (for a
+substring of a string).
+@command{gawk} provides functions for timestamp management, bit manipulation,
+array sorting, type checking,
+and runtime string translation.
+(@xref{Built-in}.)
+
+@item Built-in Variable
+@code{ARGC},
+@code{ARGV},
+@code{CONVFMT},
+@code{ENVIRON},
+@code{FILENAME},
+@code{FNR},
+@code{FS},
+@code{NF},
+@code{NR},
+@code{OFMT},
+@code{OFS},
+@code{ORS},
+@code{RLENGTH},
+@code{RSTART},
+@code{RS},
+and
+@code{SUBSEP}
+are the variables that have special meaning to @command{awk}.
+In addition,
+@code{ARGIND},
+@code{BINMODE},
+@code{ERRNO},
+@code{FIELDWIDTHS},
+@code{FPAT},
+@code{IGNORECASE},
+@code{LINT},
+@code{PROCINFO},
+@code{RT},
+and
+@code{TEXTDOMAIN}
+are the variables that have special meaning to @command{gawk}.
+Changing some of them affects @command{awk}'s running environment.
+(@xref{Built-in Variables}.)
+
+@item C
+The system programming language that most GNU software is written in. The
+@command{awk} programming language has C-like syntax, and this @value{DOCUMENT}
+points out similarities between @command{awk} and C when appropriate.
+
+In general, @command{gawk} attempts to be as similar to the 1990 version
+of ISO C as makes sense.
+
+@item C++
+A popular object-oriented programming language derived from C.
+
+@cindex ASCII
+@cindex ISO 8859-1
+@cindex ISO Latin-1
+@cindex character sets (machine character encodings)
+@cindex Unicode
+@item Character Set
+The set of numeric codes used by a computer system to represent the
+characters (letters, numbers, punctuation, etc.) of a particular country
+or place. The most common character set in use today is ASCII (American
+Standard Code for Information Interchange). Many European
+countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1).
+The @uref{http://www.unicode.org, Unicode character set} is
+increasingly popular and standard, and is particularly
+widely used on GNU/Linux systems.
+
+@cindex Kernighan, Brian
+@cindex Bentley, Jon
+@cindex @command{chem} utility
+@item CHEM
+A preprocessor for @command{pic} that reads descriptions of molecules
+and produces @command{pic} input for drawing them.
+It was written in @command{awk}
+by Brian Kernighan and Jon Bentley, and is available from
+@uref{http://netlib.sandia.gov/netlib/typesetting/chem.gz}.
+
+@item Comparison Expression
+A relation that is either true or false, such as @samp{a < b}.
+Comparison expressions are used in @code{if}, @code{while}, @code{do},
+and @code{for}
+statements, and in patterns to select which input records to process.
+(@xref{Typing and Comparison}.)
+
+@cindex compiled programs
+@item Compiler
+A program that translates human-readable source code into
+machine-executable object code. The object code is then executed
+directly by the computer.
+See also ``Interpreter.''
+
+@item Compound Statement
+A series of @command{awk} statements, enclosed in curly braces. Compound
+statements may be nested.
+(@xref{Statements}.)
+
+@item Concatenation
+Concatenating two strings means sticking them together, one after another,
+producing a new string. For example, the string @samp{foo} concatenated with
+the string @samp{bar} gives the string @samp{foobar}.
+(@xref{Concatenation}.)
+
+@item Conditional Expression
+An expression using the @samp{?:} ternary operator, such as
+@samp{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression
+@var{expr1} is evaluated; if the result is true, the value of the whole
+expression is the value of @var{expr2}; otherwise the value is
+@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
+is evaluated. (@xref{Conditional Exp}.)
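+
+For example, this sketch (the variable names are invented) sets
+@code{max} to the larger of @code{a} and @code{b}:
+
+@example
+max = (a > b ? a : b)
+@end example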
+
+@cindex McIlroy, Doug
+@cindex cookie
+@item Cookie
+A peculiar goodie, token, saying or remembrance
+produced by or presented to a program. (With thanks to Professor Doug McIlroy.)
+@ignore
+From: Doug McIlroy <doug@cs.dartmouth.edu>
+Date: Sat, 13 Oct 2012 19:55:25 -0400
+To: arnold@skeeve.com
+Subject: Re: origin of the term "cookie"?
+
+I believe the term "cookie", for a more or less inscrutable
+saying or crumb of information, was injected into Unix
+jargon by Bob Morris, who used the word quite frequently.
+It had no fixed meaning as it now does in browsers.
+
+The word had been around long before it was recognized in
+the 8th edition glossary (earlier editions had no glossary):
+
+cookie a peculiar goodie, token, saying or remembrance
+returned by or presented to a program. [I would say that
+"returned by" would better read "produced by", and assume
+responsibility for the inexactitude.]
+
+Doug McIlroy
+
+From: Doug McIlroy <doug@cs.dartmouth.edu>
+Date: Sun, 14 Oct 2012 10:08:43 -0400
+To: arnold@skeeve.com
+Subject: Re: origin of the term "cookie"?
+
+> Can I forward your email to Eric Raymond, for possible addition to the
+> Jargon File?
+
+Sure. I might add that I don't know how "cookie" entered Morris's
+vocabulary. Certainly "values of beta give rise to dom!" (see google)
+was an early, if not the earliest Unix cookie. The fact that it was
+found lying around on a model 37 teletype (which had Greek beta in
+its type box) suggests that maybe it was seen to be like milk and
+cookies laid out for Santa Claus. Morris was wont to make such
+connections.
+
+Doug
+@end ignore
+
+@item Coprocess
+A subordinate program with which two-way communication is possible.
+
+@item Curly Braces
+See ``Braces.''
+
+@cindex dark corner
+@item Dark Corner
+An area in the language where specifications often were (or still
+are) not clear, leading to unexpected or undesirable behavior.
+Such areas are marked in this @value{DOCUMENT} with
+@iftex
+the picture of a flashlight in the margin
+@end iftex
+@ifnottex
+``(d.c.)'' in the text
+@end ifnottex
+and are indexed under the heading ``dark corner.''
+
+@item Data Driven
+A description of @command{awk} programs, where you specify the data you
+are interested in processing, and what to do when that data is seen.
+
+@item Data Objects
+These are numbers and strings of characters. Numbers are converted into
+strings and vice versa, as needed.
+(@xref{Conversion}.)
+
+@item Deadlock
+The situation in which two communicating processes are each waiting
+for the other to perform an action.
+
+@item Debugger
+A program used to help developers remove ``bugs'' from (de-bug)
+their programs.
+
+@item Double Precision
+An internal representation of numbers that can have fractional parts.
+Double precision numbers keep track of more digits than do single precision
+numbers, but operations on them are sometimes more expensive. This is the way
+@command{awk} stores numeric values. It is the C type @code{double}.
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression. It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value can vary.
+(@xref{Computed Regexps}.)
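+
+For example (the variable and the pattern are purely illustrative),
+the righthand side of @samp{~} below is a dynamic regexp whose value
+comes from the variable @code{sought}:
+
+@example
+BEGIN @{ sought = "ba+r" @}
+$0 ~ sought @{ print "matched:", $0 @}
+@end example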
+
+@item Empty String
+See ``Null String.''
+
+@item Environment
+A collection of strings, of the form @samp{@var{name}=@var{val}}, that each
+program has available to it. Users generally place values into the
+environment in order to provide information to various programs. Typical
+examples are the environment variables @env{HOME} and @env{PATH}.
+
+@cindex epoch, definition of
+@item Epoch
+The date used as the ``beginning of time'' for timestamps.
+Time values in most systems are represented as seconds since the epoch,
+with library functions available for converting these values into
+standard date and time formats.
+
+The epoch on Unix and POSIX systems is 1970-01-01 00:00:00 UTC.
+See also ``GMT'' and ``UTC.''
+
+@item Escape Sequences
+A special sequence of characters used for describing nonprinting
+characters, such as @samp{\n} for newline or @samp{\033} for the ASCII
+ESC (Escape) character. (@xref{Escape Sequences}.)
+
+@item Extension
+An additional feature or change to a programming language or
+utility not defined by that language's or utility's standard.
+@command{gawk} has (too) many extensions over POSIX @command{awk}.
+
+@item FDL
+See ``Free Documentation License.''
+
+@item Field
+When @command{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp that you can
+change by setting the predefined variable @code{FS}). Such pieces are
+called fields. If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+If you wish to specify the contents of fields instead of the field
+separator, you can use the predefined variable @code{FPAT} to do so.
+(@xref{Field Separators},
+@ref{Constant Size},
+and
+@ref{Splitting By Content}.)
+
+@item Flag
+A variable whose truth value indicates the existence or nonexistence
+of some condition.
+
+@item Floating-Point Number
+Often referred to in mathematical terms as a ``rational'' or real number,
+this is just a number that can have a fractional part.
+See also ``Double Precision'' and ``Single Precision.''
+
+@item Format
+Format strings control the appearance of output in the
+@code{strftime()} and @code{sprintf()} functions, and in the
+@code{printf} statement as well. Also, data conversions from numbers to strings
+are controlled by the format strings contained in the predefined variables
+@code{CONVFMT} and @code{OFMT}. (@xref{Control Letters}.)
+
+@item Free Documentation License
+This document describes the terms under which this @value{DOCUMENT}
+is published and may be copied. (@xref{GNU Free Documentation License}.)
+
+@cindex FSF (Free Software Foundation)
+@cindex Free Software Foundation (FSF)
+@cindex Stallman, Richard
+@item Free Software Foundation
+A nonprofit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@item FSF
+See ``Free Software Foundation.''
+
+@item Function
+A specialized group of statements used to encapsulate general
+or program-specific tasks. @command{awk} has a number of built-in
+functions, and also allows you to define your own.
+(@xref{Functions}.)
+
+@item @command{gawk}
+The GNU implementation of @command{awk}.
+
+@cindex GPL (General Public License)
+@cindex General Public License (GPL)
+@cindex GNU General Public License
+@item General Public License
+This document describes the terms under which @command{gawk} and its source
+code may be distributed. (@xref{Copying}.)
+
+@item GMT
+``Greenwich Mean Time.''
+This is the old term for UTC.
+It is the time of day used internally for Unix and POSIX systems.
+See also ``Epoch'' and ``UTC.''
+
+@cindex FSF (Free Software Foundation)
+@cindex Free Software Foundation (FSF)
+@cindex GNU Project
+@item GNU
+``GNU's not Unix''. An on-going project of the Free Software Foundation
+to create a complete, freely distributable, POSIX-compliant computing
+environment.
+
+@item GNU/Linux
+A variant of the GNU system using the Linux kernel, instead of the
+Free Software Foundation's Hurd kernel.
+The Linux kernel is a stable, efficient, full-featured clone of Unix that has
+been ported to a variety of architectures.
+It is most popular on PC-class systems, but runs well on a variety of
+other systems too.
+The Linux kernel source code is available under the terms of the GNU General
+Public License, which is perhaps its most important aspect.
+
+@item GPL
+See ``General Public License.''
+
+@item Hexadecimal
+Base 16 notation, where the digits are @code{0}--@code{9} and
+@code{A}--@code{F}, with @samp{A}
+representing 10, @samp{B} representing 11, and so on, up to @samp{F} for 15.
+Hexadecimal numbers are written in C using a leading @samp{0x},
+to indicate their base. Thus, @code{0x12} is 18 ((1 x 16) + 2).
+@xref{Nondecimal-numbers}.
+
+@item I/O
+Abbreviation for ``Input/Output,'' the act of moving data into and/or
+out of a running program.
+
+@item Input Record
+A single chunk of data that is read in by @command{awk}. Usually, an @command{awk} input
+record consists of one line of text.
+(@xref{Records}.)
+
+@item Integer
+A whole number, i.e., a number that does not have a fractional part.
+
+@item Internationalization
+The process of writing or modifying a program so
+that it can use multiple languages without requiring
+further source code changes.
+
+@cindex interpreted programs
+@item Interpreter
+A program that reads human-readable source code directly, and uses
+the instructions in it to process data and produce results.
+@command{awk} is typically (but not always) implemented as an interpreter.
+See also ``Compiler.''
+
+@item Interval Expression
+A component of a regular expression that lets you specify repeated matches of
+some part of the regexp. Interval expressions were not originally available
+in @command{awk} programs.
+
+@cindex ISO
+@item ISO
+The International Organization for Standardization.
+This organization produces international standards for many things, including
+programming languages, such as C and C++.
+In the computer arena, important standards like those for C, C++, and POSIX
+become both American national and ISO international standards simultaneously.
+This @value{DOCUMENT} refers to Standard C as ``ISO C'' throughout.
+See @uref{http://www.iso.org/iso/home/about.htm, the ISO website} for more
+information about the name of the organization and its language-independent
+three-letter acronym.
+
+@cindex Java programming language
+@cindex programming languages, Java
+@item Java
+A modern programming language originally developed by Sun Microsystems
+(now Oracle) supporting Object-Oriented programming. Although usually
+implemented by compiling to the instructions for a standard virtual
+machine (the JVM), the language can be compiled to native code.
+
+@item Keyword
+In the @command{awk} language, a keyword is a word that has special
+meaning. Keywords are reserved and may not be used as variable names.
+
+@command{gawk}'s keywords are:
+@code{BEGIN},
+@code{BEGINFILE},
+@code{END},
+@code{ENDFILE},
+@code{break},
+@code{case},
+@code{continue},
+@code{default},
+@code{delete},
+@code{do@dots{}while},
+@code{else},
+@code{exit},
+@code{for@dots{}in},
+@code{for},
+@code{function},
+@code{func},
+@code{if},
+@code{next},
+@code{nextfile},
+@code{switch},
+and
+@code{while}.
+
+@cindex LGPL (Lesser General Public License)
+@cindex Lesser General Public License (LGPL)
+@cindex GNU Lesser General Public License
+@item Lesser General Public License
+This document describes the terms under which binary library archives
+or shared objects,
+and their source code may be distributed.
+
+@item LGPL
+See ``Lesser General Public License.''
+
+@item Linux
+See ``GNU/Linux.''
+
+@item Localization
+The process of providing the data necessary for an
+internationalized program to work in a particular language.
+
+@item Logical Expression
+An expression using the operators for logic, AND, OR, and NOT, written
+@samp{&&}, @samp{||}, and @samp{!} in @command{awk}. Often called Boolean
+expressions, after the mathematician who pioneered this kind of
+mathematical logic.
+
+@item Lvalue
+An expression that can appear on the left side of an assignment
+operator. In most languages, lvalues can be variables or array
+elements. In @command{awk}, a field designator can also be used as an
+lvalue.
+
+@item Matching
+The act of testing a string against a regular expression. If the
+regexp describes the contents of the string, it is said to @dfn{match} it.
+
+@item Metacharacters
+Characters used within a regexp that do not stand for themselves.
+Instead, they denote regular expression operations, such as repetition,
+grouping, or alternation.
+
+@item No-op
+An operation that does nothing.
+
+@item Null String
+A string with no characters in it. It is represented explicitly in
+@command{awk} programs by placing two double quote characters next to
+each other (@code{""}). It can appear in input data by having two successive
+occurrences of the field separator appear next to each other.
+
+@item Number
+A numeric-valued data object. Modern @command{awk} implementations use
+double precision floating-point to represent numbers.
+Ancient @command{awk} implementations used single precision floating-point.
+
+@item Octal
+Base-eight notation, where the digits are @code{0}--@code{7}.
+Octal numbers are written in C using a leading @samp{0},
+to indicate their base. Thus, @code{013} is 11 ((1 x 8) + 3).
+@xref{Nondecimal-numbers}.
+
+@item Pattern
+Patterns tell @command{awk} which input records are interesting to which
+rules.
+
+A pattern is an arbitrary conditional expression against which input is
+tested. If the condition is satisfied, the pattern is said to @dfn{match}
+the input record. A typical pattern might compare the input record against
+a regular expression. (@xref{Pattern Overview}.)
+
+@item PEBKAC
+An acronym describing what is possibly the most frequent
+source of computer usage problems. (Problem Exists Between
+Keyboard And Chair.)
+
+@item POSIX
+The name for a series of standards
+that specify a Portable Operating System interface. The ``IX'' denotes
+the Unix heritage of these standards. The main standard of interest for
+@command{awk} users is
+@cite{IEEE Standard for Information Technology, Standard 1003.1-2008}.
+The 2008 POSIX standard can be found online at
+@url{http://www.opengroup.org/onlinepubs/9699919799/}.
+
+@item Precedence
+The order in which operations are performed when operators are used
+without explicit parentheses.
+
+@item Private
+Variables and/or functions that are meant for use exclusively by library
+functions and not for the main @command{awk} program. Special care must be
+taken when naming such variables and functions.
+(@xref{Library Names}.)
+
+@item Range (of input lines)
+A sequence of consecutive lines from the input file(s). A pattern
+can specify ranges of input lines for @command{awk} to process or it can
+specify single lines. (@xref{Pattern Overview}.)
+
+@item Recursion
+When a function calls itself, either directly or indirectly.
+If this is clear, stop, and proceed to the next entry.
+Otherwise, refer to the entry for ``recursion.''
+
+@item Redirection
+Redirection means performing input from something other than the standard input
+stream, or performing output to something other than the standard output stream.
+
+You can redirect input to the @code{getline} statement using
+the @samp{<}, @samp{|}, and @samp{|&} operators.
+You can redirect the output of the @code{print} and @code{printf} statements
+to a file or a system command, using the @samp{>}, @samp{>>}, @samp{|}, and @samp{|&}
+operators.
+(@xref{Getline},
+and @ref{Redirection}.)
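+
+For example, these statements (with made-up @value{FN}s) send output
+to a file and read a line of output from a command, respectively:
+
+@example
+print "hello" > "results.txt"
+"date" | getline now
+@end example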
+
+@item Regexp
+See ``Regular Expression.''
+
+@item Regular Expression
+A regular expression (``regexp'' for short) is a pattern that denotes a
+set of strings, possibly an infinite set. For example, the regular expression
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}. In @command{awk}, regular expressions are
+used in patterns and in conditional expressions. Regular expressions may contain
+escape sequences. (@xref{Regexp}.)
+
+@item Regular Expression Constant
+A regular expression constant is a regular expression written within
+slashes, such as @code{/foo/}. This regular expression is chosen
+when you write the @command{awk} program and cannot be changed during
+its execution. (@xref{Regexp Usage}.)
+
+@item Rule
+A segment of an @command{awk} program that specifies how to process single
+input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
+@command{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @command{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.
+
+@item Rvalue
+A value that can appear on the right side of an assignment operator.
+In @command{awk}, essentially every expression has a value. These values
+are rvalues.
+
+@item Scalar
+A single value, be it a number or a string.
+Regular variables are scalars; arrays and functions are not.
+
+@item Search Path
+In @command{gawk}, a list of directories to search for @command{awk} program source files.
+In the shell, a list of directories to search for executable programs.
+
+@item @command{sed}
+See ``Stream Editor.''
+
+@item Seed
+The initial value, or starting point, for a sequence of random numbers.
+
+@item Shell
+The command interpreter for Unix and POSIX-compliant systems.
+The shell works both interactively, and as a programming language
+for batch files, or shell scripts.
+
+@item Short-Circuit
+The nature of the @command{awk} logical operators @samp{&&} and @samp{||}.
+If the value of the entire expression is determinable from evaluating just
+the lefthand side of these operators, the righthand side is not
+evaluated.
+(@xref{Boolean Ops}.)
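+
+For example, in the following sketch the division is never attempted
+when @code{n} is zero, because the lefthand operand of @samp{&&} is
+already false:
+
+@example
+if (n != 0 && total / n > 100)
+    print "average is large"
+@end example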
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value. Assignment expressions, increment and decrement
+expressions, and function calls have side effects.
+(@xref{Assignment Ops}.)
+
+@item Single Precision
+An internal representation of numbers that can have fractional parts.
+Single precision numbers keep track of fewer digits than do double precision
+numbers, but operations on them are sometimes less expensive in terms of CPU time.
+This is the type used by some ancient versions of @command{awk} to store
+numeric values. It is the C type @code{float}.
+
+@item Space
+The character generated by hitting the space bar on the keyboard.
+
+@item Special File
+A @value{FN} interpreted internally by @command{gawk}, instead of being handed
+directly to the underlying operating system---for example, @file{/dev/stderr}.
+(@xref{Special Files}.)
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time. This is in contrast with batch programs, which may
+expect to read their input files in entirety before starting to do
+anything, as well as with interactive programs which require input from the
+user.
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}. Constant strings are written with double quotes in the
+@command{awk} language and may contain escape sequences.
+(@xref{Escape Sequences}.)
+
+@item Tab
+The character generated by hitting the @kbd{TAB} key on the keyboard.
+It usually expands to up to eight spaces upon output.
+
+@item Text Domain
+A unique name that identifies an application.
+Used for grouping messages that are translated at runtime
+into the local language.
+
+@item Timestamp
+A value in the ``seconds since the epoch'' format used by Unix
+and POSIX systems. Used for the @command{gawk} functions
+@code{mktime()}, @code{strftime()}, and @code{systime()}.
+See also ``Epoch,'' ``GMT,'' and ``UTC.''
+
+@cindex Linux
+@cindex GNU/Linux
+@cindex Unix
+@cindex BSD-based operating systems
+@cindex NetBSD
+@cindex FreeBSD
+@cindex OpenBSD
+@item Unix
+A computer operating system originally developed in the early 1970's at
+AT&T Bell Laboratories. It initially became popular in universities around
+the world and later moved into commercial environments as a software
+development system and network server system. There are many commercial
+versions of Unix, as well as several work-alike systems whose source code
+is freely available (such as GNU/Linux, @uref{http://www.netbsd.org, NetBSD},
+@uref{http://www.freebsd.org, FreeBSD}, and @uref{http://www.openbsd.org, OpenBSD}).
+
+@item UTC
+The accepted abbreviation for ``Coordinated Universal Time.''
+This is standard time in Greenwich, England, which is used as a
+reference time for day and date calculations.
+See also ``Epoch'' and ``GMT.''
+
+@item Whitespace
+A sequence of space, TAB, or newline characters occurring inside an input
+record or a string.
+@end table
+
+@end ifclear
+
+@c The GNU General Public License.
+@node Copying
+@unnumbered GNU General Public License
+@ifnotdocbook
+@center Version 3, 29 June 2007
+@end ifnotdocbook
+@docbook
+<subtitle>Version 3, 29 June 2007</subtitle>
+@end docbook
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2007 Free Software Foundation, Inc. @url{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+@end display
+
+@c fakenode --- for prepinfo
+@heading Preamble
+
+The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom
+to share and change all versions of a program---to make sure it remains
+free software for all its users. We, the Free Software Foundation,
+use the GNU General Public License for most of our software; it
+applies also to any other work released this way by its authors. You
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you
+have certain responsibilities if you distribute copies of the
+software, or if you modify it: responsibilities to respect the freedom
+of others.
+
+For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too,
+receive or can get the source code. And you must show them these
+terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the
+manufacturer can do so. This is fundamentally incompatible with the
+aim of protecting users' freedom to change the software. The
+systematic pattern of such abuse occurs in the area of products for
+individuals to use, which is precisely where it is most unacceptable.
+Therefore, we have designed this version of the GPL to prohibit the
+practice for those products. If such problems arise substantially in
+other domains, we stand ready to extend this provision to those
+domains in future versions of the GPL, as needed to protect the
+freedom of users.
+
+Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish
+to avoid the special danger that patents applied to a free program
+could make it effectively proprietary. To prevent this, the GPL
+assures that patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+@c fakenode --- for prepinfo
+@heading TERMS AND CONDITIONS
+
+@enumerate 0
+@item Definitions.
+
+``This License'' refers to version 3 of the GNU General Public License.
+
+``Copyright'' also means copyright-like laws that apply to other kinds
+of works, such as semiconductor masks.
+
+``The Program'' refers to any copyrightable work licensed under this
+License. Each licensee is addressed as ``you''. ``Licensees'' and
+``recipients'' may be individuals or organizations.
+
+To ``modify'' a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of
+an exact copy. The resulting work is called a ``modified version'' of
+the earlier work or a work ``based on'' the earlier work.
+
+A ``covered work'' means either the unmodified Program or a work based
+on the Program.
+
+To ``propagate'' a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+To ``convey'' a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user
+through a computer network, with no transfer of a copy, is not
+conveying.
+
+An interactive user interface displays ``Appropriate Legal Notices'' to
+the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+@item Source Code.
+
+The ``source code'' for a work means the preferred form of the work for
+making modifications to it. ``Object code'' means any non-source form
+of a work.
+
+A ``Standard Interface'' means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+The ``System Libraries'' of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+``Major Component'', in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+The ``Corresponding Source'' for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can
+regenerate automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same
+work.
+
+@item Basic Permissions.
+
+All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey,
+without conditions so long as your license otherwise remains in force.
+You may convey covered works to others for the sole purpose of having
+them make modifications exclusively for you, or provide you with
+facilities for running those works, provided that you comply with the
+terms of this License in conveying all material for which you do not
+control copyright. Those thus making or running the covered works for
+you must do so exclusively on your behalf, under your direction and
+control, on terms that prohibit them from making any copies of your
+copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under the
+conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+@item Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such
+circumvention is effected by exercising rights under this License with
+respect to the covered work, and you disclaim any intention to limit
+operation or modification of the work as a means of enforcing, against
+the work's users, your or third parties' legal rights to forbid
+circumvention of technological measures.
+
+@item Conveying Verbatim Copies.
+
+You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+@item Conveying Modified Source Versions.
+
+You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these
+conditions:
+
+@enumerate a
+@item
+The work must carry prominent notices stating that you modified it,
+and giving a relevant date.
+
+@item
+The work must carry prominent notices stating that it is released
+under this License and any conditions added under section 7. This
+requirement modifies the requirement in section 4 to ``keep intact all
+notices''.
+
+@item
+You must license the entire work, as a whole, under this License to
+anyone who comes into possession of a copy. This License will
+therefore apply, along with any applicable section 7 additional terms,
+to the whole of the work, and all its parts, regardless of how they
+are packaged. This License gives no permission to license the work in
+any other way, but it does not invalidate such permission if you have
+separately received it.
+
+@item
+If the work has interactive user interfaces, each must display
+Appropriate Legal Notices; however, if the Program has interactive
+interfaces that do not display Appropriate Legal Notices, your work
+need not make them do so.
+@end enumerate
+
+A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+``aggregate'' if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+@item Conveying Non-Source Forms.
+
+You may convey a covered work in object code form under the terms of
+sections 4 and 5, provided that you also convey the machine-readable
+Corresponding Source under the terms of this License, in one of these
+ways:
+
+@enumerate a
+@item
+Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by the
+Corresponding Source fixed on a durable physical medium customarily
+used for software interchange.
+
+@item
+Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by a written
+offer, valid for at least three years and valid for as long as you
+offer spare parts or customer support for that product model, to give
+anyone who possesses the object code either (1) a copy of the
+Corresponding Source for all the software in the product that is
+covered by this License, on a durable physical medium customarily used
+for software interchange, for a price no more than your reasonable
+cost of physically performing this conveying of source, or (2) access
+to copy the Corresponding Source from a network server at no charge.
+
+@item
+Convey individual copies of the object code with a copy of the written
+offer to provide the Corresponding Source. This alternative is
+allowed only occasionally and noncommercially, and only if you
+received the object code with such an offer, in accord with subsection
+6b.
+
+@item
+Convey the object code by offering access from a designated place
+(gratis or for a charge), and offer equivalent access to the
+Corresponding Source in the same way through the same place at no
+further charge. You need not require recipients to copy the
+Corresponding Source along with the object code. If the place to copy
+the object code is a network server, the Corresponding Source may be
+on a different server (operated by you or a third party) that supports
+equivalent copying facilities, provided you maintain clear directions
+next to the object code saying where to find the Corresponding Source.
+Regardless of what server hosts the Corresponding Source, you remain
+obligated to ensure that it is available for as long as needed to
+satisfy these requirements.
+
+@item
+Convey the object code using peer-to-peer transmission, provided you
+inform other peers where the object code and Corresponding Source of
+the work are being offered to the general public at no charge under
+subsection 6d.
+
+@end enumerate
+
+A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+A ``User Product'' is either (1) a ``consumer product'', which means any
+tangible personal property which is normally used for personal,
+family, or household purposes, or (2) anything designed or sold for
+incorporation into a dwelling. In determining whether a product is a
+consumer product, doubtful cases shall be resolved in favor of
+coverage. For a particular product received by a particular user,
+``normally used'' refers to a typical or common use of that class of
+product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected
+to use, the product. A product is a consumer product regardless of
+whether the product has substantial commercial, industrial or
+non-consumer uses, unless such uses represent the only significant
+mode of use of the product.
+
+``Installation Information'' for a User Product means any methods,
+procedures, authorization keys, or other information required to
+install and execute modified versions of a covered work in that User
+Product from a modified version of its Corresponding Source. The
+information must suffice to ensure that the continued functioning of
+the modified object code is in no case prevented or interfered with
+solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or
+updates for a work that has been modified or installed by the
+recipient, or for the User Product in which it has been modified or
+installed. Access to a network may be denied when the modification
+itself materially and adversely affects the operation of the network
+or violates the rules and protocols for communication across the
+network.
+
+Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+@item Additional Terms.
+
+``Additional permissions'' are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders
+of that material) supplement the terms of this License with terms:
+
+@enumerate a
+@item
+Disclaiming warranty or limiting liability differently from the terms
+of sections 15 and 16 of this License; or
+
+@item
+Requiring preservation of specified reasonable legal notices or author
+attributions in that material or in the Appropriate Legal Notices
+displayed by works containing it; or
+
+@item
+Prohibiting misrepresentation of the origin of that material, or
+requiring that modified versions of such material be marked in
+reasonable ways as different from the original version; or
+
+@item
+Limiting the use for publicity purposes of names of licensors or
+authors of the material; or
+
+@item
+Declining to grant rights under trademark law for use of some trade
+names, trademarks, or service marks; or
+
+@item
+Requiring indemnification of licensors and authors of that material by
+anyone who conveys the material (or modified versions of it) with
+contractual assumptions of liability to the recipient, for any
+liability that these contractual assumptions directly impose on those
+licensors and authors.
+@end enumerate
+
+All other non-permissive additional terms are considered ``further
+restrictions'' within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions; the
+above requirements apply either way.
+
+@item Termination.
+
+You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+However, if you cease all violation of this License, then your license
+from a particular copyright holder is reinstated (a) provisionally,
+unless and until the copyright holder explicitly and finally
+terminates your license, and (b) permanently, if the copyright holder
+fails to notify you of the violation by some reasonable means prior to
+60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+@item Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or run
+a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+@item Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+An ``entity transaction'' is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+@item Patents.
+
+A ``contributor'' is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's ``contributor version''.
+
+A contributor's ``essential patent claims'' are all patent claims owned
+or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, ``control'' includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+In the following three paragraphs, a ``patent license'' is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To ``grant'' such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. ``Knowingly relying'' means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+A patent license is ``discriminatory'' if it does not include within the
+scope of its coverage, prohibits the exercise of, or is conditioned on
+the non-exercise of one or more of the rights that are specifically
+granted under this License. You may not convey a covered work if you
+are a party to an arrangement with a third party that is in the
+business of distributing software, under which you make payment to the
+third party based on the extent of your activity of conveying the
+work, and under which the third party grants, to any of the parties
+who would receive the covered work from you, a discriminatory patent
+license (a) in connection with copies of the covered work conveyed by
+you (or copies made from those copies), or (b) primarily for and in
+connection with specific products or compilations that contain the
+covered work, unless you entered into that arrangement, or that patent
+license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+@item No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey
+a covered work so as to satisfy simultaneously your obligations under
+this License and any other pertinent obligations, then as a
+consequence you may not convey it at all. For example, if you agree
+to terms that obligate you to collect a royalty for further conveying
+from those to whom you convey the Program, the only way you could
+satisfy both those terms and this License would be to refrain entirely
+from conveying the Program.
+
+@item Use with the GNU Affero General Public License.
+
+Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+@item Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions
+of the GNU General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies that a certain numbered version of the GNU General Public
+License ``or any later version'' applies to it, you have the option of
+following the terms and conditions either of that numbered version or
+of any later version published by the Free Software Foundation. If
+the Program does not specify a version number of the GNU General
+Public License, you may choose any version ever published by the Free
+Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions
+of the GNU General Public License can be used, that proxy's public
+statement of acceptance of a version permanently authorizes you to
+choose that version for the Program.
+
+Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+@item Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' WITHOUT
+WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
+PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+@item Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
+CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR
+LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM
+TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER
+PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+@item Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+@end enumerate
+
+@c fakenode --- for prepinfo
+@heading END OF TERMS AND CONDITIONS
+
+@c fakenode --- for prepinfo
+@heading How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) @var{year} @var{name of author}
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see @url{http://www.gnu.org/licenses/}.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+@smallexample
+@var{program} Copyright (C) @var{year} @var{name of author}
+This program comes with ABSOLUTELY NO WARRANTY; for details type @samp{show w}.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type @samp{show c} for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, your
+program's commands might be different; for a GUI interface, you would
+use an ``about box''.
+
+You should also get your employer (if you work as a programmer) or school,
+if any, to sign a ``copyright disclaimer'' for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+@url{http://www.gnu.org/licenses/}.
+
+The GNU General Public License does not permit incorporating your
+program into proprietary programs. If your program is a subroutine
+library, you may consider it more useful to permit linking proprietary
+applications with the library. If this is what you want to do, use
+the GNU Lesser General Public License instead of this License. But
+first, please read @url{http://www.gnu.org/philosophy/why-not-lgpl.html}.
+
+@ifclear FOR_PRINT
+@c The GNU Free Documentation License.
+@node GNU Free Documentation License
+@unnumbered GNU Free Documentation License
+@ifnotdocbook
+@center Version 1.3, 3 November 2008
+@end ifnotdocbook
+
+@docbook
+<subtitle>Version 1.3, 3 November 2008</subtitle>
+@end docbook
+
+@cindex FDL (Free Documentation License)
+@cindex Free Documentation License (FDL)
+@cindex GNU Free Documentation License
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+@uref{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document @dfn{free} in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+@item
+APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work, in any medium, that
+contains a notice placed by the copyright holder saying it can be
+distributed under the terms of this License. Such a notice grants a
+world-wide, royalty-free license, unlimited in duration, to use that
+work under the conditions stated herein. The ``Document'', below,
+refers to any such manual or work. Any member of the public is a
+licensee, and is addressed as ``you''. You accept the license if you
+copy, modify or distribute the work in a way requiring permission
+under copyright law.
+
+A ``Modified Version'' of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A ``Secondary Section'' is a named appendix or a front-matter section
+of the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall
+subject (or to related matters) and contains nothing that could fall
+directly within that overall subject. (Thus, if the Document is in
+part a textbook of mathematics, a Secondary Section may not explain
+any mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The ``Invariant Sections'' are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License. If a
+section does not fit the above definition of Secondary then it is not
+allowed to be designated as Invariant. The Document may contain zero
+Invariant Sections. If the Document does not identify any Invariant
+Sections then there are none.
+
+The ``Cover Texts'' are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License. A Front-Cover Text may
+be at most 5 words, and a Back-Cover Text may be at most 25 words.
+
+A ``Transparent'' copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, that is suitable for revising the document
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup, or absence of markup, has been arranged to thwart
+or discourage subsequent modification by readers is not Transparent.
+An image format is not Transparent if used for any substantial amount
+of text. A copy that is not ``Transparent'' is called ``Opaque''.
+
+Examples of suitable formats for Transparent copies include plain
+@sc{ascii} without markup, Texinfo input format, La@TeX{} input
+format, @acronym{SGML} or @acronym{XML} using a publicly available
+@acronym{DTD}, and standard-conforming simple @acronym{HTML},
+PostScript or @acronym{PDF} designed for human modification. Examples
+of transparent image formats include @acronym{PNG}, @acronym{XCF} and
+@acronym{JPG}. Opaque formats include proprietary formats that can be
+read and edited only by proprietary word processors, @acronym{SGML} or
+@acronym{XML} for which the @acronym{DTD} and/or processing tools are
+not generally available, and the machine-generated @acronym{HTML},
+PostScript or @acronym{PDF} produced by some word processors for
+output purposes only.
+
+The ``Title Page'' means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, ``Title Page'' means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+The ``publisher'' means any person or entity that distributes copies
+of the Document to the public.
+
+A section ``Entitled XYZ'' means a named subunit of the Document whose
+title either is precisely XYZ or contains XYZ in parentheses following
+text that translates XYZ in another language. (Here XYZ stands for a
+specific section name mentioned below, such as ``Acknowledgements'',
+``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title''
+of such a section when you modify the Document means that it remains a
+section ``Entitled XYZ'' according to this definition.
+
+The Document may include Warranty Disclaimers next to the notice which
+states that this License applies to the Document. These Warranty
+Disclaimers are considered to be included by reference in this
+License, but only as regards disclaiming warranties: any other
+implication that these Warranty Disclaimers may have is void and has
+no effect on the meaning of this License.
+
+@item
+VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+@item
+COPYING IN QUANTITY
+
+If you publish printed copies (or copies in media that commonly have
+printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a computer-network location from which the general network-using
+public has access to download using public-standard network protocols
+a complete Transparent copy of the Document, free of added material.
+If you use the latter option, you must take reasonably prudent steps,
+when you begin distribution of Opaque copies in quantity, to ensure
+that this Transparent copy will remain thus accessible at the stated
+location until at least one year after the last time you distribute an
+Opaque copy (directly or through your agents or retailers) of that
+edition to the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+@item
+MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+@enumerate A
+@item
+Use in the Title Page (and on the covers, if any) a title distinct
+from that of the Document, and from those of previous versions
+(which should, if there were any, be listed in the History section
+of the Document). You may use the same title as a previous version
+if the original publisher of that version gives permission.
+
+@item
+List on the Title Page, as authors, one or more persons or entities
+responsible for authorship of the modifications in the Modified
+Version, together with at least five of the principal authors of the
+Document (all of its principal authors, if it has fewer than five),
+unless they release you from this requirement.
+
+@item
+State on the Title page the name of the publisher of the
+Modified Version, as the publisher.
+
+@item
+Preserve all the copyright notices of the Document.
+
+@item
+Add an appropriate copyright notice for your modifications
+adjacent to the other copyright notices.
+
+@item
+Include, immediately after the copyright notices, a license notice
+giving the public permission to use the Modified Version under the
+terms of this License, in the form shown in the Addendum below.
+
+@item
+Preserve in that license notice the full lists of Invariant Sections
+and required Cover Texts given in the Document's license notice.
+
+@item
+Include an unaltered copy of this License.
+
+@item
+Preserve the section Entitled ``History'', Preserve its Title, and add
+to it an item stating at least the title, year, new authors, and
+publisher of the Modified Version as given on the Title Page. If
+there is no section Entitled ``History'' in the Document, create one
+stating the title, year, authors, and publisher of the Document as
+given on its Title Page, then add an item describing the Modified
+Version as stated in the previous sentence.
+
+@item
+Preserve the network location, if any, given in the Document for
+public access to a Transparent copy of the Document, and likewise
+the network locations given in the Document for previous versions
+it was based on. These may be placed in the ``History'' section.
+You may omit a network location for a work that was published at
+least four years before the Document itself, or if the original
+publisher of the version it refers to gives permission.
+
+@item
+For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
+the Title of the section, and preserve in the section all the
+substance and tone of each of the contributor acknowledgements and/or
+dedications given therein.
+
+@item
+Preserve all the Invariant Sections of the Document,
+unaltered in their text and in their titles. Section numbers
+or the equivalent are not considered part of the section titles.
+
+@item
+Delete any section Entitled ``Endorsements''. Such a section
+may not be included in the Modified Version.
+
+@item
+Do not retitle any existing section to be Entitled ``Endorsements'' or
+to conflict in title with any Invariant Section.
+
+@item
+Preserve any Warranty Disclaimers.
+@end enumerate
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section Entitled ``Endorsements'', provided it contains
+nothing but endorsements of your Modified Version by various
+parties---for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+@item
+COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice, and that you preserve all their Warranty Disclaimers.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections Entitled ``History''
+in the various original documents, forming one section Entitled
+``History''; likewise combine any sections Entitled ``Acknowledgements'',
+and any sections Entitled ``Dedications''. You must delete all
+sections Entitled ``Endorsements.''
+
+@item
+COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+@item
+AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, is called an ``aggregate'' if the copyright
+resulting from the compilation is not used to limit the legal rights
+of the compilation's users beyond what the individual works permit.
+When the Document is included in an aggregate, this License does not
+apply to the other works in the aggregate which are not themselves
+derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on
+covers that bracket the Document within the aggregate, or the
+electronic equivalent of covers if the Document is in electronic form.
+Otherwise they must appear on printed covers that bracket the whole
+aggregate.
+
+@item
+TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers. In case of a disagreement between
+the translation and the original version of this License or a notice
+or disclaimer, the original version will prevail.
+
+If a section in the Document is Entitled ``Acknowledgements'',
+``Dedications'', or ``History'', the requirement (section 4) to Preserve
+its Title (section 1) will typically require changing the actual
+title.
+
+@item
+TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense, or distribute it is void, and
+will automatically terminate your rights under this License.
+
+However, if you cease all violation of this License, then your license
+from a particular copyright holder is reinstated (a) provisionally,
+unless and until the copyright holder explicitly and finally
+terminates your license, and (b) permanently, if the copyright holder
+fails to notify you of the violation by some reasonable means prior to
+60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, receipt of a copy of some or all of the same material does
+not give you any rights to use it.
+
+@item
+FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+@uref{http://www.gnu.org/copyleft/}.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License ``or any later version'' applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation. If the Document
+specifies that a proxy can decide which future versions of this
+License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the
+Document.
+
+@item
+RELICENSING
+
+``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any
+World Wide Web server that publishes copyrightable works and also
+provides prominent facilities for anybody to edit those works. A
+public wiki that anybody can edit is an example of such a server. A
+``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the
+site means any set of copyrightable works thus published on the MMC
+site.
+
+``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0
+license published by Creative Commons Corporation, a not-for-profit
+corporation with a principal place of business in San Francisco,
+California, as well as future copyleft versions of that license
+published by that same organization.
+
+``Incorporate'' means to publish or republish a Document, in whole or
+in part, as part of another Document.
+
+An MMC is ``eligible for relicensing'' if it is licensed under this
+License, and if all works that were first published under this License
+somewhere other than this MMC, and subsequently incorporated in whole
+or in part into the MMC, (1) had no cover texts or invariant sections,
+and (2) were thus incorporated prior to November 1, 2008.
+
+The operator of an MMC Site may republish an MMC contained in the site
+under CC-BY-SA on the same site at any time before August 1, 2009,
+provided the MMC is eligible for relicensing.
+
+@end enumerate
+
+@c fakenode --- for prepinfo
+@unnumberedsec ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+@smallexample
+@group
+ Copyright (C) @var{year} @var{your name}.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.3
+ or any later version published by the Free Software Foundation;
+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+ Texts. A copy of the license is included in the section entitled ``GNU
+ Free Documentation License''.
+@end group
+@end smallexample
+
+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
+replace the ``with@dots{}Texts.'' line with this:
+
+@smallexample
+@group
+ with the Invariant Sections being @var{list their titles}, with
+ the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
+ being @var{list}.
+@end group
+@end smallexample
+
+If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
+
+@end ifclear
+
+@ifnotdocbook
+@node Index
+@unnumbered Index
+@end ifnotdocbook
+@printindex cp
+
+@bye
+
+Unresolved Issues:
+------------------
+1. From ADR.
+
+   Robert J. Chassell points out that awk programs should have some indication
+   of how to use them.  It would perhaps be useful to have a "programming
+   style" section of the manual that would include this and other tips.
+
+Consistency issues:
+ /.../ regexps are in @code, not @samp
+ ".." strings are in @code, not @samp
+ no @print before @dots
+	values of expressions in the text (@code{x} has the value 15)
+	should be in roman, not @code
+ Use TAB and not tab
+ Use ESC and not ESCAPE
+ Use space and not blank to describe the space bar's character
+ The term "blank" is thus basically reserved for "blank lines" etc.
+	To make dark corners work, the @value{DARKCORNER} has to be outside
+	the closing `.' of a sentence and after the (@pxref{...}).
+ " " should have an @w{} around it
+	Use "non-" only with language names or acronyms, or with the words "bug", "option", and "null"
+ Use @command{ftp} when talking about anonymous ftp
+ Use uppercase and lowercase, not "upper-case" and "lower-case"
+ or "upper case" and "lower case"
+ Use "single precision" and "double precision", not "single-precision" or "double-precision"
+ Use alphanumeric, not alpha-numeric
+ Use POSIX-compliant, not POSIX compliant
+ Use --foo, not -Wfoo when describing long options
+ Use "Bell Laboratories", but not "Bell Labs".
+ Use "behavior" instead of "behaviour".
+ Use "coprocess" instead of "co-process".
+ Use "zeros" instead of "zeroes".
+ Use "nonzero" not "non-zero".
+ Use "runtime" not "run time" or "run-time".
+ Use "command-line" as an adjective and "command line" as a noun.
+ Use "online" not "on-line".
+ Use "whitespace" not "white space".
+ Use "Input/Output", not "input/output". Also "I/O", not "i/o".
+ Use "lefthand"/"righthand", not "left-hand"/"right-hand".
+ Use "workaround", not "work-around".
+ Use "startup"/"cleanup", not "start-up"/"clean-up"
+ Use "filesystem", not "file system"
+ Use @code{do}, and not @code{do}-@code{while}, except where
+ actually discussing the do-while.
+ Use "versus" in text and "vs." in index entries
+ Use @code{"C"} for the C locale, not ``C'' or @samp{C}.
+ The words "a", "and", "as", "between", "for", "from", "in", "of",
+ "on", "that", "the", "to", "with", and "without",
+ should not be capitalized in @chapter, @section etc.
+ "Into" and "How" should.
+ Search for @dfn; make sure important items are also indexed.
+ "e.g." should always be followed by a comma.
+ "i.e." should always be followed by a comma.
+	The numbers zero through ten should be spelled out, except when
+	talking about file descriptor numbers.  For values > 10 or < 0,
+	it's ok to use numerals.
+ For most cases, do NOT put a comma before "and", "or" or "but".
+ But exercise taste with this rule.
+ Don't show the awk command with a program in quotes when it's
+ just the program. I.e.
+
+ {
+ ....
+ }
+
+ not
+ awk '{
+ ...
+ }'
+
+	Do show it when showing command-line arguments, data files, etc., even
+	if there is no output shown.
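+	For instance (a hypothetical illustration; the data file is just
+	an example), the "do show it" case might look like:
+
+		awk -F: '{ print $1, $7 }' /etc/passwd
+
+	which prints each user's login name and shell.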
+
+ Use numbered lists only to show a sequential series of steps.
+
+ Use @code{xxx} for the xxx operator in indexing statements, not @samp.
+ Use MS-Windows not MS Windows
+	Use MS-DOS not MS DOS
+ Use an empty set of parentheses after built-in and awk function names.
+ Use "multiFOO" without a hyphen.
+
+Date: Wed, 13 Apr 94 15:20:52 -0400
+From: rms@gnu.org (Richard Stallman)
+To: gnu-prog@gnu.org
+Subject: A reminder: no pathnames in GNU
+
+It's a GNU convention to use the term "file name" for the name of a
+file, never "pathname". We use the term "path" for search paths,
+which are lists of file names. Using it for a single file name as
+well is potentially confusing to users.
+
+So please check any documentation you maintain, if you think you might
+have used "pathname".
+
+Note that "file name" should be two words when it appears as ordinary
+text. It's ok as one word when it's a metasyntactic variable, though.
+
+------------------------
+ORA uses filename, thus the macro.
+
+Suggestions:
+------------
+
+Better sidebars can almost sort of be done with:
+
+ @ifdocbook
+	@macro sidebar{title, content}
+ @inlinefmt{docbook, <sidebar><title>}
+ \title\
+ @inlinefmt{docbook, </title>}
+ \content\
+ @inlinefmt{docbook, </sidebar>}
+ @end macro
+ @end ifdocbook
+
+
+ @ifnotdocbook
+	@macro sidebar{title, content}
+ @cartouche
+ @center @b{\title\}
+
+ \content\
+ @end cartouche
+ @end macro
+ @end ifnotdocbook
+
+But to use it you have to say
+
+ @sidebar{Title Here,
+ @include file-with-content
+ }
+
+which sorta sucks.
+
+TODO:
+Check that all dark corners are indexed properly.