summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2010-07-15 22:57:04 +0300
committerArnold D. Robbins <arnold@skeeve.com>2010-07-15 22:57:04 +0300
commitea2e6b8b87ba6cdd6145a844dac3938678221988 (patch)
tree6cedda623d031b208a32ba18d26c61d86c5d6411
parentc6c0baa03be793460f7a1ab90882d49134724503 (diff)
downloadgawk-2.10-from-net.tar.gz
Removed files from gawk-2.10gawk-2.10-from-netgawk-2.10-from-net
-rw-r--r--att.getopt.c94
-rw-r--r--gawk-info6151
-rw-r--r--gawk-info-11231
-rw-r--r--gawk-info-21265
-rw-r--r--gawk-info-31385
-rw-r--r--gawk-info-41400
-rw-r--r--gawk-info-5960
-rw-r--r--gawk.11344
-rw-r--r--gawk.aux202
-rw-r--r--gawk.cp234
-rw-r--r--gawk.cps253
-rw-r--r--gawk.dvibin320368 -> 0 bytes
-rw-r--r--gawk.fn10
-rw-r--r--gawk.fns13
-rw-r--r--gawk.ky0
-rw-r--r--gawk.kys0
-rw-r--r--gawk.pg0
-rw-r--r--gawk.pgs0
-rw-r--r--gawk.texinfo6587
-rw-r--r--gawk.toc104
-rw-r--r--gawk.tp0
-rw-r--r--gawk.tps0
-rw-r--r--gawk.vr17
-rw-r--r--gawk.vrs21
-rw-r--r--gnu.getopt.c417
-rw-r--r--makefile.pc169
26 files changed, 0 insertions, 21857 deletions
diff --git a/att.getopt.c b/att.getopt.c
deleted file mode 100644
index df68405f..00000000
--- a/att.getopt.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-** @(#)getopt.c 2.5 (smail) 9/15/87
-*/
-
-/*
- * Here's something you've all been waiting for: the AT&T public domain
- * source for getopt(3). It is the code which was given out at the 1985
- * UNIFORUM conference in Dallas. I obtained it by electronic mail
- * directly from AT&T. The people there assure me that it is indeed
- * in the public domain.
- *
- * There is no manual page. That is because the one they gave out at
- * UNIFORUM was slightly different from the current System V Release 2
- * manual page. The difference apparently involved a note about the
- * famous rules 5 and 6, recommending using white space between an option
- * and its first argument, and not grouping options that have arguments.
- * Getopt itself is currently lenient about both of these things. White
- * space is allowed, but not mandatory, and the last option in a group can
- * have an argument. That particular version of the man page evidently
- * has no official existence, and my source at AT&T did not send a copy.
- * The current SVR2 man page reflects the actual behavior of this getopt.
- * However, I am not about to post a copy of anything licensed by AT&T.
- */
-
-/* This include is needed only to get "index" defined as "strchr" on Sys V. */
-#ifdef MSDOS
-#define index strchr
-#else
-#include "defs.h"
-#endif
-
-/*LINTLIBRARY*/
-#define NULL 0 /* plain integer zero here, not a pointer-typed constant */
-#define EOF (-1) /* returned by getopt() when option scanning stops */
-#define ERR(s, c) if(opterr){\
- extern int write();\
- char errbuf[2];\
- errbuf[0] = c; errbuf[1] = '\n';\
- (void) write(2, argv[0], (unsigned)strlen(argv[0]));\
- (void) write(2, s, (unsigned)strlen(s));\
- (void) write(2, errbuf, 2);} /* ERR: if opterr is nonzero, write argv[0], then s, then c and a newline to descriptor 2; expects `argv' to be in scope where it is expanded */
-
-extern char *index(); /* used by getopt() to locate the option character inside opts */
-
-int opterr = 1; /* nonzero lets ERR() print its diagnostic */
-int optind = 1; /* index in argv of the element getopt() examines next */
-int optopt; /* option character most recently examined by getopt() */
-char *optarg; /* argument of the option just returned, or NULL when it takes none */
-
-int /* getopt: return the next option character from argv; '?' for an unknown option or a missing argument; EOF when scanning stops */
-getopt(argc, argv, opts)
-int argc;
-char **argv, *opts; /* opts lists the accepted option characters; a ':' after a character means that option takes an argument */
-{
- static int sp = 1; /* offset of the next option character within argv[optind]; kept between calls */
- register int c; /* option character being processed */
- register char *cp; /* where c was found in opts */
-
- if(sp == 1) /* about to start a new argv element */
- if(optind >= argc ||
- argv[optind][0] != '-' || argv[optind][1] == '\0')
- return(EOF); /* argv exhausted, or element is not an option (a lone "-" is not one) */
- else if(strcmp(argv[optind], "--") == NULL) { /* NOTE(review): int result compared with NULL; same as == 0 only because NULL is #defined as 0 in this file */
- optind++; /* step past the "--" before stopping */
- return(EOF);
- }
- optopt = c = argv[optind][sp];
- if(c == ':' || (cp=index(opts, c)) == NULL) { /* ':' is never accepted as an option; otherwise c must occur in opts */
- ERR(": illegal option -- ", c);
- if(argv[optind][++sp] == '\0') { /* group exhausted: move on to the next argv element */
- optind++;
- sp = 1;
- }
- return('?');
- }
- if(*++cp == ':') { /* the option takes an argument */
- if(argv[optind][sp+1] != '\0')
- optarg = &argv[optind++][sp+1]; /* argument is the remainder of this element */
- else if(++optind >= argc) { /* argument would be the next element, but there is none */
- ERR(": option requires an argument -- ", c);
- sp = 1;
- return('?');
- } else
- optarg = argv[optind++]; /* argument is the following element */
- sp = 1; /* the next call starts on a fresh argv element */
- } else { /* the option takes no argument */
- if(argv[optind][++sp] == '\0') { /* that was the last character of this group */
- sp = 1;
- optind++;
- }
- optarg = NULL;
- }
- return(c);
-}
diff --git a/gawk-info b/gawk-info
deleted file mode 100644
index 361bd0c5..00000000
--- a/gawk-info
+++ /dev/null
@@ -1,6151 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: Top, Next: Preface, Prev: (dir), Up: (dir)
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them; it
-contains the following chapters:
-
-* Menu:
-
-* Preface:: What you can do with `awk'; brief history
- and acknowledgements.
-
-* License:: Your right to copy and distribute `gawk'.
-
-* This Manual:: Using this manual.
-
- Includes sample input files that you can use.
-
-* Getting Started:: A basic introduction to using `awk'.
- How to run an `awk' program. Command line syntax.
-
-* Reading Files:: How to read files and manipulate fields.
-
-* Printing:: How to print using `awk'. Describes the
- `print' and `printf' statements.
- Also describes redirection of output.
-
-* One-liners:: Short, sample `awk' programs.
-
-* Patterns:: The various types of patterns explained in detail.
-
-* Actions:: The various types of actions are introduced here.
- Describes expressions and the various operators in
- detail. Also describes comparison expressions.
-
-* Statements:: The various control statements are described in
- detail.
-
-* Arrays:: The description and use of arrays. Also includes
- array--oriented control statements.
-
-* User-defined:: User--defined functions are described in detail.
-
-* Built-in:: The built--in functions are summarized here.
-
-* Special:: The special variables are summarized here.
-
-* Sample Program:: A sample `awk' program with a complete explanation.
-
-* Notes:: Something about the implementation of `gawk'.
-
-* Glossary:: An explanation of some unfamiliar terms.
-
-* Index::
-
-
-
-File: gawk-info, Node: Preface, Next: License, Prev: Top, Up: Top
-
-Preface
-*******
-
-If you are like many computer users, you frequently would like to
-make changes in various text files wherever certain patterns appear,
-or extract data from parts of certain lines while discarding the
-rest. To write a program to do this in a language such as C or
-Pascal is a time--consuming inconvenience that may take many lines of
-code. The job may be easier with `awk'.
-
-The `awk' utility interprets a special--purpose programming language
-that makes it possible to handle simple data--reformatting jobs
-easily with just a few lines of code.
-
-The GNU implementation of `awk' is called `gawk'; it is fully upward
-compatible with the System V Release 3.1 and later versions of `awk'.
-All properly written `awk' programs should work with `gawk'. So we
-usually don't distinguish between `gawk' and other `awk'
-implementations in this manual.
-
-This manual teaches you what `awk' does and how you can use `awk'
-effectively. You should already be familiar with basic,
-general--purpose, operating system commands such as `ls'. Using
-`awk' you can:
-
- * manage small, personal databases,
-
- * generate reports,
-
- * validate data,
-
- * produce indexes, and perform other document preparation tasks,
-
- * even experiment with algorithms that can be adapted later to
- other computer languages!
-
-* Menu:
-
-* History:: The history of gawk and awk. Acknowledgements.
-
-
-
-File: gawk-info, Node: History, Up: Preface
-
-History of `awk' and `gawk'
-===========================
-
-The name `awk' comes from the initials of its designers: Alfred V.
-Aho, Peter J. Weinberger, and Brian W. Kernighan. The original
-version of `awk' was written in 1977. In 1985 a new version made the
-programming language more powerful, introducing user--defined
-functions, multiple input streams, and computed regular expressions.
-
-The GNU implementation, `gawk', was written in 1986 by Paul Rubin and
-Jay Fenlason, with advice from Richard Stallman. John Woods
-contributed parts of the code as well. In 1988, David Trueman, with
-help from Arnold Robbins, reworked `gawk' for compatibility with the
-newer `awk'.
-
-Many people need to be thanked for their assistance in producing this
-manual. Jay Fenlason contributed many ideas and sample programs.
-Richard Mlynarik and Robert Chassell gave helpful comments on drafts
-of this manual. The paper ``A Supplemental Document for `awk''' by
-John W. Pierce of the Chemistry Department at UC San Diego,
-pinpointed several issues relevant both to `awk' implementation and
-to this manual, that would otherwise have escaped us.
-
-Finally, we would like to thank Brian Kernighan of Bell Labs for
-invaluable assistance during the testing and debugging of `gawk', and
-for help in clarifying several points about the language.
-
-
-
-File: gawk-info, Node: License, Next: This Manual, Prev: Preface, Up: Top
-
-GNU GENERAL PUBLIC LICENSE
-**************************
-
- Version 1, February 1989
-
- Copyright (C) 1989 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-=========
-
- The license agreements of most software companies try to keep users
-at the mercy of those companies. By contrast, our General Public
-License is intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-The General Public License applies to the Free Software Foundation's
-software and to any other program whose authors commit to using it.
-You can use it for your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Specifically, the General Public License is designed to make
-sure that you have the freedom to give away or sell copies of free
-software, that you receive source code or can get it if you want it,
-that you can change the software or use pieces of it in new free
-programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if
-you distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of a such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must tell them their rights.
-
- We protect your rights with two steps: (1) copyright the software,
-and (2) offer you this license which gives you legal permission to
-copy, distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on,
-we want its recipients to know that what they have is not the
-original, so that any problems introduced by others will not reflect
-on the original authors' reputations.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 1. This License Agreement applies to any program or other work
- which contains a notice placed by the copyright holder saying it
- may be distributed under the terms of this General Public
- License. The ``Program'', below, refers to any such program or
- work, and a ``work based on the Program'' means either the
- Program or any work containing the Program or a portion of it,
- either verbatim or with modifications. Each licensee is
- addressed as ``you''.
-
- 2. You may copy and distribute verbatim copies of the Program's
- source code as you receive it, in any medium, provided that you
- conspicuously and appropriately publish on each copy an
- appropriate copyright notice and disclaimer of warranty; keep
- intact all the notices that refer to this General Public License
- and to the absence of any warranty; and give any other
- recipients of the Program a copy of this General Public License
- along with the Program. You may charge a fee for the physical
- act of transferring a copy.
-
- 3. You may modify your copy or copies of the Program or any portion
- of it, and copy and distribute such modifications under the
- terms of Paragraph 1 above, provided that you also do the
- following:
-
- * cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change; and
-
- * cause the whole of any work that you distribute or publish,
- that in whole or in part contains the Program or any part
- thereof, either with or without modifications, to be
- licensed at no charge to all third parties under the terms
- of this General Public License (except that you may choose
- to grant warranty protection to some or all third parties,
- at your option).
-
- * If the modified program normally reads commands
- interactively when run, you must cause it, when started
- running for such interactive use in the simplest and most
- usual way, to print or display an announcement including an
- appropriate copyright notice and a notice that there is no
- warranty (or else, saying that you provide a warranty) and
- that users may redistribute the program under these
- conditions, and telling the user how to view a copy of this
- General Public License.
-
- * You may charge a fee for the physical act of transferring a
- copy, and you may at your option offer warranty protection
- in exchange for a fee.
-
- Mere aggregation of another independent work with the Program
- (or its derivative) on a volume of a storage or distribution
- medium does not bring the other work under the scope of these
- terms.
-
- 4. You may copy and distribute the Program (or a portion or
- derivative of it, under Paragraph 2) in object code or
- executable form under the terms of Paragraphs 1 and 2 above
- provided that you also do one of the following:
-
- * accompany it with the complete corresponding
- machine-readable source code, which must be distributed
- under the terms of Paragraphs 1 and 2 above; or,
-
- * accompany it with a written offer, valid for at least three
- years, to give any third party free (except for a nominal
- charge for the cost of distribution) a complete
- machine-readable copy of the corresponding source code, to
- be distributed under the terms of Paragraphs 1 and 2 above;
- or,
-
- * accompany it with the information you received as to where
- the corresponding source code may be obtained. (This
- alternative is allowed only for noncommercial distribution
- and only if you received the program in object code or
- executable form alone.)
-
- Source code for a work means the preferred form of the work for
- making modifications to it. For an executable file, complete
- source code means all the source code for all modules it
- contains; but, as a special exception, it need not include
- source code for modules which are standard libraries that
- accompany the operating system on which the executable file
- runs, or for standard header files or definitions files that
- accompany that operating system.
-
- 5. You may not copy, modify, sublicense, distribute or transfer the
- Program except as expressly provided under this General Public
- License. Any attempt otherwise to copy, modify, sublicense,
- distribute or transfer the Program is void, and will
- automatically terminate your rights to use the Program under
- this License. However, parties who have received copies, or
- rights to use copies, from you under this General Public License
- will not have their licenses terminated so long as such parties
- remain in full compliance.
-
- 6. By copying, distributing or modifying the Program (or any work
- based on the Program) you indicate your acceptance of this
- license to do so, and all its terms and conditions.
-
- 7. Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from
- the original licensor to copy, distribute or modify the Program
- subject to these terms and conditions. You may not impose any
- further restrictions on the recipients' exercise of the rights
- granted herein.
-
- 8. The Free Software Foundation may publish revised and/or new
- versions of the General Public License from time to time. Such
- new versions will be similar in spirit to the present version,
- but may differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
- Program specifies a version number of the license which applies
- to it and ``any later version'', you have the option of
- following the terms and conditions either of that version or of
- any later version published by the Free Software Foundation. If
- the Program does not specify a version number of the license,
- you may choose any version ever published by the Free Software
- Foundation.
-
- 9. If you wish to incorporate parts of the Program into other free
- programs whose distribution conditions are different, write to
- the author to ask for permission. For software which is
- copyrighted by the Free Software Foundation, write to the Free
- Software Foundation; we sometimes make exceptions for this. Our
- decision will be guided by the two goals of preserving the free
- status of all derivatives of our free software and of promoting
- the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 10. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
- WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
- LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
- HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS''
- WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
- INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
- ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE
- COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 11. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
- WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
- MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
- LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
- INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS
- OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH
- ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
- ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
-Appendix: How to Apply These Terms to Your New Programs
-=======================================================
-
- If you develop a new program, and you want it to be of the greatest
-possible use to humanity, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these
-terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the ``copyright'' line and a pointer to where the full notice is found.
-
- ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
- Copyright (C) 19YY NAME OF AUTHOR
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Also add information on how to contact you by electronic and paper
-mail.
-
-If the program is interactive, make it output a short notice like
-this when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
- The hypothetical commands `show w' and `show c' should show the
-appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than `show w' and
-`show c'; they could even be mouse-clicks or menu items--whatever
-suits your program.
-
-You should also get your employer (if you work as a programmer) or
-your school, if any, to sign a ``copyright disclaimer'' for the
-program, if necessary. Here a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the
- program `Gnomovision' (a program to direct compilers to make passes
- at assemblers) written by James Hacker.
-
- SIGNATURE OF TY COON, 1 April 1989
- Ty Coon, President of Vice
-
-That's all there is to it!
-
-
-
-File: gawk-info, Node: This Manual, Next: Getting Started, Prev: License, Up: Top
-
-Using This Manual
-*****************
-
-The term `gawk' refers to a program (a version of `awk') developed by
-the Free Software Foundation, and to the language you use to tell it
-what to do. When we need to be careful, we call the program ``the
-`awk' utility'' and the language ``the `awk' language''. The purpose
-of this manual is to explain the `awk' language and how to run the
-`awk' utility.
-
-The term "`awk' program" refers to a program written by you in the
-`awk' programming language.
-
-*Note Getting Started::, for the bare essentials you need to know to
-start using `awk'.
-
-Useful ``one--liners'' are included to give you a feel for the `awk'
-language (*note One-liners::.).
-
-A sizable sample `awk' program has been provided for you (*note
-Sample Program::.).
-
-If you find terms that you aren't familiar with, try looking them up
-in the glossary (*note Glossary::.).
-
-Most of the time complete `awk' programs are used as examples, but in
-some of the more advanced sections, only the part of the `awk'
-program that illustrates the concept being described is shown.
-
-* Menu:
-
-This chapter contains the following sections:
-
-* The Files:: Sample data files for use in the `awk' programs
- illustrated in this manual.
-
-
-
-File: gawk-info, Node: The Files, Up: This Manual
-
-Input Files for the Examples
-============================
-
-This manual contains many sample programs. The data for many of
-those programs comes from two files. The first file, called
-`BBS-list', represents a list of computer bulletin board systems and
-information about those systems.
-
-Each line of this file is one "record". Each record contains the
-name of a computer bulletin board, its phone number, the board's baud
-rate, and a code for the number of hours it is operational. An `A'
-in the last column means the board operates 24 hours all week. A `B'
-in the last column means the board operates evening and weekend
-hours, only. A `C' means the board operates only on weekends.
-
- aardvark 555-5553 1200/300 B
- alpo-net 555-3412 2400/1200/300 A
- barfly 555-7685 1200/300 A
- bites 555-1675 2400/1200/300 A
- camelot 555-0542 300 C
- core 555-2912 1200/300 C
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sdace 555-3430 2400/1200/300 A
- sabafoo 555-2127 1200/300 C
-
-The second data file, called `inventory-shipped', represents
-information about shipments during the year. Each line of this file
-is also one record. Each record contains the month of the year, the
-number of green crates shipped, the number of red boxes shipped, the
-number of orange bags shipped, and the number of blue packages
-shipped, respectively.
-
- Jan 13 25 15 115
- Feb 15 32 24 226
- Mar 15 24 34 228
- Apr 31 52 63 420
- May 16 34 29 208
- Jun 31 42 75 492
- Jul 24 34 67 436
- Aug 15 34 47 316
- Sep 13 55 37 277
- Oct 29 54 68 525
- Nov 20 87 82 577
- Dec 17 35 61 401
-
- Jan 21 36 64 620
- Feb 26 58 80 652
- Mar 24 75 70 495
- Apr 21 70 74 514
-
-If you are reading this in GNU Emacs using Info, you can copy the
-regions of text showing these sample files into your own test files.
-This way you can try out the examples shown in the remainder of this
-document. You do this by using the command `M-x write-region' to
-copy text from the Info file into a file for use with `awk' (see your
-``GNU Emacs Manual'' for more information). Using this information,
-create your own `BBS-list' and `inventory-shipped' files, and
-practice what you learn in this manual.
-
-
-
-File: gawk-info, Node: Getting Started, Next: Reading Files, Prev: This Manual, Up: Top
-
-Getting Started With `awk'
-**************************
-
-The basic function of `awk' is to search files for lines (or other
-units of text) that contain certain patterns. When a line matching
-any of those patterns is found, `awk' performs specified actions on
-that line. Then `awk' keeps processing input lines until the end of
-the file is reached.
-
-An `awk' "program" or "script" consists of a series of "rules".
-(It may also contain "function definitions", but that is an
-advanced feature, so let's ignore it for now. *Note User-defined::.)
-
-A rule contains a "pattern", an "action", or both. Actions are
-enclosed in curly braces to distinguish them from patterns.
-Therefore, an `awk' program is a sequence of rules in the form:
-
- PATTERN { ACTION }
- PATTERN { ACTION }
- ...
-
-* Menu:
-
-* Very Simple:: A very simple example.
-* Two Rules:: A less simple one--line example with two rules.
-* More Complex:: A more complex example.
-* Running gawk:: How to run gawk programs; includes command line syntax.
-* Comments:: Adding documentation to gawk programs.
-* Statements/Lines:: Subdividing or combining statements into lines.
-
-* When:: When to use gawk and when to use other things.
-
-
-
-File: gawk-info, Node: Very Simple, Next: Two Rules, Up: Getting Started
-
-A Very Simple Example
-=====================
-
-The following command runs a simple `awk' program that searches the
-input file `BBS-list' for the string of characters: `foo'. (A string
-of characters is usually called, quite simply, a "string".)
-
- awk '/foo/ { print $0 }' BBS-list
-
-When lines containing `foo' are found, they are printed, because
-`print $0' means print the current line. (Just `print' by itself
-also means the same thing, so we could have written that instead.)
-
-You will notice that slashes, `/', surround the string `foo' in the
-actual `awk' program. The slashes indicate that `foo' is a pattern
-to search for. This type of pattern is called a "regular
-expression", and is covered in more detail later (*note Regexp::.).
-There are single quotes around the `awk' program so that the shell
-won't interpret any of it as special shell characters.
-
-Here is what this program prints:
-
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sabafoo 555-2127 1200/300 C
-
-In an `awk' rule, either the pattern or the action can be omitted,
-but not both.
-
-If the pattern is omitted, then the action is performed for *every*
-input line.
-
-If the action is omitted, the default action is to print all lines
-that match the pattern. We could leave out the action (the print
-statement and the curly braces) in the above example, and the result
-would be the same: all lines matching the pattern `foo' would be
-printed. (By comparison, omitting the print statement but retaining
-the curly braces makes an empty action that does nothing; then no
-lines would be printed.)
-
-
-
-File: gawk-info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up: Getting Started
-
-An Example with Two Rules
-=========================
-
-The `awk' utility reads the input files one line at a time. For each
-line, `awk' tries the patterns of all the rules. If several patterns
-match then several actions are run, in the order in which they appear
-in the `awk' program. If no patterns match, then no actions are run.
-
-After processing all the rules (perhaps none) that match the line,
-`awk' reads the next line (however, *note Next::.). This continues
-until the end of the file is reached.
-
-For example, the `awk' program:
-
- /12/ { print $0 }
- /21/ { print $0 }
-
-contains two rules. The first rule has the string `12' as the
-pattern and `print $0' as the action. The second rule has the string
-`21' as the pattern and also has `print $0' as the action. Each
-rule's action is enclosed in its own pair of braces.
-
-This `awk' program prints every line that contains the string `12'
-*or* the string `21'. If a line contains both strings, it is printed
-twice, once by each rule.
-
-If we run this program on our two sample data files, `BBS-list' and
-`inventory-shipped', as shown here:
-
- awk '/12/ { print $0 }
- /21/ { print $0 }' BBS-list inventory-shipped
-
-we get the following output:
-
- aardvark 555-5553 1200/300 B
- alpo-net 555-3412 2400/1200/300 A
- barfly 555-7685 1200/300 A
- bites 555-1675 2400/1200/300 A
- core 555-2912 1200/300 C
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sdace 555-3430 2400/1200/300 A
- sabafoo 555-2127 1200/300 C
- sabafoo 555-2127 1200/300 C
- Jan 21 36 64 620
- Apr 21 70 74 514
-
-Note how the line in `BBS-list' beginning with `sabafoo' was printed
-twice, once for each rule.
-
-
-
-File: gawk-info, Node: More Complex, Next: Running gawk, Prev: Two Rules, Up: Getting Started
-
-A More Complex Example
-======================
-
-Here is an example to give you an idea of what typical `awk' programs
-do. This example shows how `awk' can be used to summarize, select,
-and rearrange the output of another utility. It uses features that
-haven't been covered yet, so don't worry if you don't understand all
-the details.
-
- ls -l | awk '$5 == "Nov" { sum += $4 }
- END { print sum }'
-
-This command prints the total number of bytes in all the files in the
-current directory that were last modified in November (of any year).
-(In the C shell you would need to type a semicolon and then a
-backslash at the end of the first line; in the Bourne shell you can
-type the example as shown.)
-
-The `ls -l' part of this example is a command that gives you a full
-listing of all the files in a directory, including file size and date.
-Its output looks like this:
-
- -rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
- -rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
- -rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
- -rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
- -rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
- -rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
- -rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
- -rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
-
-The first field contains read--write permissions, the second field
-contains the number of links to the file, and the third field
-identifies the owner of the file. The fourth field contains the size
-of the file in bytes. The fifth, sixth, and seventh fields contain
-the month, day, and time, respectively, that the file was last
-modified. Finally, the eighth field contains the name of the file.
-
-The `$5 == "Nov"' in our `awk' program is an expression that tests
-whether the fifth field of the output from `ls -l' matches the string
-`Nov'. Each time a line has the string `Nov' in its fifth field, the
-action `{ sum += $4 }' is performed. This adds the fourth field (the
-file size) to the variable `sum'. As a result, when `awk' has
-finished reading all the input lines, `sum' will be the sum of the
-sizes of files whose lines matched the pattern.
-
-After the last line of output from `ls' has been processed, the `END'
-pattern is executed, and the value of `sum' is printed. In this
-example, the value of `sum' would be 80600.
-
-These more advanced `awk' techniques are covered in later sections
-(*note Actions::.). Before you can move on to more advanced `awk'
-programming, you have to know how `awk' interprets your input and
-displays your output. By manipulating "fields" and using special
-"print" statements, you can produce some very useful and spectacular
-looking reports.
-
-
-
-File: gawk-info, Node: Running gawk, Next: Comments, Prev: More Complex, Up: Getting Started
-
-How to Run `awk' Programs
-=========================
-
-There are several ways to run an `awk' program. If the program is
-short, it is easiest to include it in the command that runs `awk',
-like this:
-
- awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
-
- where PROGRAM consists of a series of PATTERNS and ACTIONS, as
-described earlier.
-
-When the program is long, you would probably prefer to put it in a
-file and run it with a command like this:
-
- awk -f PROGRAM-FILE INPUT-FILE1 INPUT-FILE2 ...
-
-* Menu:
-
-* One-shot:: Running a short throw--away `awk' program.
-* Read Terminal:: Using no input files (input from terminal instead).
-* Long:: Putting permanent `awk' programs in files.
-* Executable Scripts:: Making self--contained `awk' programs.
-* Command Line:: How the `awk' command line is laid out.
-
-
-
-File: gawk-info, Node: One-shot, Next: Read Terminal, Up: Running gawk
-
-One--shot Throw--away `awk' Programs
-------------------------------------
-
-Once you are familiar with `awk', you will often type simple programs
-at the moment you want to use them. Then you can write the program
-as the first argument of the `awk' command, like this:
-
- awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
-
- where PROGRAM consists of a series of PATTERNS and ACTIONS, as
-described earlier.
-
-This command format tells the shell to start `awk' and use the
-PROGRAM to process records in the input file(s). There are single
-quotes around the PROGRAM so that the shell doesn't interpret any
-`awk' characters as special shell characters. They cause the shell
-to treat all of PROGRAM as a single argument for `awk'. They also
-allow PROGRAM to be more than one line long.
-
-This format is also useful for running short or medium--sized `awk'
-programs from shell scripts, because it avoids the need for a
-separate file for the `awk' program. A self--contained shell script
-is more reliable since there are no other files to misplace.
-
-
-
-File: gawk-info, Node: Read Terminal, Next: Long, Prev: One-shot, Up: Running gawk
-
-Running `awk' without Input Files
----------------------------------
-
-You can also use `awk' without any input files. If you type the
-command line:
-
- awk 'PROGRAM'
-
-then `awk' applies the PROGRAM to the "standard input", which usually
-means whatever you type on the terminal. This continues until you
-indicate end--of--file by typing `Control-d'.
-
-For example, if you type:
-
- awk '/th/'
-
-whatever you type next will be taken as data for that `awk' program.
-If you go on to type the following data,
-
- Kathy
- Ben
- Tom
- Beth
- Seth
- Karen
- Thomas
- `Control-d'
-
-then `awk' will print
-
- Kathy
- Beth
- Seth
-
-as matching the pattern `th'. Notice that it did not recognize
-`Thomas' as matching the pattern. The `awk' language is "case
-sensitive", and matches patterns *exactly*.
-
-
-
-File: gawk-info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
-
-Running Long Programs
----------------------
-
-Sometimes your `awk' programs can be very long. In this case it is
-more convenient to put the program into a separate file. To tell
-`awk' to use that file for its program, you type:
-
- awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
-
- The `-f' tells the `awk' utility to get the `awk' program from the
-file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For
-example, you could put the program:
-
- /th/
-
-into the file `th-prog'. Then the command:
-
- awk -f th-prog
-
-does the same thing as this one:
-
- awk '/th/'
-
-which was explained earlier (*note Read Terminal::.). Note that you
-don't usually need single quotes around the file name that you
-specify with `-f', because most file names don't contain any of the
-shell's special characters.
-
-If you want to identify your `awk' program files clearly as such, you
-can add the extension `.awk' to the filename. This doesn't affect
-the execution of the `awk' program, but it does make ``housekeeping''
-easier.
-
-
-
-File: gawk-info, Node: Executable Scripts, Next: Command Line, Prev: Long, Up: Running gawk
-
-Executable `awk' Programs
--------------------------
-
-(The following section assumes that you are already somewhat familiar
-with `awk'.)
-
-Once you have learned `awk', you may want to write self--contained
-`awk' scripts, using the `#!' script mechanism. You can do this on
-BSD Unix systems and GNU.
-
-For example, you could create a text file named `hello', containing
-the following (where `BEGIN' is a feature we have not yet discussed):
-
- #! /bin/awk -f
-
- # a sample awk program
-
- BEGIN { print "hello, world" }
-
-After making this file executable (with the `chmod' command), you can
-simply type:
-
- hello
-
-at the shell, and the system will arrange to run `awk' as if you had
-typed:
-
- awk -f hello
-
-Self--contained `awk' scripts are particularly useful for putting
-`awk' programs into production on your system, without your users
-having to know that they are actually using an `awk' program.
-
-If your system does not support the `#!' mechanism, you can get a
-similar effect using a regular shell script. It would look something
-like this:
-
- : a sample awk program
-
- awk 'PROGRAM' "$@"
-
-Using this technique, it is *vital* to enclose the PROGRAM in single
-quotes to protect it from interpretation by the shell. If you omit
-the quotes, only a shell wizard can predict the result.
-
-The `"$@"' causes the shell to forward all the command line arguments
-to the `awk' program, without interpretation.
-
-
-
-File: gawk-info, Node: Command Line, Prev: Executable Scripts, Up: Running gawk
-
-Details of the `awk' Command Line
----------------------------------
-
-(The following section assumes that you are already familiar with
-`awk'.)
-
-There are two ways to run `awk'. Here are templates for both of
-them; items enclosed in `[' and `]' in these templates are optional.
-
- awk [ -FFS ] [ -- ] 'PROGRAM' FILE ...
- awk [ -FFS ] -f SOURCE-FILE [ -f SOURCE-FILE ... ] [ -- ] FILE ...
-
- Options begin with a minus sign, and consist of a single character.
-The options and their meanings are as follows:
-
-`-FFS'
- This sets the `FS' variable to FS (*note Special::.). As a
- special case, if FS is `t', then `FS' will be set to the tab
- character (`"\t"').
-
-`-f SOURCE-FILE'
- Indicates that the `awk' program is to be found in SOURCE-FILE
- instead of in the first non--option argument.
-
-`--'
- This signals the end of the command line options. If you wish
- to specify an input file named `-f', you can precede it with the
- `--' argument to prevent the `-f' from being interpreted as an
- option. This handling of `--' follows the POSIX argument
- parsing conventions.
-
-Any other options will be flagged as invalid with a warning message,
-but are otherwise ignored.
-
-If the `-f' option is *not* used, then the first non--option command
-line argument is expected to be the program text.
-
-The `-f' option may be used more than once on the command line.
-`awk' will read its program source from all of the named files, as if
-they had been concatenated together into one big file. This is
-useful for creating libraries of `awk' functions. Useful functions
-can be written once, and then retrieved from a standard place,
-instead of having to be included into each individual program. You
-can still type in a program at the terminal and use library
-functions, by specifying `/dev/tty' as one of the arguments to a
-`-f'. Type your program, and end it with the keyboard end--of--file
-character `Control-d'.
-
-Any additional arguments on the command line are made available to
-your `awk' program in the `ARGV' array (*note Special::.). These
-arguments are normally treated as input files to be processed in the
-order specified. However, an argument that has the form VAR`='VALUE
-means to assign the value VALUE to the variable VAR--it does not
-specify a file at all.
-
-Command line options and the program text (if present) are omitted
-from the `ARGV' array. All other arguments, including variable
-assignments, are included (*note Special::.).
-
-The distinction between file name arguments and variable--assignment
-arguments is made when `awk' is about to open the next input file.
-At that point in execution, it checks the ``file name'' to see
-whether it is really a variable assignment; if so, instead of trying
-to read a file it will, *at that point in the execution*, assign the
-variable.
-
-Therefore, the variables actually receive the specified values after
-all previously specified files have been read. In particular, the
-values of variables assigned in this fashion are *not* available
-inside a `BEGIN' rule (*note BEGIN/END::.), since such rules are run
-before `awk' begins scanning the argument list.
-
-The variable assignment feature is most useful for assigning to
-variables such as `RS', `OFS', and `ORS', which control input and
-output formats, before listing the data files. It is also useful for
-controlling state if multiple passes are needed over a data file.
-For example:
-
- awk 'pass == 1 { PASS 1 STUFF }
- pass == 2 { PASS 2 STUFF }' pass=1 datafile pass=2 datafile
-
-
-
-File: gawk-info, Node: Comments, Next: Statements/Lines, Prev: Running gawk, Up: Getting Started
-
-Comments in `awk' Programs
-==========================
-
-When you write a complicated `awk' program, you can put "comments" in
-the program file to help you remember what the program does, and how
-it works.
-
-A comment starts with the sharp sign character, `#', and
-continues to the end of the line. The `awk' language ignores the
-rest of a line following a sharp sign. For example, we could have
-put the following into `th-prog':
-
- # This program finds records containing the pattern `th'. This is how
- # you continue comments on additional lines.
- /th/
-
-You can put comment lines into keyboard--composed throw--away `awk'
-programs also, but this usually isn't very useful; the purpose of a
-comment is to help yourself or another person understand the program
-at another time.
-
-
-
-File: gawk-info, Node: Statements/Lines, Next: When, Prev: Comments, Up: Getting Started
-
-`awk' Statements versus Lines
-=============================
-
-Most often, each line in an `awk' program is a separate statement or
-separate rule, like this:
-
- awk '/12/ { print $0 }
- /21/ { print $0 }' BBS-list inventory-shipped
-
-But sometimes statements can be more than one line, and lines can
-contain several statements.
-
-You can split a statement into multiple lines by inserting a newline
-after any of the following:
-
- , { ? : || &&
-
-Lines ending in `do' or `else' automatically have their statements
-continued on the following line(s). A newline at any other point
-ends the statement.
-
-If you would like to split a single statement into two lines at a
-point where a newline would terminate it, you can "continue" it by
-ending the first line with a backslash character, `\'. This is
-allowed absolutely anywhere in the statement, even in the middle of a
-string or regular expression. For example:
-
- awk '/This program is too long, so continue it\
- on the next line/ { print $1 }'
-
-We have generally not used backslash continuation in the sample
-programs in this manual. Since there is no limit on the length of a
-line, it is never strictly necessary; it just makes programs
-prettier. We have preferred to make them even more pretty by keeping
-the statements short. Backslash continuation is most useful when
-your `awk' program is in a separate source file, instead of typed in
-on the command line.
-
-*Warning: this does not work if you are using the C shell.*
-Continuation with backslash works for `awk' programs in files, and
-also for one--shot programs *provided* you are using the Bourne
-shell, the Korn shell, or the Bourne--again shell. But the C shell
-used on Berkeley Unix behaves differently! There, you must use two
-backslashes in a row, followed by a newline.
-
-When `awk' statements within one rule are short, you might want to
-put more than one of them on a line. You do this by separating the
-statements with semicolons, `;'. This also applies to the rules
-themselves. Thus, the above example program could have been written:
-
- /12/ { print $0 } ; /21/ { print $0 }
-
-*Note:* It is a new requirement that rules on the same line require
-semicolons as a separator in the `awk' language; it was done for
-consistency with the statements in the action part of rules.
-
-
-
-File: gawk-info, Node: When, Prev: Statements/Lines, Up: Getting Started
-
-When to Use `awk'
-=================
-
-What use is all of this to me, you might ask? Using additional
-operating system utilities, more advanced patterns, field separators,
-arithmetic statements, and other selection criteria, you can produce
-much more complex output. The `awk' language is very useful for
-producing reports from large amounts of raw data, like summarizing
-information from the output of standard operating system programs
-such as `ls'. (*Note A More Complex Example: More Complex.)
-
-Programs written with `awk' are usually much smaller than they would
-be in other languages. This makes `awk' programs easy to compose and
-use. Often `awk' programs can be quickly composed at your terminal,
-used once, and thrown away. Since `awk' programs are interpreted,
-you can avoid the usually lengthy edit--compile--test--debug cycle of
-software development.
-
-Complex programs have been written in `awk', including a complete
-retargetable assembler for 8--bit microprocessors (*note Glossary::.
-for more information) and a microcode assembler for a special purpose
-Prolog computer. However, `awk''s capabilities are strained by tasks
-of such complexity.
-
-If you find yourself writing `awk' scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated
-string or pattern matching capabilities. The shell is also good at
-string and pattern matching; in addition it allows powerful use of
-the standard utilities. More conventional languages like C, C++, or
-Lisp offer better facilities for system programming and for managing
-the complexity of large programs. Programs in these languages may
-require more lines of source code than the equivalent `awk' programs,
-but they will be easier to maintain and usually run more efficiently.
-
-
-
-File: gawk-info, Node: Reading Files, Next: Printing, Prev: Getting Started, Up: Top
-
-Reading Files (Input)
-*********************
-
-In the typical `awk' program, all input is read either from the
-standard input (usually the keyboard) or from files whose names you
-specify on the `awk' command line. If you specify input files, `awk'
-reads data from the first one until it reaches the end; then it reads
-the second file until it reaches the end, and so on. The name of the
-current input file can be found in the special variable `FILENAME'
-(*note Special::.).
-
-The input is split automatically into "records", and processed by the
-rules one record at a time. (Records are the units of text mentioned
-in the introduction; by default, a record is a line of text.) Each
-record read is split automatically into "fields", to make it more
-convenient for a rule to work on parts of the record under
-consideration.
-
-On rare occasions you will need to use the `getline' command, which
-can do explicit input from any number of files.
-
-* Menu:
-
-* Records:: Controlling how data is split into records.
-* Fields:: An introduction to fields.
-* Field Separators:: The field separator and how to change it.
-* Multiple:: Reading multi--line records.
-
-* Assignment Options:: Setting variables on the command line and a summary
- of command line syntax. This is an advanced method
- of input.
-
-* Getline:: Reading files under explicit program control
- using the `getline' function.
-* Close Input:: Closing an input file (so you can read from
- the beginning once more).
-
-
-
-File: gawk-info, Node: Records, Next: Fields, Up: Reading Files
-
-How Input is Split into Records
-===============================
-
-The `awk' language divides its input into records and fields.
-Records are separated from each other by the "record separator". By
-default, the record separator is the "newline" character. Therefore,
-normally, a record is a line of text.
-
-Sometimes you may want to use a different character to separate your
-records. You can use different characters by changing the special
-variable `RS'.
-
-The value of `RS' is a string that says how to separate records; the
-default value is `"\n"', the string of just a newline character.
-This is why lines of text are the default record. Although `RS' can
-have any string as its value, only the first character of the string
-will be used as the record separator. The other characters are
-ignored. `RS' is exceptional in this regard; `awk' uses the full
-value of all its other special variables.
-
-The value of `RS' is changed by "assigning" it a new value (*note
-Assignment Ops::.). One way to do this is at the beginning of your
-`awk' program, before any input has been processed, using the special
-`BEGIN' pattern (*note BEGIN/END::.). This way, `RS' is changed to
-its new value before any input is read. The new value of `RS' is
-enclosed in quotation marks. For example:
-
- awk 'BEGIN { RS = "/" } ; { print $0 }' BBS-list
-
-changes the value of `RS' to `/', the slash character, before reading
-any input. Records are now separated by a slash. The second rule in
-the `awk' program (the action with no pattern) will proceed to print
-each record. Since each `print' statement adds a newline at the end
-of its output, the effect of this `awk' program is to copy the input
-with each slash changed to a newline.
-
-Another way to change the record separator is on the command line,
-using the variable--assignment feature (*note Command Line::.).
-
- awk '...' RS="/" SOURCE-FILE
-
-`RS' will be set to `/' before processing SOURCE-FILE.
-
-The empty string (a string of no characters) has a special meaning as
-the value of `RS': it means that records are separated only by blank
-lines. *Note Multiple::, for more details.
-
-The `awk' utility keeps track of the number of records that have been
-read so far from the current input file. This value is stored in a
-special variable called `FNR'. It is reset to zero when a new file
-is started. Another variable, `NR', is the total number of input
-records read so far from all files. It starts at zero but is never
-automatically reset to zero.
-
-If you change the value of `RS' in the middle of an `awk' run, the
-new value is used to delimit subsequent records, but the record
-currently being processed (and records already finished) are not
-affected.
-
-
-
-File: gawk-info, Node: Fields, Next: Non-Constant Fields, Prev: Records, Up: Reading Files
-
-Examining Fields
-================
-
-When `awk' reads an input record, the record is automatically
-separated or "parsed" by the interpreter into pieces called "fields".
-By default, fields are separated by whitespace, like words in a line.
-Whitespace in `awk' means any string of one or more spaces and/or
-tabs; other characters such as newline, formfeed, and so on, that are
-considered whitespace by other languages are *not* considered
-whitespace by `awk'.
-
-The purpose of fields is to make it more convenient for you to refer
-to these pieces of the record. You don't have to use them--you can
-operate on the whole record if you wish--but fields are what make
-simple `awk' programs so powerful.
-
-To refer to a field in an `awk' program, you use a dollar--sign, `$',
-followed by the number of the field you want. Thus, `$1' refers to
-the first field, `$2' to the second, and so on. For example, suppose
-the following is a line of input:
-
- This seems like a pretty nice example.
-
- Here the first field, or `$1', is `This'; the second field, or `$2',
-is `seems'; and so on. Note that the last field, `$7', is
-`example.'. Because there is no space between the `e' and the `.',
-the period is considered part of the seventh field.
-
-No matter how many fields there are, the last field in a record can
-be represented by `$NF'. So, in the example above, `$NF' would be
-the same as `$7', which is `example.'. Why this works is explained
-below (*note Non-Constant Fields::.). If you try to refer to a field
-beyond the last one, such as `$8' when the record has only 7 fields,
-you get the empty string.
-
-Plain `NF', with no `$', is a special variable whose value is the
-number of fields in the current record.
-
-`$0', which looks like an attempt to refer to the zeroth field, is a
-special case: it represents the whole input record. This is what you
-would use when you aren't interested in fields.
-
-Here are some more examples:
-
- awk '$1 ~ /foo/ { print $0 }' BBS-list
-
-This example contains the "matching" operator `~' (*note Comparison
-Ops::.). Using this operator, all records in the file `BBS-list'
-whose first field contains the string `foo' are printed.
-
-By contrast, the following example:
-
- awk '/foo/ { print $1, $NF }' BBS-list
-
-looks for the string `foo' in *the entire record* and prints the
-first field and the last field for each input record containing the
-pattern.
-
-The following program will search the system password file, and print
-the entries for users who have no password.
-
- awk -F: '$2 == ""' /etc/passwd
-
-This program uses the `-F' option on the command line to set the field
-separator. (Fields in `/etc/passwd' are separated by colons. The
-second field represents a user's encrypted password, but if the field
-is empty, that user has no password.)
-
-
-
-File: gawk-info, Node: Non-Constant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files
-
-Non-constant Field Numbers
-==========================
-
-The number of a field does not need to be a constant. Any expression
-in the `awk' language can be used after a `$' to refer to a field.
-The `awk' utility evaluates the expression and uses the "numeric
-value" as a field number. Consider this example:
-
- awk '{ print $NR }'
-
-Recall that `NR' is the number of records read so far: 1 in the first
-record, 2 in the second, etc. So this example will print the first
-field of the first record, the second field of the second record, and
-so on. For the twentieth record, field number 20 will be printed;
-most likely this will make a blank line, because the record will not
-have 20 fields.
-
-Here is another example of using expressions as field numbers:
-
- awk '{ print $(2*2) }' BBS-list
-
-The `awk' language must evaluate the expression `(2*2)' and use its
-value as the field number to print. The `*' sign represents
-multiplication, so the expression `2*2' evaluates to 4. This
-example, then, prints the hours of operation (the fourth field) for
-every line of the file `BBS-list'.
-
-When you use non--constant field numbers, you may ask for a field
-with a negative number. This always results in an empty string, just
-like a field whose number is too large for the input record. For
-example, `$(1-4)' would try to examine field number -3; it would
-result in an empty string.
-
-If the field number you compute is zero, you get the entire record.
-
-The number of fields in the current record is stored in the special
-variable `NF' (*note Special::.). The expression `$NF' is not a
-special feature: it is the direct consequence of evaluating `NF' and
-using its value as a field number.
-
-
-
-File: gawk-info, Node: Changing Fields, Next: Field Separators, Prev: Non-Constant Fields, Up: Reading Files
-
-Changing the Contents of a Field
-================================
-
-You can change the contents of a field as seen by `awk' within an
-`awk' program; this changes what `awk' perceives as the current input
-record. (The actual input is untouched: `awk' never modifies the
-input file.)
-
-Look at this example:
-
- awk '{ $3 = $2 - 10; print $2, $3 }' inventory-shipped
-
-The `-' sign represents subtraction, so this program reassigns field
-three, `$3', to be the value of field two minus ten, ``$2' - 10'.
-(*Note Arithmetic Ops::.) Then field two, and the new value for
-field three, are printed.
-
-In order for this to work, the text in field `$2' must make sense as
-a number; the string of characters must be converted to a number in
-order for the computer to do arithmetic on it. The number resulting
-from the subtraction is converted back to a string of characters
-which then becomes field 3. *Note Conversion::.
-
-When you change the value of a field (as perceived by `awk'), the
-text of the input record is recalculated to contain the new field
-where the old one was. `$0' will from that time on reflect the
-altered field. Thus,
-
- awk '{ $2 = $2 - 10; print $0 }' inventory-shipped
-
-will print a copy of the input file, with 10 subtracted from the
-second field of each line.
-
-You can also assign contents to fields that are out of range. For
-example:
-
- awk '{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 }' inventory-shipped
-
-We've just created `$6', whose value is the average of fields `$2',
-`$3', `$4', and `$5'. The `+' sign represents addition, and the `/'
-sign represents division. For the file `inventory-shipped' `$6'
-represents the average number of parcels shipped for a particular
-month.
-
-Creating a new field changes what `awk' interprets as the current
-input record. The value of `$0' will be recomputed. This
-recomputation affects and is affected by features not yet discussed,
-in particular, the "Output Field Separator", `OFS', which is used to
-separate the fields (*note Output Separators::.), and `NF' (the
-number of fields; *note Fields::.). For example, the value of `NF'
-will be set to the number of the highest out--of--range field you
-create.
-
-Note, however, that merely *referencing* an out--of--range field will
-*not* change the value of either `$0' or `NF'. Referencing an
-out--of--range field merely produces a null string. For example:
-
- if ($(NF+1) != "")
- print "can't happen"
- else
- print "everything is normal"
-
-should print `everything is normal'. (*Note If::, for more
-information about `awk''s `if-else' statements.)
-
-
-
-File: gawk-info, Node: Field Separators, Next: Multiple, Prev: Changing Fields, Up: Reading Files
-
-Specifying How Fields Are Separated
-===================================
-
-You can change the way `awk' splits a record into fields by changing
-the value of the "field separator". The field separator is
-represented by the special variable `FS' in an `awk' program, and can
-be set by `-F' on the command line. The `awk' language scans each
-input line for the field separator character to determine the
-positions of fields within that line. Shell programmers take note!
-`awk' uses the variable `FS', not `IFS'.
-
-The default value of the field separator is a string containing a
-single space. This value is actually a special case; as you know, by
-default, fields are separated by whitespace sequences, not by single
-spaces: two spaces in a row do not delimit an empty field.
-``Whitespace'' is defined as sequences of one or more spaces or tab
-characters.
-
-You change the value of `FS' by "assigning" it a new value. You can
-do this using the special `BEGIN' pattern (*note BEGIN/END::.). This
-pattern allows you to change the value of `FS' before any input is
-read. The new value of `FS' is enclosed in quotation marks. For example,
-set the value of `FS' to the string `","':
-
- awk 'BEGIN { FS = "," } ; { print $2 }'
-
-and use the input line:
-
- John Q. Smith, 29 Oak St., Walamazoo, MI 42139
-
-This `awk' program will extract the string `29 Oak St.'.
-
-Sometimes your input data will contain separator characters that
-don't separate fields the way you thought they would. For instance,
-the person's name in the example we've been using might have a title
-or suffix attached, such as `John Q. Smith, LXIX'. If you assigned
-`FS' to be `,' then:
-
- awk 'BEGIN { FS = "," } ; { print $2 }'
-
-would extract `LXIX', instead of `29 Oak St.'. If you were expecting
-the program to print the address, you would be surprised. So, choose
-your data layout and separator characters carefully to prevent
-problems like this from happening.
-
-You can assign `FS' to be a series of characters. For example, the
-assignment:
-
- FS = ", \t"
-
-makes every area of an input line that consists of a comma followed
-by a space and a tab, into a field separator. (`\t' stands for a tab.)
-
-If `FS' is any single character other than a blank, then that
-character is used as the field separator, and two successive
-occurrences of that character do delimit an empty field.
-
-If you assign `FS' to a string longer than one character, that string
-is evaluated as a "regular expression" (*note Regexp::.). The value
-of the regular expression is used as a field separator.
-
-`FS' can be set on the command line. You use the `-F' argument to do
-so. For example:
-
- awk -F, 'PROGRAM' INPUT-FILES
-
-sets `FS' to be the `,' character. Notice that the argument uses a
-capital `F'. Contrast this with `-f', which specifies a file
-containing an `awk' program. Case is significant in command options:
-the `-F' and `-f' options have nothing to do with each other. You
-can use both options at the same time to set the `FS' argument *and*
-get an `awk' program from a file.
-
-As a special case, if the argument to `-F' is `t', then `FS' is set
-to the tab character. (This is because if you type `-F\t', without
-the quotes, at the shell, the `\' gets deleted, so `awk' figures that
-you really want your fields to be separated with tabs, and not `t's.
-Use `FS="t"' if you really do want to separate your fields with `t's.)
-
-For example, let's use an `awk' program file called `baud.awk' that
-contains the pattern `/300/', and the action `print $1'. We'll use
-the operating system utility `cat' to ``look'' at our program:
-
- % cat baud.awk
- /300/ { print $1 }
-
-Let's also set `FS' to be the `-' character. We will apply all this
-information to the file `BBS-list'. This `awk' program will now
-print a list of the names of the bulletin boards that operate at 300
-baud and the first three digits of their phone numbers.
-
- awk -F- -f baud.awk BBS-list
-
-produces this output:
-
- aardvark 555
- alpo
- barfly 555
- bites 555
- camelot 555
- core 555
- fooey 555
- foot 555
- macfoo 555
- sdace 555
- sabafoo 555
-
-Note the second line of output. If you check the original file, you
-will see that the second line looked like this:
-
- alpo-net 555-3412 2400/1200/300 A
-
-The `-' as part of the system's name was used as the field separator,
-instead of the `-' in the phone number that was originally intended.
-This demonstrates why you have to be careful in choosing your field
-and record separators.
-
-
-
-File: gawk-info, Node: Multiple, Next: Assignment Options, Prev: Field Separators, Up: Reading Files
-
-Multiple--Line Records
-======================
-
-In some data bases, a single line cannot conveniently hold all the
-information in one entry. Then you will want to use multi--line
-records.
-
-The first step in doing this is to choose your data format: when
-records are not defined as single lines, how will you want to define
-them? What should separate records?
-
-One technique is to use an unusual character or string to separate
-records. For example, you could use the formfeed character (written
-`\f' in `awk', as in C) to separate them, making each record a page
-of the file. To do this, just set the variable `RS' to `"\f"' (a
-string containing the formfeed character), or whatever string you
-prefer to use.
-
-Another technique is to have blank lines separate records. By a
-special dispensation, a null string as the value of `RS' indicates
-that records are separated by one or more blank lines. If you set
-`RS' to the null string, a record will always end at the first blank
-line encountered. And the next record won't start until the first
-nonblank line that follows--no matter how many blank lines appear in
-a row, they will be considered one record--separator.
-
-The second step is to separate the fields in the record. One way to
-do this is to put each field on a separate line: to do this, just set
-the variable `FS' to the string `"\n"'. (This simple regular
-expression matches a single newline.) Another idea is to divide each
-of the lines into fields in the normal manner; the regular expression
-`"[ \t\n]+"' will do this nicely by treating the newlines inside the
-record just like spaces.
-
-When `RS' is set to the null string, the newline character *always*
-acts as a field separator. This is in addition to whatever value
-`FS' has. The probable reason for this rule is so that you get
-rational behavior in the default case (i.e. `FS == " "'). This can
-be a problem if you really don't want the newline character to
-separate fields, since there is no way to do that. However, you can
-work around this by using the `split' function to manually break up
-your data (*note String Functions::.).
-
-Here is how to use records separated by blank lines and break each
-line into fields normally:
-
- awk 'BEGIN { RS = ""; FS = "[ \t\n]+" } ; { print $0 }' BBS-list
-
-
-
-File: gawk-info, Node: Assignment Options, Next: Getline, Prev: Multiple, Up: Reading Files
-
-Assigning Variables on the Command Line
-=======================================
-
-You can include variable "assignments" among the file names on the
-command line used to invoke `awk' (*note Command Line::.). Such
-assignments have the form:
-
- VARIABLE=TEXT
-
-and allow you to change variables either at the beginning of the
-`awk' run or in between input files. The variable assignment is
-performed at a time determined by its position among the input file
-arguments: after the processing of the preceding input file argument.
-For example:
-
- awk '{ print $n }' n=4 inventory-shipped n=2 BBS-list
-
-prints the value of field number `n' for all input records. Before
-the first file is read, the command line sets the variable `n' equal
-to 4. This causes the fourth field of the file `inventory-shipped'
-to be printed. After the first file has finished, but before the
-second file is started, `n' is set to 2, so that the second field of
-the file `BBS-list' will be printed.
-
-Command line arguments are made available for explicit examination by
-the `awk' program in an array named `ARGV' (*note Special::.).
-
-
-
-File: gawk-info, Node: Getline, Prev: Assignment Options, Up: Reading Files
-
-Explicit Input with `getline'
-=============================
-
-So far we have been getting our input files from `awk''s main input
-stream--either the standard input (usually your terminal) or the
-files specified on the command line. The `awk' language has a
-special built--in function called `getline' that can be used to read
-input under your explicit control.
-
-This command is quite complex and should *not* be used by beginners.
-The command (and its variations) is covered here because this is the
-section about input. The examples that follow the explanation of the
-`getline' command include material that has not been covered yet.
-Therefore, come back and attempt the `getline' command *after* you
-have reviewed the rest of this manual and have a good knowledge of
-how `awk' works.
-
-When retrieving input, `getline' returns a 1 if it found a record,
-and a 0 if the end of the file was encountered. If there was some
-error in getting a record, such as a file that could not be opened,
-then `getline' returns a -1.
-
-In the following examples, COMMAND stands for a string value that
-represents a shell command.
-
-`getline'
- The `getline' function can be used by itself, in an `awk'
- program, to read input from the current input. All it does in
- this case is read the next input record and split it up into
- fields. This is useful if you've finished processing the
- current record, but you want to do some special processing
- *right now* on the next record. Here's an example:
-
- awk '{
- if (t = index($0, "/*")) {
- if(t > 1)
- tmp = substr($0, 1, t - 1)
- else
- tmp = ""
- u = index(substr($0, t + 2), "*/")
- while (! u) {
- getline
- t = -1
- u = index($0, "*/")
- }
- if(u <= length($0) - 2)
- $0 = tmp substr($0, t + u + 3)
- else
- $0 = tmp
- }
- print $0
- }'
-
- This `awk' program deletes all comments, `/* ... */', from the
- input. By replacing the `print $0' with other statements, you
- could perform more complicated processing on the de--commented
- input, such as search it for matches for a regular expression.
-
- This form of the `getline' command sets `NF' (the number of
- fields; *note Fields::.), `NR' (the number of records read so
- far), the `FNR' variable (*note Records::.), and the value of
- `$0'.
-
- *Note:* The new value of `$0' will be used in testing the
- patterns of any subsequent rules. The original value of `$0'
- that triggered the rule which executed `getline' is lost. By
- contrast, the `next' statement reads a new record but
- immediately begins processing it normally, starting with the
- first rule in the program. *Note Next::.
-
-`getline VAR'
- This form of `getline' reads a record into the variable VAR.
- This is useful when you want your program to read the next
- record from the input file, but you don't want to subject the
- record to the normal input processing.
-
- For example, suppose the next line is a comment, or a special
- string, and you want to read it, but you must make certain that
- it won't accidentally trigger any rules. This version of
- `getline' will allow you to read that line and store it in a
- variable so that the main read--a--line--and--check--each--rule
- loop of `awk' never sees it.
-
- The following example swaps every two lines of input. For
- example, given:
-
- wan
- tew
- free
- phore
-
- it outputs:
-
- tew
- wan
- phore
- free
-
- Here's the program:
-
- awk '{
- if ((getline tmp) > 0) {
- print tmp
- print $0
- } else
- print $0
- }'
-
- The `getline' function used in this way sets only `NR' and `FNR'
- (and of course, VAR). The record is not split into fields, so
- the values of the fields (including `$0') and the value of `NF'
- do not change.
-
-`getline < FILE'
- This form of the `getline' function takes its input from the
- file FILE. Here FILE is a string--valued expression that
- specifies the file name.
-
- This form is useful if you want to read your input from a
- particular file, instead of from the main input stream. For
- example, the following program reads its input record from the
- file `foo.input' when it encounters a first field with a value
- equal to 10 in the current input file.
-
- awk '{
- if ($1 == 10) {
- getline < "foo.input"
- print
- } else
- print
- }'
-
- Since the main input stream is not used, the values of `NR' and
- `FNR' are not changed. But the record read is split into fields
- in the normal manner, so the values of `$0' and other fields are
- changed. So is the value of `NF'.
-
- This does not cause the record to be tested against all the
- patterns in the `awk' program, in the way that would happen if
- the record were read normally by the main processing loop of
- `awk'. However the new record is tested against any subsequent
- rules, just as when `getline' is used without a redirection.
-
-`getline VAR < FILE'
- This form of the `getline' function takes its input from the
- file FILE and puts it in the variable VAR. As above, FILE is a
- string--valued expression that specifies the file to read from.
-
- In this version of `getline', none of the built--in variables
- are changed, and the record is not split into fields. The only
- variable changed is VAR.
-
- For example, the following program copies all the input files to
- the output, except for records that say `@include FILENAME'.
- Such a record is replaced by the contents of the file FILENAME.
-
- awk '{
- if (NF == 2 && $1 == "@include") {
- while ((getline line < $2) > 0)
- print line
- close($2)
- } else
- print
- }'
-
- Note here how the name of the extra input file is not built into
- the program; it is taken from the data, from the second field on
- the `@include' line.
-
- The `close' command is used to ensure that if two identical
- `@include' lines appear in the input, the entire specified file
- is included twice. *Note Close Input::.
-
- One deficiency of this program is that it does not process
- nested `@include' statements the way a true macro preprocessor
- would.
-
-`COMMAND | getline'
- You can "pipe" the output of a command into `getline'. A pipe
- is simply a way to link the output of one program to the input
- of another. In this case, the string COMMAND is run as a shell
- command and its output is piped into `awk' to be used as input.
- This form of `getline' reads one record from the pipe.
-
- For example, the following program copies input to output,
- except for lines that begin with `@execute', which are replaced
- by the output produced by running the rest of the line as a
- shell command:
-
- awk '{
- if ($1 == "@execute") {
- tmp = substr($0, 10)
- while ((tmp | getline) > 0)
- print
- close(tmp)
- } else
- print
- }'
-
- The `close' command is used to ensure that if two identical
- `@execute' lines appear in the input, the command is run again
- for each one. *Note Close Input::.
-
- Given the input:
-
- foo
- bar
- baz
- @execute who
- bletch
-
- the program might produce:
-
- foo
- bar
- baz
- hack ttyv0 Jul 13 14:22
- hack ttyp0 Jul 13 14:23 (gnu:0)
- hack ttyp1 Jul 13 14:23 (gnu:0)
- hack ttyp2 Jul 13 14:23 (gnu:0)
- hack ttyp3 Jul 13 14:23 (gnu:0)
- bletch
-
- Notice that this program ran the command `who' and printed the
- result. (If you try this program yourself, you will get
- different results, showing who is logged in on your system.)
-
- This variation of `getline' splits the record into fields, sets
- the value of `NF' and recomputes the value of `$0'. The values
- of `NR' and `FNR' are not changed.
-
-`COMMAND | getline VAR'
- The output of the command COMMAND is sent through a pipe to
- `getline' and into the variable VAR. For example, the following
- program reads the current date and time into the variable
- `current_time', using the utility called `date', and then prints
- it.
-
- awk 'BEGIN {
- "date" | getline current_time
- close("date")
- print "Report printed on " current_time
- }'
-
- In this version of `getline', none of the built--in variables
- are changed, and the record is not split into fields.
-
-
-
-File: gawk-info, Node: Close Input, Up: Getline
-
-Closing Input Files
--------------------
-
-If the same file name or the same shell command is used with
-`getline' more than once during the execution of the `awk' program,
-the file is opened (or the command is executed) only the first time.
-At that time, the first record of input is read from that file or
-command. The next time the same file or command is used in
-`getline', another record is read from it, and so on.
-
-What this implies is that if you want to start reading the same file
-again from the beginning, or if you want to rerun a shell command
-(rather than reading more output from the command), you must take
-special steps. What you can do is use the `close' statement:
-
- close (FILENAME)
-
-This statement closes a file or pipe, represented here by FILENAME.
-The string value of FILENAME must be the same value as the string
-used to open the file or pipe to begin with.
-
-Once this statement is executed, the next `getline' from that file or
-command will reopen the file or rerun the command.
-
-
-
-File: gawk-info, Node: Printing, Next: One-liners, Prev: Reading Files, Up: Top
-
-Printing Output
-***************
-
-One of the most common things that actions do is to output or "print"
-some or all of the input. For simple output, use the `print'
-statement. For fancier formatting use the `printf' statement. Both
-are described in this chapter.
-
-* Menu:
-
-* Print:: The `print' statement.
-* Print Examples:: Simple examples of `print' statements.
-* Output Separators:: The output separators and how to change them.
-
-* Redirection:: How to redirect output to multiple files and pipes.
-* Close Output:: How to close output files and pipes.
-
-* Printf:: The `printf' statement.
-
-
-
-File: gawk-info, Node: Print, Next: Print Examples, Up: Printing
-
-The `print' Statement
-=====================
-
-The `print' statement does output with simple, standardized
-formatting. You specify only the strings or numbers to be printed,
-in a list separated by commas. They are output, separated by single
-spaces, followed by a newline. The statement looks like this:
-
- print ITEM1, ITEM2, ...
-
- The entire list of items may optionally be enclosed in parentheses.
-The parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a
-redirection (*note Redirection::.). The relational operators are
-`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison
-Ops::.).
-
-The items printed can be constant strings or numbers, fields of the
-current record (such as `$1'), variables, or any `awk' expressions.
-The `print' statement is completely general for computing *what*
-values to print. With one exception (*note Output Separators::.),
-what you can't do is specify *how* to print them--how many columns to
-use, whether to use exponential notation or not, and so on. For
-that, you need the `printf' statement (*note Printf::.).
-
-To print a fixed piece of text, write a string constant as one item,
-such as `"Hello there"'. If you forget to use the double--quote
-characters, your text will be taken as an `awk' expression, and you
-will probably get an error. Keep in mind that a space will be
-printed between any two items.
-
-The simple statement `print' with no items is equivalent to `print
-$0': it prints the entire current record. To print a blank line, use
-`print ""', where `""' is the null, or empty, string.
-
-Most often, each `print' statement makes one line of output. But it
-isn't limited to one line. If an item value is a string that
-contains a newline, the newline is output along with the rest of the
-string. A single `print' can make any number of lines this way.
-
-
-
-File: gawk-info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing
-
-Examples of `print' Statements
-==============================
-
-Here is an example that prints the first two fields of each input
-record, with a space between them:
-
- awk '{ print $1, $2 }' inventory-shipped
-
-Its output looks like this:
-
- Jan 13
- Feb 15
- Mar 15
- ...
-
- A common mistake in using the `print' statement is to omit the comma
-between two items. This often has the effect of making the items run
-together in the output, with no space. The reason for this is that
-juxtaposing two string expressions in `awk' means to concatenate
-them. For example, without the comma:
-
- awk '{ print $1 $2 }' inventory-shipped
-
-prints:
-
- Jan13
- Feb15
- Mar15
- ...
-
- Neither example's output makes much sense to someone unfamiliar with
-the file `inventory-shipped'. A heading line at the beginning would
-make it clearer. Let's add some headings to our table of months
-(`$1') and green crates shipped (`$2'). We do this using the BEGIN
-pattern (*note BEGIN/END::.) to cause the headings to be printed only
-once:
-
- awk 'BEGIN { print "Month Crates"
- print "---- -----" }
- { print $1, $2 }' inventory-shipped
-
-Did you already guess what will happen? This program prints the
-following:
-
- Month Crates
- ---- -----
- Jan 13
- Feb 15
- Mar 15
- ...
-
- The headings and the table data don't line up! We can fix this by
-printing some spaces between the two fields:
-
- awk 'BEGIN { print "Month Crates"
- print "---- -----" }
- { print $1, " ", $2 }' inventory-shipped
-
-You can imagine that this way of lining up columns can get pretty
-complicated when you have many columns to fix. Counting spaces for
-two or three columns can be simple, but more than this and you can
-get ``lost'' quite easily. This is why the `printf' statement was
-created (*note Printf::.); one of its specialties is lining up
-columns of data.
-
-
-
-File: gawk-info, Node: Output Separators, Next: Redirection, Prev: Print Examples, Up: Printing
-
-Output Separators
-=================
-
-As mentioned previously, a `print' statement contains a list of
-items, separated by commas. In the output, the items are normally
-separated by single spaces. But they do not have to be spaces; a
-single space is only the default. You can specify any string of
-characters to use as the "output field separator", by setting the
-special variable `OFS'. The initial value of this variable is the
-string `" "'.
-
-The output from an entire `print' statement is called an "output
-record". Each `print' statement outputs one output record and then
-outputs a string called the "output record separator". The special
-variable `ORS' specifies this string. The initial value of the
-variable is the string `"\n"' containing a newline character; thus,
-normally each `print' statement makes a separate line.
-
-You can change how output fields and records are separated by
-assigning new values to the variables `OFS' and/or `ORS'. The usual
-place to do this is in the `BEGIN' rule (*note BEGIN/END::.), so that
-it happens before any input is processed. You may also do this with
-assignments on the command line, before the names of your input files.
-
-The following example prints the first and second fields of each
-input record separated by a semicolon, with a blank line added after
-each line:
-
- awk 'BEGIN { OFS = ";"; ORS = "\n\n" }
- { print $1, $2 }' BBS-list
-
-If the value of `ORS' does not contain a newline, all your output
-will be run together on a single line, unless you output newlines
-some other way.
-
-
-
-File: gawk-info, Node: Redirection, Next: Printf, Prev: Output Separators, Up: Printing
-
-Redirecting Output of `print' and `printf'
-==========================================
-
-So far we have been dealing only with output that prints to the
-standard output, usually your terminal. Both `print' and `printf'
-can be told to send their output to other places. This is called
-"redirection".
-
-A redirection appears after the `print' or `printf' statement.
-Redirections in `awk' are written just like redirections in shell
-commands, except that they are written inside the `awk' program.
-
-Here are the three forms of output redirection. They are all shown
-for the `print' statement, but they work for `printf' also.
-
-`print ITEMS > OUTPUT-FILE'
- This type of redirection prints the items onto the output file
- OUTPUT-FILE. The file name OUTPUT-FILE can be any expression.
- Its value is changed to a string and then used as a filename
- (*note Expressions::.).
-
- When this type of redirection is used, the OUTPUT-FILE is erased
- before the first output is written to it. Subsequent writes do
- not erase OUTPUT-FILE, but append to it. If OUTPUT-FILE does
- not exist, then it is created.
-
- For example, here is how one `awk' program can write a list of
- BBS names to a file `name-list' and a list of phone numbers to a
- file `phone-list'. Each output file contains one name or number
- per line.
-
- awk '{ print $2 > "phone-list"
- print $1 > "name-list" }' BBS-list
-
-`print ITEMS >> OUTPUT-FILE'
- This type of redirection prints the items onto the output file
- OUTPUT-FILE. The difference between this and the single--`>'
- redirection is that the old contents (if any) of OUTPUT-FILE are
- not erased. Instead, the `awk' output is appended to the file.
-
-`print ITEMS | COMMAND'
- It is also possible to send output through a "pipe" instead of
- into a file. This type of redirection opens a pipe to COMMAND
- and writes the values of ITEMS through this pipe, to another
- process created to execute COMMAND.
-
- The redirection argument COMMAND is actually an `awk'
- expression. Its value is converted to a string, whose contents
- give the shell command to be run.
-
- For example, this produces two files, one unsorted list of BBS
- names and one list sorted in reverse alphabetical order:
-
- awk '{ print $1 > "names.unsorted"
- print $1 | "sort -r > names.sorted" }' BBS-list
-
- Here the unsorted list is written with an ordinary redirection
- while the sorted list is written by piping through the `sort'
- utility.
-
- Here is an example that uses redirection to mail a message to a
- mailing list `bug-system'. This might be useful when trouble is
- encountered in an `awk' script run periodically for system
- maintenance.
-
- print "Awk script failed:", $0 | "mail bug-system"
- print "processing record number", FNR, "of", FILENAME | "mail bug-system"
- close ("mail bug-system")
-
- We use a `close' statement here because it's a good idea to
- close the pipe as soon as all the intended output has been sent
- to it. *Note Close Output::, for more information on this.
-
-Redirecting output using `>', `>>', or `|' asks the system to open a
-file or pipe only if the particular FILE or COMMAND you've specified
-has not already been written to by your program.
-
-
-
-File: gawk-info, Node: Close Output, Up: Redirection
-
-Closing Output Files and Pipes
-------------------------------
-
-When a file or pipe is opened, the filename or command associated
-with it is remembered by `awk' and subsequent writes to the same file
-or command are appended to the previous writes. The file or pipe
-stays open until `awk' exits. This is usually convenient.
-
-Sometimes there is a reason to close an output file or pipe earlier
-than that. To do this, use the `close' command, as follows:
-
- close (FILENAME)
-
-or
-
- close (COMMAND)
-
-The argument FILENAME or COMMAND can be any expression. Its value
-must exactly equal the string used to open the file or pipe to begin
-with--for example, if you open a pipe with this:
-
- print $1 | "sort -r > names.sorted"
-
-then you must close it with this:
-
- close ("sort -r > names.sorted")
-
-Here are some reasons why you might need to close an output file:
-
- * To write a file and read it back later on in the same `awk'
- program. Close the file when you are finished writing it; then
- you can start reading it with `getline' (*note Getline::.).
-
- * To write numerous files, successively, in the same `awk'
- program. If you don't close the files, eventually you will
- exceed the system limit on the number of open files in one
- process. So close each one when you are finished writing it.
-
- * To make a command finish. When you redirect output through a
- pipe, the command reading the pipe normally continues to try to
- read input as long as the pipe is open. Often this means the
- command cannot really do its work until the pipe is closed. For
- example, if you redirect output to the `mail' program, the
- message will not actually be sent until the pipe is closed.
-
- * To run the same subprogram a second time, with the same arguments.
- This is not the same thing as giving more input to the first run!
-
- For example, suppose you pipe output to the `mail' program. If
- you output several lines redirected to this pipe without closing
- it, they make a single message of several lines. By contrast,
- if you close the pipe after each line of output, then each line
- makes a separate message.
-
-
-
-File: gawk-info, Node: Printf, Prev: Redirection, Up: Printing
-
-Using `printf' Statements For Fancier Printing
-==============================================
-
-If you want more precise control over the output format than `print'
-gives you, use `printf'. With `printf' you can specify the width to
-use for each item, and you can specify various stylistic choices for
-numbers (such as what radix to use, whether to print an exponent,
-whether to print a sign, and how many digits to print after the
-decimal point). You do this by specifying a "format string".
-
-* Menu:
-
-* Basic Printf:: Syntax of the `printf' statement.
-* Format-Control:: Format-control letters.
-* Modifiers:: Format--specification modifiers.
-* Printf Examples:: Several examples.
-
-
-
-File: gawk-info, Node: Basic Printf, Next: Format-Control, Up: Printf
-
-Introduction to the `printf' Statement
---------------------------------------
-
-The `printf' statement looks like this:
-
- printf FORMAT, ITEM1, ITEM2, ...
-
- The entire list of items may optionally be enclosed in parentheses.
-The parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a
-redirection (*note Redirection::.). The relational operators are
-`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison
-Ops::.).
-
-The difference between `printf' and `print' is the argument FORMAT.
-This is an expression whose value is taken as a string; its job is to
-say how to output each of the other arguments. It is called the
-"format string".
-
-The format string is essentially the same as in the C library
-function `printf'. Most of FORMAT is text to be output verbatim.
-Scattered among this text are "format specifiers", one per item.
-Each format specifier says to output the next item at that place in
-the format.
-
-The `printf' statement does not automatically append a newline to its
-output. It outputs nothing but what the format specifies. So if you
-want a newline, you must include one in the format. The output
-separator variables `OFS' and `ORS' have no effect on `printf'
-statements.
-
-
-
-File: gawk-info, Node: Format-Control, Next: Modifiers, Prev: Basic Printf, Up: Printf
-
-Format--Control Characters
---------------------------
-
-A format specifier starts with the character `%' and ends with a
-"format--control letter"; it tells the `printf' statement how to
-output one item. (If you actually want to output a `%', write `%%'.)
-The format--control letter specifies what kind of value to print.
-The rest of the format specifier is made up of optional "modifiers"
-which are parameters such as the field width to use.
-
-Here is a list of them:
-
-`c'
- This prints a number as an ASCII character. Thus, `printf "%c",
- 65' outputs the letter `A'. The output for a string value is
- the first character of the string.
-
-`d'
- This prints a decimal integer.
-
-`e'
- This prints a number in scientific (exponential) notation. For
- example,
-
- printf "%4.3e", 1950
-
- prints `1.950e+03', with 3 digits following the decimal point, as
- requested by the `.3'; the `4' is only a minimum field width,
- which this output already exceeds. The `4.3' are "modifiers",
- discussed below.
-
-`f'
- This prints a number in floating point notation.
-
-`g'
- This prints either scientific notation or floating point
- notation, whichever is shorter.
-
-`o'
- This prints an unsigned octal integer.
-
-`s'
- This prints a string.
-
-`x'
- This prints an unsigned hexadecimal integer.
-
-`%'
- This isn't really a format--control letter, but it does have a
- meaning when used after a `%': the sequence `%%' outputs one
- `%'. It does not consume an argument.
-
-
-
-File: gawk-info, Node: Modifiers, Next: Printf Examples, Prev: Format-Control, Up: Printf
-
-Modifiers for `printf' Formats
-------------------------------
-
-A format specification can also include "modifiers" that can control
-how much of the item's value is printed and how much space it gets.
-The modifiers come between the `%' and the format--control letter.
-Here are the possible modifiers, in the order in which they may appear:
-
-`-'
- The minus sign, used before the width modifier, says to
- left--justify the argument within its specified width. Normally
- the argument is printed right--justified in the specified width.
-
-`WIDTH'
- This is a number representing the desired width of a field.
- Inserting any number between the `%' sign and the format control
- character forces the field to be expanded to this width. The
- default way to do this is to pad with spaces on the left.
-
-`.PREC'
- This is a number that specifies the precision to use when
- printing. This specifies the number of digits you want printed
- to the right of the decimal point.
-
-The C library `printf''s dynamic WIDTH and PREC capability (for
-example, `"%*.*s"') is not supported. However, it can be easily
-simulated using concatenation to dynamically build the format string.
-
-
-
-File: gawk-info, Node: Printf Examples, Prev: Modifiers, Up: Printf
-
-Examples of Using `printf'
---------------------------
-
-Here is how to use `printf' to make an aligned table:
-
- awk '{ printf "%-10s %s\n", $1, $2 }' BBS-list
-
-prints the names of bulletin boards (`$1') of the file `BBS-list' as
-a string of 10 characters, left justified. It also prints the phone
-numbers (`$2') afterward on the line. This will produce an aligned
-two--column table of names and phone numbers, like so:
-
- aardvark 555-5553
- alpo-net 555-3412
- barfly 555-7685
- bites 555-1675
- camelot 555-0542
- core 555-2912
- fooey 555-1234
- foot 555-6699
- macfoo 555-6480
- sdace 555-3430
- sabafoo 555-2127
-
-Did you notice that we did not specify that the phone numbers be
-printed as numbers? They had to be printed as strings because the
-numbers are separated by a dash. This dash would be interpreted as a
-"minus" sign if we had tried to print the phone numbers as numbers.
-This would have led to some pretty confusing results.
-
-We did not specify a width for the phone numbers because they are the
-last things on their lines. We don't need to put spaces after them.
-
-We could make our table look even nicer by adding headings to the
-tops of the columns. To do this, use the BEGIN pattern (*note
-BEGIN/END::.) to cause the header to be printed only once, at the
-beginning of the `awk' program:
-
- awk 'BEGIN { print "Name Number"
- print "--- -----" }
- { printf "%-10s %s\n", $1, $2 }' BBS-list
-
-Did you notice that we mixed `print' and `printf' statements in the
-above example? We could have used just `printf' statements to get
-the same results:
-
- awk 'BEGIN { printf "%-10s %s\n", "Name", "Number"
- printf "%-10s %s\n", "---", "-----" }
- { printf "%-10s %s\n", $1, $2 }' BBS-list
-
-By outputting each column heading with the same format specification
-used for the elements of the column, we have made sure that the
-headings will be aligned just like the columns.
-
-The fact that the same format specification is used can be emphasized
-by storing it in a variable, like so:
-
- awk 'BEGIN { format = "%-10s %s\n"
- printf format, "Name", "Number"
- printf format, "---", "-----" }
- { printf format, $1, $2 }' BBS-list
-
-See if you can use the `printf' statement to line up the headings and
-table data for our `inventory-shipped' example covered earlier in the
-section on the `print' statement (*note Print::.).
-
-
-
-File: gawk-info, Node: One-liners, Next: Patterns, Prev: Printing, Up: Top
-
-Useful ``One-liners''
-*********************
-
-Useful `awk' programs are often short, just a line or two. Here is a
-collection of useful, short programs to get you started. Some of
-these programs contain constructs that haven't been covered yet. The
-description of the program will give you a good idea of what is going
-on, but please read the rest of the manual to become an `awk' expert!
-
-`awk '{ num_fields = num_fields + NF }'
-`` END { print num_fields }'''
- This program prints the total number of fields in all input lines.
-
-`awk 'length($0) > 80''
- This program prints every line longer than 80 characters. The
- sole rule has a relational expression as its pattern, and has no
- action (so the default action, printing the record, is used).
-
-`awk 'NF > 0''
- This program prints every line that has at least one field.
- This is an easy way to delete blank lines from a file (or
- rather, to create a new file similar to the old file but from
- which the blank lines have been deleted).
-
-`awk '{ if (NF > 0) print }''
- This program also prints every line that has at least one field.
- Here we allow the rule to match every line, then decide in the
- action whether to print.
-
-`awk 'BEGIN { for (i = 1; i <= 7; i++)'
-`` print int(101 * rand()) }'''
- This program prints 7 random numbers from 0 to 100, inclusive.
-
-`ls -l FILES | awk '{ x += $4 } ; END { print "total bytes: " x }''
- This program prints the total number of bytes used by FILES.
-
-`expand FILE | awk '{ if (x < length()) x = length() }'
-`` END { print "maximum line length is " x }'''
- This program prints the maximum line length of FILE. The input
- is piped through the `expand' program to change tabs into
- spaces, so the widths compared are actually the right--margin
- columns.
-
-
-
-File: gawk-info, Node: Patterns, Next: Actions, Prev: One-liners, Up: Top
-
-Patterns
-********
-
-Patterns control the execution of rules: a rule is executed when its
-pattern matches the input record. The `awk' language provides
-several special patterns that are described in the sections that
-follow. Patterns include:
-
-NULL
- The empty pattern, which matches every input record. (*Note The
- Empty Pattern: Empty.)
-
-/REGULAR EXPRESSION/
- A regular expression as a pattern. It matches when the text of
- the input record fits the regular expression. (*Note Regular
- Expressions as Patterns: Regexp.)
-
-CONDEXP
- A single comparison expression. It matches when it is true.
- (*Note Comparison Expressions as Patterns: Comparison Patterns.)
-
-`BEGIN'
-`END'
- Special patterns to supply start--up or clean--up information to
- `awk'. (*Note `BEGIN' and `END' Special Patterns: BEGIN/END.)
-
-PAT1, PAT2
- A pair of patterns separated by a comma, specifying a range of
- records. (*Note Specifying Record Ranges With Patterns: Ranges.)
-
-CONDEXP1 BOOLEAN CONDEXP2
- A "compound" pattern, which combines expressions with the
- operators ``and'' (`&&') and ``or'' (`||'). (*Note Boolean
- Operators and Patterns: Boolean.)
-
-! CONDEXP
- The pattern CONDEXP is evaluated. Then the `!' performs a
- boolean ``not'' or logical negation operation; if the input line
- matches the pattern in CONDEXP then the associated action is
- *not* executed. If the input line did not match that pattern,
- then the action *is* executed. (*Note Boolean Operators and
- Patterns: Boolean.)
-
-(EXPR)
- Parentheses may be used to control how operators nest.
-
-PAT1 ? PAT2 : PAT3
- The first pattern is evaluated. If it is true, the input line
- is tested against the second pattern, otherwise it is tested
- against the third. (*Note Conditional Patterns: Conditional
- Patterns.)
-
-* Menu:
-
-The following subsections describe these forms in detail:
-
-* Empty:: The empty pattern, which matches every record.
-
-* Regexp:: Regular expressions such as `/foo/'.
-
-* Comparison Patterns:: Comparison expressions such as `$1 > 10'.
-
-* Boolean:: Combining comparison expressions.
-
-* Ranges:: Using pairs of patterns to specify record ranges.
-
-* BEGIN/END:: Specifying initialization and cleanup rules.
-
-* Conditional Patterns:: Patterns such as `pat1 ? pat2 : pat3'.
-
-
-
-File: gawk-info, Node: Empty, Next: Regexp, Up: Patterns
-
-The Empty Pattern
-=================
-
-An empty pattern is considered to match *every* input record. For
-example, the program:
-
- awk '{ print $1 }' BBS-list
-
-prints just the first field of every record.
-
-
-
-File: gawk-info, Node: Regexp, Next: Comparison Patterns, Prev: Empty, Up: Patterns
-
-Regular Expressions as Patterns
-===============================
-
-A "regular expression", or "regexp", is a way of describing classes
-of strings. When enclosed in slashes (`/'), it makes an `awk'
-pattern that matches every input record that contains a match for the
-regexp.
-
-The simplest regular expression is a sequence of letters, numbers, or
-both. Such a regexp matches any string that contains that sequence.
-Thus, the regexp `foo' matches any string containing `foo'. (More
-complicated regexps let you specify classes of similar strings.)
-
-* Menu:
-
-* Usage: Regexp Usage. How regexps are used in patterns.
-* Operators: Regexp Operators. How to write a regexp.
-
-
-
-File: gawk-info, Node: Regexp Usage, Next: Regexp Operators, Up: Regexp
-
-How to use Regular Expressions
-------------------------------
-
-When you enclose `foo' in slashes, you get a pattern that matches a
-record that contains `foo'. For example, this prints the second
-field of each record that contains `foo' anywhere:
-
- awk '/foo/ { print $2 }' BBS-list
-
-Regular expressions can also be used in comparison expressions. Then
-you can specify the string to match against; it need not be the
-entire current input record. These comparison expressions can be
-used as patterns or in `if' and `while' statements.
-
-`EXP ~ /REGEXP/'
- This is true if the expression EXP (taken as a character string)
- is matched by REGEXP. The following example matches, or
- selects, all input records with the letter `J' in the first field:
-
- awk '$1 ~ /J/' inventory-shipped
-
- So does this:
-
- awk '{ if ($1 ~ /J/) print }' inventory-shipped
-
-`EXP !~ /REGEXP/'
- This is true if the expression EXP (taken as a character string)
- is *not* matched by REGEXP. The following example matches, or
- selects, all input records whose first field *does not* contain
- the letter `J':
-
- awk '$1 !~ /J/' inventory-shipped
-
-The right hand side of a `~' or `!~' operator need not be a constant
-regexp (i.e. a string of characters between `/'s). It can also be
-"computed", or "dynamic". For example:
-
- identifier = "[A-Za-z_][A-Za-z_0-9]+"
- $0 ~ identifier
-
-sets `identifier' to a regexp that describes `awk' variable names,
-and tests if the input record matches this regexp.
-
-A dynamic regexp may actually be any expression. The expression is
-evaluated, and the result is treated as a string that describes a
-regular expression.
-
-
-
-File: gawk-info, Node: Regexp Operators, Prev: Regexp Usage, Up: Regexp
-
-Regular Expression Operators
-----------------------------
-
-You can combine regular expressions with the following characters,
-called "regular expression operators", or "metacharacters", to
-increase the power and versatility of regular expressions. This is a
-table of metacharacters:
-
-`\'
- This is used to suppress the special meaning of a character when
- matching. For example:
-
- \$
-
- matches the character `$'.
-
-`^'
- This matches the beginning of the string or the beginning of a
- line within the string. For example:
-
- ^@chapter
-
- matches the `@chapter' at the beginning of a string, and can be
- used to identify chapter beginnings in Texinfo source files.
-
-`$'
- This is similar to `^', but it matches only at the end of a
- string or the end of a line within the string. For example:
-
- /p$/
-
- as a pattern matches a record that ends with a `p'.
-
-`.'
- This matches any single character except a newline. For example:
-
- .P
-
- matches any single character followed by a `P' in a string.
- Using concatenation we can make regular expressions like `U.A',
- which matches any three--character string that begins with `U'
- and ends with `A'.
-
-`[...]'
- This is called a "character set". It matches any one of a group
- of characters that are enclosed in the square brackets. For
- example:
-
- [MVX]
-
- matches any of the characters `M', `V', or `X' in a string.
-
- Ranges of characters are indicated by using a hyphen between the
- beginning and ending characters, and enclosing the whole thing
- in brackets. For example:
-
- [0-9]
-
- matches any string that contains a digit.
-
- Note that special rules have to be followed to match the
- characters, `]', `-', and `^' when they are enclosed in the
- square brackets. To match a `]', make it the first character in
- the set. For example:
-
- []d]
-
- matches either `]', or `d'.
-
- To match `-', write it as `---', which is a range containing only
- `-'. You may also make the `-' be the first or last character
- in the set. To match `^', make it any character except the
- first one of a set.
-
-`[^ ...]'
- This is the "complemented character set". The first character
- after the `[' *must* be a `^'. This matches any characters
- *except* those in the square brackets. For example:
-
- [^0-9]
-
- matches any characters that are not digits.
-
-`|'
- This is the "alternation operator" and it is used to specify
- alternatives. For example:
-
- ^P|[0-9]
-
- matches any string that matches either `^P' or `[0-9]'. This
- means it matches any string that contains a digit or starts with
- `P'.
-
-`(...)'
- Parentheses are used for grouping in regular expressions as in
- arithmetic. They can be used to concatenate regular expressions
- containing the alternation operator, `|'.
-
-`*'
- This symbol means that the preceding regular expression is to be
- repeated as many times as possible to find a match. For example:
-
- ph*
-
- applies the `*' symbol to the preceding `h' and looks for
- matches to one `p' followed by any number of `h''s. This will
- also match just `p' if no `h''s are present.
-
- The `*' means repeat the *smallest* possible preceding
- expression in order to find a match. The `awk' language
- processes a `*' by matching as many repetitions as can be found.
- For example:
-
- awk '/\(c[ad][ad]*r x\)/ { print }' sample
-
- matches every record in the input containing a string of the
- form `(car x)', `(cdr x)', `(cadr x)', and so on.
-
-`+'
- This symbol is similar to `*', but the preceding expression must
- be matched at least once. This means that:
-
- wh+y
-
- would match `why' and `whhy' but not `wy', whereas `wh*y' would
- match all three of these strings. And this is a simpler way of
- writing the last `*' example:
-
- awk '/\(c[ad]+r x\)/ { print }' sample
-
-`?'
- This symbol is similar to `*', but the preceding expression can
- be matched once or not at all. For example:
-
- fe?d
-
- will match `fed' or `fd', but nothing else.
-
-In regular expressions, the `*', `+', and `?' operators have the
-highest precedence, followed by concatenation, and finally by `|'.
-As in arithmetic, parentheses can change how operators are grouped.
-
-Any other character stands for itself. However, it is important to
-note that case in regular expressions *is* significant, both when
-matching ordinary (i.e. non--metacharacter) characters, and inside
-character sets. Thus a `w' in a regular expression matches only a
-lower case `w' and not an uppercase `W'. When
-you want to do a case--independent match, you have to use a character
-set: `[Ww]'.
-
-
-
-File: gawk-info, Node: Comparison Patterns, Next: Ranges, Prev: Regexp, Up: Patterns
-
-Comparison Expressions as Patterns
-==================================
-
-"Comparison patterns" use "relational operators" to compare strings
-or numbers. The relational operators are the same as in C. Here is
-a table of them:
-
-`X < Y'
- True if X is less than Y.
-
-`X <= Y'
- True if X is less than or equal to Y.
-
-`X > Y'
- True if X is greater than Y.
-
-`X >= Y'
- True if X is greater than or equal to Y.
-
-`X == Y'
- True if X is equal to Y.
-
-`X != Y'
- True if X is not equal to Y.
-
-Comparison expressions can be used as patterns to control whether a
-rule is executed. The expression is evaluated for each input record
-read, and the pattern is considered matched if the condition is "true".
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (*note Conversion::.). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, `"10"' is less than `"9"'.
-
-The following example prints the second field of each input record
-whose first field is precisely `foo'.
-
- awk '$1 == "foo" { print $2 }' BBS-list
-
-Contrast this with the following regular expression match, which
-would accept any record with a first field that contains `foo':
-
- awk '$1 ~ "foo" { print $2 }' BBS-list
-
-
-
-File: gawk-info, Node: Ranges, Next: BEGIN/END, Prev: Comparison Patterns, Up: Patterns
-
-Specifying Record Ranges With Patterns
-======================================
-
-A "range pattern" is made of two patterns separated by a comma:
-`BEGPAT, ENDPAT'. It matches ranges of consecutive input records.
-The first pattern BEGPAT controls where the range begins, and the
-second one ENDPAT controls where it ends.
-
-They work as follows: BEGPAT is matched against every input record;
-when a record matches BEGPAT, the range pattern becomes "turned on".
-The range pattern matches this record. As long as it stays turned
-on, it automatically matches every input record read. But meanwhile,
-ENDPAT is matched against every input record, and when it matches,
-the range pattern is turned off again for the following record. Now
-we go back to checking BEGPAT against each record. For example:
-
- awk '$1 == "on", $1 == "off"'
-
-prints every record between on/off pairs, inclusive.
-
-The record that turns on the range pattern and the one that turns it
-off both match the range pattern. If you don't want to operate on
-these records, you can write `if' statements in the rule's action to
-distinguish them.
-
-It is possible for a pattern to be turned both on and off by the same
-record, if both conditions are satisfied by that record. Then the
-action is executed for just that record.
-
-
-
-File: gawk-info, Node: BEGIN/END, Next: Boolean, Prev: Ranges, Up: Patterns
-
-`BEGIN' and `END' Special Patterns
-==================================
-
-`BEGIN' and `END' are special patterns. They are not used to match
-input records. Rather, they are used for supplying start--up or
-clean--up information to your `awk' script. A `BEGIN' rule is
-executed, once, before the first input record has been read. An
-`END' rule is executed, once, after all the input has been read. For
-example:
-
- awk 'BEGIN { print "Analysis of ``foo'' program" }
- /foo/ { ++foobar }
- END { print "``foo'' appears " foobar " times." }' BBS-list
-
-This program finds out how many records in the input file `BBS-list'
-contain the string `foo'. The `BEGIN' pattern prints out a title for
-the report. There is no need to use the `BEGIN' pattern to
-initialize the counter `foobar' to zero, as `awk' does this for us
-automatically (*note Variables::.). The second rule increments the
-variable `foobar' every time a record containing the pattern `foo' is
-read. The last rule prints out the value of `foobar' at the end of
-the run.
-
-The special patterns `BEGIN' and `END' do not combine with other
-kinds of patterns.
-
-An `awk' program may have multiple `BEGIN' and/or `END' rules. The
-contents of multiple `BEGIN' or `END' rules are treated as if they
-had been enclosed in a single rule, in the order that the rules are
-encountered in the `awk' program. (This feature was introduced with
-the new version of `awk'.)
-
-Multiple `BEGIN' and `END' sections are also useful for writing
-library functions that need to do initialization and/or cleanup of
-their own. Note that the order in which library functions are named
-on the command line will affect the order in which their `BEGIN' and
-`END' rules will be executed. Therefore you have to be careful how
-you write your library functions. (*Note Command Line::, for more
-information on using library functions.)
-
-If an `awk' program only has a `BEGIN' rule, and no other rules, then
-the program will exit after the `BEGIN' rule has been run. Older
-versions of `awk' used to read their input until end of file was
-seen. However, if an `END' rule exists as well, then the input will
-be read, even if there are no other rules in the program.
-
-`BEGIN' and `END' rules must have actions; there is no default action
-for these rules since there is no current record when they run.
-
-
-
-File: gawk-info, Node: Boolean, Next: Conditional Patterns, Prev: BEGIN/END, Up: Patterns
-
-Boolean Operators and Patterns
-==============================
-
-A boolean pattern is a combination of other patterns using the
-boolean operators ``or'' (`||'), ``and'' (`&&'), and ``not'' (`!'),
-along with parentheses to control nesting. Whether the boolean
-pattern matches an input record is computed from whether its
-subpatterns match.
-
-The subpatterns of a boolean pattern can be regular expressions,
-matching expressions, comparisons, or other boolean combinations of
-such. Range patterns cannot appear inside boolean operators, since
-they don't make sense for classifying a single record, and neither
-can the special patterns `BEGIN' and `END', which never match any
-input record.
-
-Here are descriptions of the three boolean operators.
-
-`PAT1 && PAT2'
- Matches if both PAT1 and PAT2 match by themselves. For example,
- the following command prints all records in the input file
- `BBS-list' that contain both `2400' and `foo'.
-
- awk '/2400/ && /foo/' BBS-list
-
- Whether PAT2 matches is tested only if PAT1 succeeds. This can
- make a difference when PAT2 contains expressions that have side
- effects: in the case of `/foo/ && ($2 == bar++)', the variable
- `bar' is not incremented if there is no `foo' in the record.
-
-`PAT1 || PAT2'
- Matches if at least one of PAT1 and PAT2 matches the current
- input record. For example, the following command prints all
- records in the input file `BBS-list' that contain *either*
- `2400' or `foo', or both.
-
- awk '/2400/ || /foo/' BBS-list
-
- Whether PAT2 matches is tested only if PAT1 fails to match.
- This can make a difference when PAT2 contains expressions that
- have side effects.
-
-`!PAT'
- Matches if PAT does not match. For example, the following
- command prints all records in the input file `BBS-list' that do
- *not* contain the string `foo'.
-
- awk '! /foo/' BBS-list
-
-Note that boolean patterns are built from other patterns just as
-boolean expressions are built from other expressions (*note Boolean
-Ops::.). Any boolean expression is also a valid boolean pattern.
-But the converse is not true: simple regular expression patterns such
-as `/foo/' are not allowed in boolean expressions. Regular
-expressions can appear in boolean expressions only in conjunction
-with the matching operators, `~' and `!~'.
-
-
-
-File: gawk-info, Node: Conditional Patterns, Prev: Boolean, Up: Patterns
-
-Conditional Patterns
-====================
-
-Patterns may use a "conditional expression" much like the conditional
-expression of the C language. This takes the form:
-
- PAT1 ? PAT2 : PAT3
-
-The first pattern is evaluated. If it evaluates to TRUE, then the
-input record is tested against PAT2. Otherwise it is tested against
-PAT3. The conditional pattern matches if PAT2 or PAT3 (whichever one
-is selected) matches.
-
-
-
-File: gawk-info, Node: Actions, Next: Expressions, Prev: Patterns, Up: Top
-
-Actions: The Basics
-*******************
-
-The "action" part of an `awk' rule tells `awk' what to do once a
-match for the pattern is found. An action consists of one or more
-`awk' "statements", enclosed in curly braces (`{' and `}'). The
-curly braces must be used even if the action contains only one
-statement, or even if it contains no statements at all. Action
-statements are separated by newlines or semicolons.
-
-Besides the print statements already covered (*note Printing::.),
-there are four kinds of action statements: expressions, control
-statements, compound statements, and function definitions.
-
- * "Expressions" include assignments, arithmetic, function calls,
- and more (*note Expressions::.).
-
- * "Control statements" specify the control flow of `awk' programs.
- The `awk' language gives you C--like constructs (`if', `for',
- `while', and so on) as well as a few special ones (*note
- Statements::.).
-
- * A "compound statement" is just one or more `awk' statements
- enclosed in curly braces. This way you can group several
- statements to form the body of an `if' or similar statement.
-
- * You can define "user--defined functions" for use elsewhere in
- the `awk' program (*note User-defined::.).
-
-
-
-File: gawk-info, Node: Expressions, Next: Statements, Prev: Actions, Up: Top
-
-Actions: Expressions
-********************
-
-Expressions are the basic building block of `awk' actions. An
-expression evaluates to a value, which you can print, test, store in
-a variable or pass to a function.
-
-But, beyond that, an expression can assign a new value to a variable
-or a field, with an assignment operator.
-
-An expression can serve as a statement on its own. Most other action
-statements are made up of various combinations of expressions. As in
-other languages, expressions in `awk' include variables, array
-references, constants, and function calls, as well as combinations of
-these with various operators.
-
-* Menu:
-
-* Constants:: String and numeric constants.
-* Variables:: Variables give names to values for future use.
-* Fields:: Field references such as `$1' are also expressions.
-* Arrays:: Array element references are expressions.
-
-* Arithmetic Ops:: Arithmetic operations (`+', `-', etc.)
-* Concatenation:: Concatenating strings.
-* Comparison Ops:: Comparison of numbers and strings with `<', etc.
-* Boolean Ops:: Combining comparison expressions using boolean operators
- `||' (``or''), `&&' (``and'') and `!' (``not'').
-
-* Assignment Ops:: Changing the value of a variable or a field.
-* Increment Ops:: Incrementing the numeric value of a variable.
-
-* Conversion:: The conversion of strings to numbers and vice versa.
-* Conditional Exp:: Conditional expressions select between two subexpressions
- under control of a third subexpression.
-* Function Calls:: A function call is an expression.
-
-
-
-File: gawk-info, Node: Constants, Next: Variables, Up: Expressions
-
-Constant Expressions
-====================
-
-There are two types of constants: numeric constants and string
-constants.
-
-The "numeric constant" is a number. This number can be an integer, a
-decimal fraction, or a number in scientific (exponential) notation.
-Note that all numeric values are represented within `awk' in
-double--precision floating point. Here are some examples of numeric
-constants, which all have the same value:
-
- 105
- 1.05e+2
- 1050e-1
-
-A string constant consists of a sequence of characters enclosed in
-double--quote marks. For example:
-
- "parrot"
-
-represents the string constant `parrot'. Strings in `gawk' can be of
-any length and they can contain all the possible 8--bit ASCII
-characters including ASCII NUL. Other `awk' implementations may have
-difficulty with some character codes.
-
-Some characters cannot be included literally in a string. You
-represent them instead with "escape sequences", which are character
-sequences beginning with a backslash (`\').
-
-One use of the backslash is to include double--quote characters in a
-string. Since a plain double--quote would end the string, you must
-use `\"'. Backslash itself is another character that can't be
-included normally; you write `\\' to put one backslash in the string.
-
-Another use of backslash is to represent unprintable characters such
-as newline. While there is nothing to stop you from writing these
-characters directly in an `awk' program, they may look ugly.
-
-`\b'
- Represents a backspace, `^H'.
-
-`\f'
- Represents a formfeed, `^L'.
-
-`\n'
- Represents a newline, `^J'.
-
-`\r'
- Represents a carriage return, `^M'.
-
-`\t'
- Represents a horizontal tab, `^I'.
-
-`\v'
- Represents a vertical tab, `^K'.
-
-`\NNN'
- Represents the octal value NNN, where NNN is one to three digits
- between 0 and 7. For example, the code for the ASCII ESC
- (escape) character is `\033'.
-
-
-
-File: gawk-info, Node: Variables, Next: Arithmetic Ops, Prev: Constants, Up: Expressions
-
-Variables
-=========
-
-Variables let you give names to values and refer to them later. You
-have already seen variables in many of the examples. The name of a
-variable must be a sequence of letters, digits and underscores, but
-it may not begin with a digit. Case is significant in variable
-names; `a' and `A' are distinct variables.
-
-A variable name is a valid expression by itself; it represents the
-variable's current value. Variables are given new values with
-"assignment operators" and "increment operators". *Note Assignment
-Ops::.
-
-A few variables have special built--in meanings, such as `FS', the
-field separator, and `NF', the number of fields in the current input
-record. *Note Special::, for a list of them. Special variables can
-be used and assigned just like all other variables, but their values
-are also used or changed automatically by `awk'. Each special
-variable's name is made entirely of upper case letters.
-
-Variables in `awk' can be assigned either numeric values or string
-values. By default, variables are initialized to the null string,
-which has the numeric value zero. So there is no need to
-``initialize'' each variable explicitly in `awk', the way you would
-need to do in C or most other traditional programming languages.
-
-
-
-File: gawk-info, Node: Arithmetic Ops, Next: Concatenation, Prev: Variables, Up: Expressions
-
-Arithmetic Operators
-====================
-
-The `awk' language uses the common arithmetic operators when
-evaluating expressions. All of these arithmetic operators follow
-normal precedence rules, and work as you would expect them to. This
-example divides field 3 by field 4, adds field 2, stores the result
-into field 1, and prints the results:
-
- awk '{ $1 = $2 + $3 / $4; print }' inventory-shipped
-
-The arithmetic operators in `awk' are:
-
-`X + Y'
- Addition.
-
-`X - Y'
- Subtraction.
-
-`- X'
- Negation.
-
-`X / Y'
- Division. Since all numbers in `awk' are double--precision
- floating point, the result is not rounded to an integer: `3 / 4'
- has the value 0.75.
-
-`X * Y'
- Multiplication.
-
-`X % Y'
- Remainder. The quotient is rounded toward zero to an integer,
- multiplied by Y and this result is subtracted from X. This
- operation is sometimes known as ``trunc--mod''. The following
- relation always holds:
-
- `b * int(a / b) + (a % b) == a'
-
- One undesirable effect of this definition of remainder is that X
- % Y is negative if X is negative. Thus,
-
- -17 % 8 = -1
-
-`X ^ Y'
-`X ** Y'
- Exponentiation: X raised to the Y power. `2 ^ 3' has the value
- 8. The character sequence `**' is equivalent to `^'.
-
-
-
-File: gawk-info, Node: Concatenation, Next: Comparison Ops, Prev: Arithmetic Ops, Up: Expressions
-
-String Concatenation
-====================
-
-There is only one string operation: concatenation. It does not have
-a specific operator to represent it. Instead, concatenation is
-performed by writing expressions next to one another, with no
-operator. For example:
-
- awk '{ print "Field number one: " $1 }' BBS-list
-
-produces, for the first record in `BBS-list':
-
- Field number one: aardvark
-
-If you hadn't put the space after the `:', the line would have run
-together. For example:
-
- awk '{ print "Field number one:" $1 }' BBS-list
-
-produces, for the first record in `BBS-list':
-
- Field number one:aardvark
-
-
-
-File: gawk-info, Node: Comparison Ops, Next: Boolean Ops, Prev: Concatenation, Up: Expressions
-
-Comparison Expressions
-======================
-
-"Comparison expressions" use "relational operators" to compare
-strings or numbers. The relational operators are the same as in C.
-Here is a table of them:
-
-`X < Y'
- True if X is less than Y.
-
-`X <= Y'
- True if X is less than or equal to Y.
-
-`X > Y'
- True if X is greater than Y.
-
-`X >= Y'
- True if X is greater than or equal to Y.
-
-`X == Y'
- True if X is equal to Y.
-
-`X != Y'
- True if X is not equal to Y.
-
-`X ~ REGEXP'
- True if regexp REGEXP matches the string X.
-
-`X !~ REGEXP'
- True if regexp REGEXP does not match the string X.
-
-`SUBSCRIPT in ARRAY'
- True if array ARRAY has an element with the subscript SUBSCRIPT.
-
-Comparison expressions have the value 1 if true and 0 if false.
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (*note Conversion::.). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, `"10"' is less than `"9"'.
-
-For example,
-
- $1 == "foo"
-
-has the value of 1, or is true, if the first field of the current
-input record is precisely `foo'. By contrast,
-
- $1 ~ /foo/
-
-has the value 1 if the first field contains `foo'.
-
-
-
-File: gawk-info, Node: Boolean Ops, Next: Assignment Ops, Prev: Comparison Ops, Up: Expressions
-
-Boolean Operators
-=================
-
-A boolean expression is a combination of comparison expressions or
-matching expressions, using the boolean operators ``or'' (`||'),
-``and'' (`&&'), and ``not'' (`!'), along with parentheses to control
-nesting. The truth of the boolean expression is computed by
-combining the truth values of the component expressions.
-
-Boolean expressions can be used wherever comparison and matching
-expressions can be used. They can be used in `if' and `while'
-statements. They have numeric values (1 if true, 0 if false).
-
-In addition, every boolean expression is also a valid boolean
-pattern, so you can use it as a pattern to control the execution of
-rules.
-
-Here are descriptions of the three boolean operators, with an example
-of each. It may be instructive to compare these examples with the
-analogous examples of boolean patterns (*note Boolean::.), which use
-the same boolean operators in patterns instead of expressions.
-
-`BOOLEAN1 && BOOLEAN2'
- True if both BOOLEAN1 and BOOLEAN2 are true. For example, the
- following statement prints the current input record if it
- contains both `2400' and `foo'.
-
- if ($0 ~ /2400/ && $0 ~ /foo/) print
-
- The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is
- true. This can make a difference when BOOLEAN2 contains
- expressions that have side effects: in the case of `$0 ~ /foo/
- && ($2 == bar++)', the variable `bar' is not incremented if
- there is no `foo' in the record.
-
-`BOOLEAN1 || BOOLEAN2'
- True if at least one of BOOLEAN1 and BOOLEAN2 is true. For
- example, the following command prints all records in the input
- file `BBS-list' that contain *either* `2400' or `foo', or both.
-
- awk '{ if ($0 ~ /2400/ || $0 ~ /foo/) print }' BBS-list
-
- The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is
- false. This can make a difference when BOOLEAN2 contains
- expressions that have side effects.
-
-`!BOOLEAN'
- True if BOOLEAN is false. For example, the following program
- prints all records in the input file `BBS-list' that do *not*
- contain the string `foo'.
-
- awk '{ if (! ($0 ~ /foo/)) print }' BBS-list
-
-
-
-File: gawk-info, Node: Assignment Ops, Next: Increment Ops, Prev: Boolean Ops, Up: Expressions
-
-Assignment Operators
-====================
-
-An "assignment" is an expression that stores a new value into a
-variable. For example, let's assign the value 1 to the variable `z':
-
- z = 1
-
-After this expression is executed, the variable `z' has the value 1.
-Whatever old value `z' had before the assignment is forgotten.
-
-The `=' sign is called an "assignment operator". It is the simplest
-assignment operator because the value of the right--hand operand is
-stored unchanged.
-
-The left--hand operand of an assignment can be a variable (*note
-Variables::.), a field (*note Changing Fields::.) or an array element
-(*note Arrays::.). These are all called "lvalues", which means they
-can appear on the left side of an assignment operator. The
-right--hand operand may be any expression; it produces the new value
-which the assignment stores in the specified variable, field or array
-element.
-
-Assignments can store string values also. For example, this would
-store the value `"this food is good"' in the variable `message':
-
- thing = "food"
- predicate = "good"
- message = "this " thing " is " predicate
-
-(This also illustrates concatenation of strings.)
-
-It is important to note that variables do *not* have permanent types.
-The type of a variable is simply the type of whatever value it
-happens to hold at the moment. In the following program fragment,
-the variable `foo' has a numeric value at first, and a string value
-later on:
-
- foo = 1
- print foo
- foo = "bar"
- print foo
-
-When the second assignment gives `foo' a string value, the fact that
-it previously had a numeric value is forgotten.
-
-An assignment is an expression, so it has a value: the same value
-that is assigned. Thus, `z = 1' as an expression has the value 1.
-One consequence of this is that you can write multiple assignments
-together:
-
- x = y = z = 0
-
-stores the value 0 in all three variables. It does this because the
-value of `z = 0', which is 0, is stored into `y', and then the value
-of `y = z = 0', which is 0, is stored into `x'.
-
-You can use an assignment anywhere an expression is called for. For
-example, it is valid to write `x != (y = 1)' to set `y' to 1 and then
-test whether `x' equals 1. But this style tends to make programs
-hard to read; except in a one--shot program, you should rewrite it to
-get rid of such nesting of assignments. This is never very hard.
-
-Aside from `=', there are several other assignment operators that do
-arithmetic with the old value of the variable. For example, the
-operator `+=' computes a new value by adding the right--hand value to
-the old value of the variable. Thus, the following assignment adds 5
-to the value of `foo':
-
- foo += 5
-
-This is precisely equivalent to the following:
-
- foo = foo + 5
-
-Use whichever one makes the meaning of your program clearer.
-
-Here is a table of the arithmetic assignment operators. In each
-case, the right--hand operand is an expression whose value is
-converted to a number.
-
-`LVALUE += INCREMENT'
- Adds INCREMENT to the value of LVALUE to make the new value of
- LVALUE.
-
-`LVALUE -= DECREMENT'
- Subtracts DECREMENT from the value of LVALUE.
-
-`LVALUE *= COEFFICIENT'
- Multiplies the value of LVALUE by COEFFICIENT.
-
-`LVALUE /= QUOTIENT'
- Divides the value of LVALUE by QUOTIENT.
-
-`LVALUE %= MODULUS'
- Sets LVALUE to its remainder by MODULUS.
-
-`LVALUE ^= POWER'
-`LVALUE **= POWER'
- Raises LVALUE to the power POWER.
-
-
-
-File: gawk-info, Node: Increment Ops, Next: Conversion, Prev: Assignment Ops, Up: Expressions
-
-Increment Operators
-===================
-
-"Increment operators" increase or decrease the value of a variable by
-1. You could do the same thing with an assignment operator, so the
-increment operators add no power to the `awk' language; but they are
-convenient abbreviations for something very common.
-
-The operator to add 1 is written `++'. There are two ways to use
-this operator: pre--incrementation and post--incrementation.
-
-To pre--increment a variable V, write `++V'. This adds 1 to the
-value of V and that new value is also the value of this expression.
-The assignment expression `V += 1' is completely equivalent.
-
-Writing the `++' after the variable specifies post--increment. This
-increments the variable value just the same; the difference is that
-the value of the increment expression itself is the variable's *old*
-value. Thus, if `foo' has value 4, then the expression `foo++' has
-the value 4, but it changes the value of `foo' to 5.
-
-The post--increment `foo++' is nearly equivalent to writing `(foo +=
-1) - 1'. It is not perfectly equivalent because all numbers in `awk'
-are floating point: in floating point, `foo + 1 - 1' does not
-necessarily equal `foo'. But the difference will be minute as long
-as you stick to numbers that are fairly small (less than a trillion).
-
-Any lvalue can be incremented. Fields and array elements are
-incremented just like variables.
-
-The decrement operator `--' works just like `++' except that it
-subtracts 1 instead of adding. Like `++', it can be used before the
-lvalue to pre--decrement or after it to post--decrement.
-
-Here is a summary of increment and decrement expressions.
-
-`++LVALUE'
- This expression increments LVALUE and the new value becomes the
- value of this expression.
-
-`LVALUE++'
- This expression causes the contents of LVALUE to be incremented.
- The value of the expression is the *old* value of LVALUE.
-
-`--LVALUE'
- Like `++LVALUE', but instead of adding, it subtracts. It
- decrements LVALUE and delivers the value that results.
-
-`LVALUE--'
- Like `LVALUE++', but instead of adding, it subtracts. It
- decrements LVALUE. The value of the expression is the *old*
- value of LVALUE.
-
-
-
-File: gawk-info, Node: Conversion, Next: Conditional Exp, Prev: Increment Ops, Up: Expressions
-
-Conversion of Strings and Numbers
-=================================
-
-Strings are converted to numbers, and numbers to strings, if the
-context of your `awk' statement demands it. For example, if the
-values of `foo' or `bar' in the expression `foo + bar' happen to be
-strings, they are converted to numbers before the addition is
-performed. If numeric values appear in string concatenation, they
-are converted to strings. Consider this:
-
- two = 2; three = 3
- print (two three) + 4
-
-This eventually prints the (numeric) value `27'. The numeric
-variables `two' and `three' are converted to strings and concatenated
-together, and the resulting string is converted back to a number
-before adding `4'. The resulting numeric value `27' is printed.
-
-If, for some reason, you need to force a number to be converted to a
-string, concatenate the null string with that number. To force a
-string to be converted to a number, add zero to that string. Strings
-that can't be interpreted as valid numbers are given the numeric
-value zero.
-
-The exact manner in which numbers are converted into strings is
-controlled by the `awk' special variable `OFMT' (*note Special::.).
-Numbers are converted using a special version of the `sprintf'
-function (*note Built-in::.) with `OFMT' as the format specifier.
-
-`OFMT''s default value is `"%.6g"', which prints a value with at
-most six significant digits. You might want to change it to specify
-more precision, if your version of `awk' uses double precision
-arithmetic. Double precision on most modern machines gives you 16 or
-17 decimal digits of precision.
-
-Strange results can happen if you set `OFMT' to a string that doesn't
-tell `sprintf' how to format floating point numbers in a useful way.
-For example, if you forget the `%' in the format, all numbers will be
-converted to the same constant string.
-
-
-
-File: gawk-info, Node: Conditional Exp, Next: Function Calls, Prev: Conversion, Up: Expressions
-
-Conditional Expressions
-=======================
-
-A "conditional expression" is a special kind of expression with three
-operands. It allows you to use one expression's value to select one
-of two other expressions.
-
-The conditional expression looks the same as in the C language:
-
- SELECTOR ? IF-TRUE-EXP : IF-FALSE-EXP
-
-There are three subexpressions. The first, SELECTOR, is always
-computed first. If it is ``true'' (not zero) then IF-TRUE-EXP is
-computed next and its value becomes the value of the whole expression.
-Otherwise, IF-FALSE-EXP is computed next and its value becomes the
-value of the whole expression.
-
-For example, this expression produces the absolute value of `x':
-
- x > 0 ? x : -x
-
-Each time the conditional expression is computed, exactly one of
-IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This
-is important when the expressions contain side effects. For example,
-this conditional expression examines element `i' of either array `a'
-or array `b', and increments `i'.
-
- x == y ? a[i++] : b[i++]
-
-This is guaranteed to increment `i' exactly once, because each time
-one or the other of the two increment expressions will be executed
-and the other will not be.
-
-
-
-File: gawk-info, Node: Function Calls, Prev: Conditional Exp, Up: Expressions
-
-Function Calls
-==============
-
-A "function" is a name for a particular calculation. Because it has
-a name, you can ask for it by name at any point in the program. For
-example, the function `sqrt' computes the square root of a number.
-
-A fixed set of functions are "built in", which means they are
-available in every `awk' program. The `sqrt' function is one of
-these. *Note Built-in::, for a list of built--in functions and their
-descriptions. In addition, you can define your own functions in the
-program for use elsewhere in the same program. *Note User-defined::,
-for how to do this.
-
-The way to use a function is with a "function call" expression, which
-consists of the function name followed by a list of "arguments" in
-parentheses. The arguments are expressions which give the raw
-materials for the calculation that the function will do. When there
-is more than one argument, they are separated by commas. If there
-are no arguments, write just `()' after the function name.
-
-*Do not put any space between the function name and the
-open--parenthesis!* A user--defined function name looks just like
-the name of a variable, and space would make the expression look like
-concatenation of a variable with an expression inside parentheses.
-Space before the parenthesis is harmless with built--in functions,
-but it is best not to get into the habit of using space, lest you do
-likewise for a user--defined function one day by mistake.
-
-Each function needs a particular number of arguments. For example,
-the `sqrt' function must be called with a single argument, like this:
-
- sqrt(ARGUMENT)
-
-The argument is the number to take the square root of.
-
-Some of the built--in functions allow you to omit the final argument.
-If you do so, they will use a reasonable default. *Note Built-in::,
-for full details. If arguments are omitted in calls to user--defined
-functions, then those arguments are treated as local variables,
-initialized to the null string (*note User-defined::.).
-
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of `sqrt(ARGUMENT)' is the square root of the
-argument. A function can also have side effects, such as assigning
-the values of certain variables or doing I/O.
-
-Here is a command to read numbers, one number per line, and print the
-square root of each one:
-
- awk '{ print "The square root of", $1, "is", sqrt($1) }'
-
-
-
-File: gawk-info, Node: Statements, Next: Arrays, Prev: Expressions, Up: Top
-
-Actions: Statements
-*******************
-
-"Control statements" such as `if', `while', and so on control the
-flow of execution in `awk' programs. Most of the control statements
-in `awk' are patterned on similar statements in C.
-
-The simplest kind of statement is an expression. The other kinds of
-statements start with special keywords such as `if' and `while', to
-distinguish them from simple expressions.
-
-In all the examples in this chapter, BODY can be either a single
-statement or a group of statements. Groups of statements are
-enclosed in braces, and separated by newlines or semicolons.
-
-* Menu:
-
-* Expressions:: One kind of statement simply computes an expression.
-
-* If:: Conditionally execute some `awk' statements.
-
-* While:: Loop until some condition is satisfied.
-
-* Do:: Do specified action while looping until some
- condition is satisfied.
-
-* For:: Another looping statement, that provides
- initialization and increment clauses.
-
-* Break:: Immediately exit the innermost enclosing loop.
-
-* Continue:: Skip to the end of the innermost enclosing loop.
-
-* Next:: Stop processing the current input record.
-
-* Exit:: Stop execution of `awk'.
-
-
-
-File: gawk-info, Node: If, Next: While, Up: Statements
-
-The `if' Statement
-==================
-
-The `if'-`else' statement is `awk''s decision--making statement. The
-`else' part of the statement is optional.
-
- `if (CONDITION) BODY1 else BODY2'
-
-Here CONDITION is an expression that controls what the rest of the
-statement will do. If CONDITION is true, BODY1 is executed;
-otherwise, BODY2 is executed (assuming that the `else' clause is
-present). The condition is considered true if it is nonzero or
-nonnull.
-
-Here is an example:
-
- awk '{ if (x % 2 == 0)
- print "x is even"
- else
- print "x is odd" }'
-
-In this example, if the expression `x % 2 == 0' is true (that is,
-the value of `x' is divisible by 2), then the first `print' statement
-is executed; otherwise the second `print' statement is executed.
-
-If the `else' appears on the same line as BODY1, and BODY1 is a
-single statement, then a semicolon must separate BODY1 from `else'.
-To illustrate this, let's rewrite the previous example:
-
- awk '{ if (x % 2 == 0) print "x is even"; else
- print "x is odd" }'
-
-If you forget the `;', `awk' won't be able to parse it, and you will
-get a syntax error.
-
-We would not actually write this example this way, because a human
-reader might fail to see the `else' if it were not the first thing on
-its line.
-
-
-
-File: gawk-info, Node: While, Next: Do, Prev: If, Up: Statements
-
-The `while' Statement
-=====================
-
-In programming, a loop means a part of a program that is (or at least
-can be) executed two or more times in succession.
-
-The `while' statement is the simplest looping statement in `awk'. It
-repeatedly executes a statement as long as a condition is true. It
-looks like this:
-
- while (CONDITION)
- BODY
-
-Here BODY is a statement that we call the "body" of the loop, and
-CONDITION is an expression that controls how long the loop keeps
-running.
-
-The first thing the `while' statement does is test CONDITION. If
-CONDITION is true, it executes the statement BODY. After BODY has
-been executed, CONDITION is tested again and this process is repeated
-until CONDITION is no longer true. If CONDITION is initially false,
-the body of the loop is never executed.
-
- awk '{ i = 1
- while (i <= 3) {
- print $i
- i++
- }
- }'
-
-This example prints the first three input fields, one per line.
-
-The loop works like this: first, the value of `i' is set to 1. Then,
-the `while' tests whether `i' is less than or equal to three. This
-is the case when `i' equals one, so the `i'-th field is printed.
-Then the `i++' increments the value of `i' and the loop repeats.
-
-When `i' reaches 4, the loop exits. Here BODY is a compound
-statement enclosed in braces. As you can see, a newline is not
-required between the condition and the body; but using one makes the
-program clearer unless the body is a compound statement or is very
-simple.
-
-
-
-File: gawk-info, Node: Do, Next: For, Prev: While, Up: Statements
-
-The `do'--`while' Statement
-===========================
-
-The `do' loop is a variation of the `while' looping statement. The
-`do' loop executes the BODY once, then repeats BODY as long as
-CONDITION is true. It looks like this:
-
- do
- BODY
- while (CONDITION)
-
-Even if CONDITION is false at the start, BODY is executed at least
-once (and only once, unless executing BODY makes CONDITION true).
-Contrast this with the corresponding `while' statement:
-
- while (CONDITION)
- BODY
-
-This statement will not execute BODY even once if CONDITION is false
-to begin with.
-
-Here is an example of a `do' statement:
-
- awk '{ i = 1
- do {
- print $0
- i++
- } while (i <= 10)
- }'
-
-This program prints each input record ten times. It isn't a very realistic
-example, since in this case an ordinary `while' would do just as
-well. But this is normal; there is only occasionally a real use for
-a `do' statement.
-
-
-
-File: gawk-info, Node: For, Next: Break, Prev: Do, Up: Statements
-
-The `for' Statement
-===================
-
-The `for' statement makes it more convenient to count iterations of a
-loop. The general form of the `for' statement looks like this:
-
- for (INITIALIZATION; CONDITION; INCREMENT)
- BODY
-
-This statement starts by executing INITIALIZATION. Then, as long as
-CONDITION is true, it repeatedly executes BODY and then INCREMENT.
-Typically INITIALIZATION sets a variable to either zero or one,
-INCREMENT adds 1 to it, and CONDITION compares it against the desired
-number of iterations.
-
-Here is an example of a `for' statement:
-
- awk '{ for (i = 1; i <= 3; i++)
- print $i
- }'
-
-This prints the first three fields of each input record, one field
-per line.
-
-In the `for' statement, BODY stands for any statement, but
-INITIALIZATION, CONDITION and INCREMENT are just expressions. You
-cannot set more than one variable in the INITIALIZATION part unless
-you use a multiple assignment statement such as `x = y = 0', which is
-possible only if all the initial values are equal. (But you can
-initialize additional variables by writing their assignments as
-separate statements preceding the `for' loop.)
-
-The same is true of the INCREMENT part; to increment additional
-variables, you must write separate statements at the end of the loop.
-The C compound expression, using C's comma operator, would be useful
-in this context, but it is not supported in `awk'.
-
-Most often, INCREMENT is an increment expression, as in the example
-above. But this is not required; it can be any expression whatever.
-For example, this statement prints odd numbers from 1 to 100:
-
- # print odd numbers from 1 to 100
- for (i = 1; i <= 100; i += 2)
- print i
-
-Any of the three expressions following `for' may be omitted if you
-don't want it to do anything. Thus, `for (;x > 0;)' is equivalent to
-`while (x > 0)'. If the CONDITION part is empty, it is treated as
-TRUE, effectively yielding an infinite loop.
-
-In most cases, a `for' loop is an abbreviation for a `while' loop, as
-shown here:
-
- INITIALIZATION
- while (CONDITION) {
- BODY
- INCREMENT
- }
-
-(The only exception is when the `continue' statement (*note
-Continue::.) is used inside the loop; changing a `for' statement to a
-`while' statement in this way can change the effect of the `continue'
-statement inside the loop.)
-
-The `awk' language has a `for' statement in addition to a `while'
-statement because often a `for' loop is both less work to type and
-more natural to think of. Counting the number of iterations is very
-common in loops. It can be easier to think of this counting as part
-of looping rather than as something to do inside the loop.
-
-The next section has more complicated examples of `for' loops.
-
-There is an alternate version of the `for' loop, for iterating over
-all the indices of an array:
-
- for (i in array)
- PROCESS array[i]
-
-*Note Arrays::, for more information on this version of the `for' loop.
-
-
-
-File: gawk-info, Node: Break, Next: Continue, Prev: For, Up: Statements
-
-The `break' Statement
-=====================
-
-The `break' statement jumps out of the innermost `for', `while', or
-`do'--`while' loop that encloses it. The following example finds the
-smallest divisor of any number, and also identifies prime numbers:
-
- awk '# find smallest divisor of num
- { num = $1
- for (div = 2; div*div <= num; div++)
- if (num % div == 0)
- break
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num }'
-
-When the remainder is zero in the first `if' statement, `awk'
-immediately "breaks" out of the containing `for' loop. This means
-that `awk' proceeds immediately to the statement following the loop
-and continues processing. (This is very different from the `exit'
-statement (*note Exit::.) which stops the entire `awk' program.)
-
-Here is another program equivalent to the previous one. It
-illustrates how the CONDITION of a `for' or `while' could just as
-well be replaced with a `break' inside an `if':
-
- awk '# find smallest divisor of num
- { num = $1
- for (div = 2; ; div++) {
- if (num % div == 0) {
- printf "Smallest divisor of %d is %d\n", num, div
- break
- }
- if (div*div > num) {
- printf "%d is prime\n", num
- break
- }
- }
- }'
-
-
-
-File: gawk-info, Node: Continue, Next: Next, Prev: Break, Up: Statements
-
-The `continue' Statement
-========================
-
-The `continue' statement, like `break', is used only inside `for',
-`while', and `do'--`while' loops. It skips over the rest of the loop
-body, causing the next cycle around the loop to begin immediately.
-Contrast this with `break', which jumps out of the loop altogether.
-Here is an example:
-
- # print names that don't contain the string "ignore"
-
- # first, save the text of each line
- { names[NR] = $0 }
-
- # print what we're interested in
- END {
- for (x in names) {
- if (names[x] ~ /ignore/)
- continue
- print names[x]
- }
- }
-
-If any of the input records contain the string `ignore', this example
-skips the print statement and continues back to the first statement
-in the loop.
-
-This isn't a practical example of `continue', since it would be just
-as easy to write the loop like this:
-
- for (x in names)
- if (names[x] !~ /ignore/)
- print names[x]
-
-The `continue' statement causes `awk' to skip the rest of what is
-inside a `for' loop, but it resumes execution with the increment part
-of the `for' loop. The following program illustrates this fact:
-
- awk 'BEGIN {
- for (x = 0; x <= 20; x++) {
- if (x == 5)
- continue
- printf ("%d ", x)
- }
- print ""
- }'
-
-This program prints all the numbers from 0 to 20, except for 5, for
-which the `printf' is skipped. Since the increment `x++' is not
-skipped, `x' does not remain stuck at 5.
-
-
-
-File: gawk-info, Node: Next, Next: Exit, Prev: Continue, Up: Statements
-
-The `next' Statement
-====================
-
-The `next' statement forces `awk' to immediately stop processing the
-current record and go on to the next record. This means that no
-further rules are executed for the current record. The rest of the
-current rule's action is not executed either.
-
-Contrast this with the effect of the `getline' function (*note
-Getline::.). That too causes `awk' to read the next record
-immediately, but it does not alter the flow of control in any way.
-So the rest of the current action executes with a new input record.
-
-At the grossest level, `awk' program execution is a loop that reads
-an input record and then tests each rule pattern against it. If you
-think of this loop as a `for' statement whose body contains the
-rules, then the `next' statement is analogous to a `continue'
-statement: it skips to the end of the body of the loop, and executes
-the increment (which reads another record).
-
-For example, if your `awk' program works only on records with four
-fields, and you don't want it to fail when given bad input, you might
-use the following rule near the beginning of the program:
-
- NF != 4 {
- printf ("line %d skipped: doesn't have 4 fields", FNR) > "/dev/tty"
- next
- }
-
-so that the following rules will not see the bad record. The error
-message is redirected to `/dev/tty' (the terminal), so that it won't
-get lost amid the rest of the program's regular output.
-
-
-
-File: gawk-info, Node: Exit, Prev: Next, Up: Statements
-
-The `exit' Statement
-====================
-
-The `exit' statement causes `awk' to immediately stop executing the
-current rule and to stop processing input; any remaining input is
-ignored.
-
-If an `exit' statement is executed from a `BEGIN' rule the program
-stops processing everything immediately. No input records will be
-read. However, if an `END' rule is present, it will be executed
-(*note BEGIN/END::.).
-
-If `exit' is used as part of an `END' rule, it causes the program to
-stop immediately.
-
-An `exit' statement that is part of an ordinary rule (that is, not part
-of a `BEGIN' or `END' rule) stops the execution of any further
-automatic rules, but the `END' rule is executed if there is one. If
-you don't want the `END' rule to do its job in this case, you can set
-a variable to nonzero before the `exit' statement, and check that
-variable in the `END' rule.
-
-If an argument is supplied to `exit', its value is used as the exit
-status code for the `awk' process. If no argument is supplied,
-`exit' returns status zero (success).
-
-For example, let's say you've discovered an error condition you
-really don't know how to handle. Conventionally, programs report
-this by exiting with a nonzero status. Your `awk' program can do
-this using an `exit' statement with a nonzero argument. Here's an
-example of this:
-
- BEGIN {
- if (("date" | getline date_now) < 0) {
- print "Can't get system date"
- exit 4
- }
- }
-
-
-
-File: gawk-info, Node: Arrays, Next: Built-in, Prev: Statements, Up: Top
-
-Actions: Using Arrays in `awk'
-******************************
-
-An "array" is a table of various values, called "elements". The
-elements of an array are distinguished by their "indices". Names of
-arrays in `awk' are strings of alphanumeric characters and
-underscores, just like regular variables.
-
-You cannot use the same identifier as both a variable and as an array
-name in one `awk' program.
-
-* Menu:
-
-* Intro: Array Intro. Basic facts about arrays in `awk'.
-* Reference to Elements:: How to examine one element of an array.
-* Assigning Elements:: How to change an element of an array.
-* Example: Array Example. Sample program explained.
-
-* Scanning an Array:: A variation of the `for' statement. It loops
- through the indices of an array's existing elements.
-
-* Delete:: The `delete' statement removes an element from an array.
-
-* Multi-dimensional:: Emulating multi--dimensional arrays in `awk'.
-* Multi-scanning:: Scanning multi--dimensional arrays.
-
-
-
-File: gawk-info, Node: Array Intro, Next: Reference to Elements, Up: Arrays
-
-Introduction to Arrays
-======================
-
-The `awk' language has one--dimensional "arrays" for storing groups
-of related strings or numbers. Each array must have a name; valid
-array names are the same as valid variable names, and they do
-conflict with variable names: you can't have both an array and a
-variable with the same name at any point in an `awk' program.
-
-Arrays in `awk' superficially resemble arrays in other programming
-languages; but there are fundamental differences. In `awk', you
-don't need to declare the size of an array before you start to use it.
-What's more, in `awk' any number or even a string may be used as an
-array index.
-
-In most other languages, you have to "declare" an array and specify
-how many elements or components it has. In such languages, the
-declaration causes a contiguous block of memory to be allocated for
-that many elements. An index in the array must be a nonnegative
-integer; for example, the index 0 specifies the first element in the
-array, which is actually stored at the beginning of the block of
-memory. Index 1 specifies the second element, which is stored in
-memory right after the first element, and so on. It is impossible to
-add more elements to the array, because it has room for only as many
-elements as you declared. (Some languages have arrays whose first
-index is 1, others require that you specify both the first and last
-index when you declare the array. In such a language, an array could
-be indexed, for example, from -3 to 17.) A contiguous array of four
-elements might look like this, conceptually, if the element values
-are 8, `"foo"', `""' and 30:
-
- +--------+--------+-------+--------+
- | 8 | "foo" | "" | 30 | value
- +--------+--------+-------+--------+
- 0 1 2 3 index
-
-Only the values are stored; the indices are implicit from the order
-of the values. 8 is the value at index 0, because 8 appears in the
-position with 0 elements before it.
-
-Arrays in `awk' are different: they are "associative". This means
-that each array is a collection of pairs: an index, and its
-corresponding array element value:
-
- Element 4 Value 30
- Element 2 Value "foo"
- Element 1 Value 8
- Element 3 Value ""
-
-We have shown the pairs in jumbled order because their order doesn't
-mean anything.
-
-One advantage of an associative array is that new pairs can be added
-at any time. For example, suppose we add to that array a tenth
-element whose value is `"number ten"'. The result is this:
-
- Element 10 Value "number ten"
- Element 4 Value 30
- Element 2 Value "foo"
- Element 1 Value 8
- Element 3 Value ""
-
-Now the array is "sparse" (i.e. some indices are missing): it has
-elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or 9.
-
-Another consequence of associative arrays is that the indices don't
-have to be positive integers. Any number, or even a string, can be
-an index. For example, here is an array which translates words from
-English into French:
-
- Element "dog" Value "chien"
- Element "cat" Value "chat"
- Element "one" Value "un"
- Element 1 Value "un"
-
-Here we decided to translate the number 1 in both spelled--out and
-numeral form--thus illustrating that a single array can have both
-numbers and strings as indices.
-
-When `awk' creates an array for you, e.g. with the `split' built--in
-function (*note String Functions::.), that array's indices start at
-the number one.
-
-
-
-File: gawk-info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays
-
-Referring to an Array Element
-=============================
-
-The principal way of using an array is to refer to one of its elements.
-An array reference is an expression which looks like this:
-
- ARRAY[INDEX]
-
-Here ARRAY is the name of an array. The expression INDEX is the
-index of the element of the array that you want. The value of the
-array reference is the current value of that array element.
-
-For example, `foo[4.3]' is an expression for the element of array
-`foo' at index 4.3.
-
-If you refer to an array element that has no recorded value, the
-value of the reference is `""', the null string. This includes
-elements to which you have not assigned any value, and elements that
-have been deleted (*note Delete::.). Such a reference automatically
-creates that array element, with the null string as its value. (In
-some cases, this is unfortunate, because it might waste memory inside
-`awk').
-
-You can find out if an element exists in an array at a certain index
-with the expression:
-
- INDEX in ARRAY
-
-This expression tests whether or not the particular index exists,
-without the side effect of creating that element if it is not present.
-The expression has the value 1 (true) if `ARRAY[INDEX]' exists,
-and 0 (false) if it does not exist.
-
-For example, to find out whether the array `frequencies' contains the
-subscript `"2"', you would ask:
-
- if ("2" in frequencies) print "Subscript \"2\" is present."
-
-Note that this is *not* a test of whether or not the array
-`frequencies' contains an element whose *value* is `"2"'. (There is
-no way to do that except to scan all the elements.) Also, this *does
-not* create `frequencies["2"]', while the following (incorrect)
-alternative would:
-
- if (frequencies["2"] != "") print "Subscript \"2\" is present."
-
-
-
-File: gawk-info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays
-
-Assigning Array Elements
-========================
-
-Array elements are lvalues: they can be assigned values just like
-`awk' variables:
-
- ARRAY[SUBSCRIPT] = VALUE
-
-Here ARRAY is the name of your array. The expression SUBSCRIPT is
-the index of the element of the array that you want to assign a
-value. The expression VALUE is the value you are assigning to that
-element of the array.
-
-
-
-File: gawk-info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays
-
-Basic Example of an Array
-=========================
-
-The following program takes a list of lines, each beginning with a
-line number, and prints them out in order of line number. The line
-numbers are not in order, however, when they are first read: they
-are scrambled. This program sorts the lines by making an array using
-the line numbers as subscripts. It then prints out the lines in
-sorted order of their numbers. It is a very simple program, and will
-get confused if it encounters repeated numbers, gaps, or lines that
-don't begin with a number.
-
- BEGIN {
- max=0
- }
-
- {
- if ($1 > max)
- max = $1
- arr[$1] = $0
- }
-
- END {
- for (x = 1; x <= max; x++)
- print arr[x]
- }
-
-The first rule just initializes the variable `max'. (This is not
-strictly necessary, since an uninitialized variable has the null
-string as its value, and the null string is effectively zero when
-used in a context where a number is required.)
-
-The second rule keeps track of the largest line number seen so far;
-it also stores each line into the array `arr', at an index that is
-the line's number.
-
-The third rule runs after all the input has been read, to print out
-all the lines.
-
-When this program is run with the following input:
-
- 5 I am the Five man
- 2 Who are you? The new number two!
- 4 . . . And four on the floor
- 1 Who is number one?
- 3 I three you.
-
- its output is this:
-
- 1 Who is number one?
- 2 Who are you? The new number two!
- 3 I three you.
- 4 . . . And four on the floor
- 5 I am the Five man
-
-
-
-File: gawk-info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays
-
-Scanning All Elements of an Array
-=================================
-
-In programs that use arrays, often you need a loop that will execute
-once for each element of an array. In other languages, where arrays
-are contiguous and indices are limited to nonnegative integers, this is
-easy: the largest index is one less than the length of the array, and
-you can find all the valid indices by counting from zero up to that
-value. This technique won't do the job in `awk', since any number or
-string may be an array index. So `awk' has a special kind of `for'
-statement for scanning an array:
-
- for (VAR in ARRAY)
- BODY
-
-This loop executes BODY once for each different value that your
-program has previously used as an index in ARRAY, with the variable
-VAR set to that index.
-
-Here is a program that uses this form of the `for' statement. The
-first rule scans the input records and notes which words appear (at
-least once) in the input, by storing a 1 into the array `used' with
-the word as index. The second rule scans the elements of `used' to
-find all the distinct words that appear in the input. It prints each
-word that is more than 10 characters long, and also prints the number
-of such words. *Note Built-in::, for more information on the
-built--in function `length'.
-
- # Record a 1 for each word that is used at least once.
- {
- for (i = 1; i <= NF; i++)
- used[$i] = 1
- }
-
- # Find number of distinct words more than 10 characters long.
- END {
- num_long_words = 0
- for (x in used)
- if (length(x) > 10) {
- ++num_long_words
- print x
- }
- print num_long_words, "words longer than 10 characters"
- }
-
-*Note Sample Program::, for a more detailed example of this type.
-
-The order in which elements of the array are accessed by this
-statement is determined by the internal arrangement of the array
-elements within `awk' and cannot be controlled or changed. This can
-lead to problems if new elements are added to ARRAY by statements in
-BODY; you cannot predict whether or not the `for' loop will reach
-them. Similarly, changing VAR inside the loop can produce strange
-results. It is best to avoid such things.
-
-
-
-File: gawk-info, Node: Delete, Next: Multi-dimensional, Prev: Scanning an Array, Up: Arrays
-
-The `delete' Statement
-======================
-
-You can remove an individual element of an array using the `delete'
-statement:
-
- delete ARRAY[INDEX]
-
-When an array element is deleted, it is as if you had never referred
-to it and had never given it any value. Any value the element
-formerly had can no longer be obtained.
-
-Here is an example of deleting elements in an array:
-
- awk '{ for (i in frequencies)
- delete frequencies[i]
- }'
-
-This example removes all the elements from the array `frequencies'.
-
-If you delete an element, the `for' statement to scan the array will
-not report that element, and the `in' operator to check for the
-presence of that element will return 0:
-
- delete foo[4]
- if (4 in foo)
- print "This will never be printed"
-
-
-
-File: gawk-info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Delete, Up: Arrays
-
-Multi--dimensional arrays
-=========================
-
-A multi--dimensional array is an array in which an element is
-identified by a sequence of indices, not a single index. For
-example, a two--dimensional array requires two indices. The usual
-way (in most languages, including `awk') to refer to an element of a
-two--dimensional array named `grid' is with `grid[x,y]'.
-
-Multi--dimensional arrays are supported in `awk' through
-concatenation of indices into one string. What happens is that `awk'
-converts the indices into strings (*note Conversion::.) and
-concatenates them together, with a separator between them. This
-creates a single string that describes the values of the separate
-indices. The combined string is used as a single index into an
-ordinary, one--dimensional array. The separator used is the value of
-the special variable `SUBSEP'.
-
-For example, suppose the value of `SUBSEP' is `","' and the
-expression `foo[5,12]="value"' is executed. The numbers 5 and 12
-will be concatenated with a comma between them, yielding `"5,12"';
-thus, the array element `foo["5,12"]' will be set to `"value"'.
-
-Once the element's value is stored, `awk' has no record of whether it
-was stored with a single index or a sequence of indices. The two
-expressions `foo[5,12]' and `foo[5 SUBSEP 12]' always have the same
-value.
-
-The default value of `SUBSEP' is not a comma; it is the string
-`"\034"', which contains a nonprinting character that is unlikely to
-appear in an `awk' program or in the input data.
-
-The usefulness of choosing an unlikely character comes from the fact
-that index values that contain a string matching `SUBSEP' lead to
-combined strings that are ambiguous. Suppose that `SUBSEP' is a
-comma; then `foo["a,b", "c"]' and `foo["a", "b,c"]' will be
-indistinguishable because both are actually stored as `foo["a,b,c"]'.
-Because `SUBSEP' is `"\034"', such confusion can actually happen only
-when an index contains the character `"\034"', which is a rare event.
-
-You can test whether a particular index--sequence exists in a
-``multi--dimensional'' array with the same operator `in' used for
-single dimensional arrays. Instead of a single index as the
-left--hand operand, write the whole sequence of indices, separated by
-commas, in parentheses:
-
- (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY
-
-The following example treats its input as a two--dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
-result. It assumes that all lines have the same number of elements.
-
- awk 'BEGIN {
- max_nf = max_nr = 0
- }
-
- {
- if (max_nf < NF)
- max_nf = NF
- max_nr = NR
- for (x = 1; x <= NF; x++)
- vector[x, NR] = $x
- }
-
- END {
- for (x = 1; x <= max_nf; x++) {
- for (y = max_nr; y >= 1; --y)
- printf("%s ", vector[x, y])
- printf("\n")
- }
- }'
-
-When given the input:
-
- 1 2 3 4 5 6
- 2 3 4 5 6 1
- 3 4 5 6 1 2
- 4 5 6 1 2 3
-
-it produces:
-
- 4 3 2 1
- 5 4 3 2
- 6 5 4 3
- 1 6 5 4
- 2 1 6 5
- 3 2 1 6
-
-
-
-File: gawk-info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays
-
-Scanning Multi--dimensional Arrays
-==================================
-
-There is no special `for' statement for scanning a
-``multi--dimensional'' array; there cannot be one, because in truth
-there are no multi--dimensional arrays or elements; there is only a
-multi--dimensional *way of accessing* an array.
-
-However, if your program has an array that is always accessed as
-multi--dimensional, you can get the effect of scanning it by
-combining the scanning `for' statement (*note Scanning an Array::.)
-with the `split' built--in function (*note String Functions::.). It
-works like this:
-
- for (combined in ARRAY) {
- split (combined, separate, SUBSEP)
- ...
- }
-
-This finds each concatenated, combined index in the array, and splits
-it into the individual indices by breaking it apart where the value
-of `SUBSEP' appears. The split--out indices become the elements of
-the array `separate'.
-
-Thus, suppose you have previously stored in `ARRAY[1, "foo"]'; then
-an element with index `"1\034foo"' exists in ARRAY. (Recall that the
-default value of `SUBSEP' contains the character with code 034.)
-Sooner or later the `for' statement will find that index and do an
-iteration with `combined' set to `"1\034foo"'. Then the `split'
-function will be called as follows:
-
- split ("1\034foo", separate, "\034")
-
-The result of this is to set `separate[1]' to 1 and `separate[2]' to
-`"foo"'. Presto, the original sequence of separate indices has been
-recovered.
-
-
-
-File: gawk-info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top
-
-Built--in functions
-*******************
-
-"Built--in" functions are functions always available for your `awk'
-program to call. This chapter defines all the built--in functions
-that exist; some of them are mentioned in other sections, but they
-are summarized here for your convenience. (You can also define new
-functions yourself. *Note User-defined::.)
-
-In most cases, any extra arguments given to built--in functions are
-ignored. The defaults for omitted arguments vary from function to
-function and are described under the individual functions.
-
-The name of a built--in function need not be followed immediately by
-the opening left parenthesis of the arguments; whitespace is allowed.
-However, it is wise to write no space there, since user--defined
-functions do not allow space.
-
-When a function is called, expressions that create the function's
-actual parameters are evaluated completely before the function call
-is performed. For example, in the code fragment:
-
- i = 4
- j = myfunc(i++)
-
-the variable `i' will be set to 5 before `myfunc' is called with a
-value of 4 for its actual parameter.
-
-* Menu:
-
-* Numeric Functions:: Functions that work with numbers,
- including `int', `sin' and `rand'.
-
-* String Functions:: Functions for string manipulation,
- such as `split', `match', and `sprintf'.
-
-* I/O Functions:: Functions for files and shell commands
-
-
-
-File: gawk-info, Node: Numeric Functions, Next: String Functions, Up: Built-in
-
-Numeric Built--in Functions
-===========================
-
-The general syntax of the numeric built--in functions is the same for
-each. Here is an example of that syntax:
-
- awk '# Read input records containing a pair of points: x0, y0, x1, y1.
- # Print the points and the distance between them.
- { printf "%f %f %f %f %f\n", $1, $2, $3, $4,
- sqrt(($3-$1) * ($3-$1) + ($4-$2) * ($4-$2)) }'
-
-This calculates the square root of a calculation that uses the values
-of the fields. It then prints the first four fields of the input
-record and the result of the square root calculation.
-
-Here is the full list of numeric built--in functions:
-
-`int(X)'
- This gives you the integer part of X, truncated toward 0. This
- produces the nearest integer to X, located between X and 0.
-
- For example, `int(3)' is 3, `int(3.9)' is 3, `int(-3.9)' is -3,
- and `int(-3)' is -3 as well.
-
-`sqrt(X)'
- This gives you the positive square root of X. It reports an
- error if X is negative.
-
-`exp(X)'
- This gives you the exponential of X, or reports an error if X is
- out of range. The range of values X can have depends on your
- machine's floating point representation.
-
-`log(X)'
- This gives you the natural logarithm of X, if X is positive;
- otherwise, it reports an error.
-
-`sin(X)'
- This gives you the sine of X, with X in radians.
-
-`cos(X)'
- This gives you the cosine of X, with X in radians.
-
-`atan2(Y, X)'
- This gives you the arctangent of Y/X; the result is in radians.
-
-`rand()'
- This gives you a random number. The values of `rand()' are
- uniformly--distributed between 0 and 1. The value is never 0
- and never 1.
-
- Often you want random integers instead. Here is a user--defined
- function you can use to obtain a random nonnegative integer less
- than N:
-
- function randint(n) {
- return int(n * rand())
- }
-
- The multiplication produces a random real number at least 0, and
- less than N. We then make it an integer (using `int') between 0
- and `N-1'.
-
- Here is an example where a similar function is used to produce
- random integers between 1 and N:
-
- awk '
- # Function to roll a simulated die.
- function roll(n) { return 1 + int(rand() * n) }
-
- # Roll 3 six--sided dice and print total number of points.
- {
- printf("%d points\n", roll(6)+roll(6)+roll(6))
- }'
-
- *Note* that `rand()' starts generating numbers from the same
- point, or "seed", each time you run `awk'. This means that the
- same program will produce the same results each time you run it.
- The numbers are random within one `awk' run, but predictable
- from run to run. This is convenient for debugging, but if you
- want a program to do different things each time it is used, you
- must change the seed to a value that will be different in each
- run. To do this, use `srand'.
-
-`srand(X)'
- The function `srand(X)' sets the starting point, or "seed", for
- generating random numbers to the value X.
-
- Each seed value leads to a particular sequence of ``random''
- numbers. Thus, if you set the seed to the same value a second
- time, you will get the same sequence of ``random'' numbers again.
-
- If you omit the argument X, as in `srand()', then the current
- date and time of day are used for a seed. This is the way to
- get random numbers that are truly unpredictable.
-
- The return value of `srand()' is the previous seed. This makes
- it easy to keep track of the seeds for use in consistently
- reproducing sequences of random numbers.
-
-
-
-File: gawk-info, Node: String Functions, Next: I/O Functions, Prev: Numeric Functions, Up: Built-in
-
-Built--in Functions for String Manipulation
-===========================================
-
-`index(IN, FIND)'
- This searches the string IN for the first occurrence of the
- string FIND, and returns the position where that occurrence
- begins in the string IN. For example:
-
- awk 'BEGIN { print index("peanut", "an") }'
-
- prints `3'. If FIND is not found, `index' returns 0.
-
-`length(STRING)'
- This gives you the number of characters in STRING. If STRING is
- a number, the length of the digit string representing that
- number is returned. For example, `length("abcde")' is 5.
- Whereas, `length(15 * 35)' works out to 3. How? Well, 15 * 35
- = 525, and 525 is then converted to the string `"525"', which
- has three characters.
-
-`match(STRING, REGEXP)'
- The `match' function searches the string, STRING, for the
- longest, leftmost substring matched by the regular expression,
- REGEXP. It returns the character position, or "index", of where
- that substring begins (1, if it starts at the beginning of
- STRING). If no match is found, it returns 0.
-
- The `match' function sets the special variable `RSTART' to the
- index. It also sets the special variable `RLENGTH' to the
- length of the matched substring. If no match is found, `RSTART'
- is set to 0, and `RLENGTH' to -1.
-
- For example:
-
- awk '{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where)
- print "Match of", regex, "found at", where, "in", $0
- }
- }'
-
- This program looks for lines that match the regular expression
- stored in the variable `regex'. This regular expression can be
- changed. If the first word on a line is `FIND', `regex' is
- changed to be the second word on that line. Therefore, given:
-
- FIND fo*bar
- My program was a foobar
- But none of it would doobar
- FIND Melvin
- JF+KM
- This line is property of The Reality Engineering Co.
- This file was created by Melvin.
-
- `awk' prints:
-
- Match of fo*bar found at 18 in My program was a foobar
- Match of Melvin found at 26 in This file was created by Melvin.
-
-`split(STRING, ARRAY, FIELD_SEPARATOR)'
- This divides STRING up into pieces separated by FIELD_SEPARATOR,
- and stores the pieces in ARRAY. The first piece is stored in
- `ARRAY[1]', the second piece in `ARRAY[2]', and so forth. The
- string value of the third argument, FIELD_SEPARATOR, is used as
- a regexp to search for to find the places to split STRING. If
- the FIELD_SEPARATOR is omitted, the value of `FS' is used.
- `split' returns the number of elements created.
-
- The `split' function, then, splits strings into pieces in a
- manner similar to the way input lines are split into fields.
- For example:
-
- split("auto-da-fe", a, "-")
-
- splits the string `auto-da-fe' into three fields using `-' as
- the separator. It sets the contents of the array `a' as follows:
-
- a[1] = "auto"
- a[2] = "da"
- a[3] = "fe"
-
- The value returned by this call to `split' is 3.
-
-`sprintf(FORMAT, EXPRESSION1,...)'
- This returns (without printing) the string that `printf' would
- have printed out with the same arguments (*note Printf::.). For
- example:
-
- sprintf("pi = %.2f (approx.)", 22/7)
-
- returns the string `"pi = 3.14 (approx.)"'.
-
-`sub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)'
- The `sub' function alters the value of TARGET_VARIABLE. It
- searches this value, which should be a string, for the leftmost
- substring matched by the regular expression, REGEXP, extending
- this match as far as possible. Then the entire string is
- changed by replacing the matched text with REPLACEMENT_STRING.
- The modified string becomes the new value of TARGET_VARIABLE.
-
- This function is peculiar because TARGET_VARIABLE is not simply
- used to compute a value, and not just any expression will do: it
- must be a variable, field or array reference, so that `sub' can
- store a modified value there. If this argument is omitted, then
- the default is to use and alter `$0'.
-
- For example:
-
- str = "water, water, everywhere"
- sub(/at/, "ith", str)
-
- sets `str' to `"wither, water, everywhere"', by replacing the
- leftmost, longest occurrence of `at' with `ith'.
-
- The `sub' function returns the number of substitutions made
- (either one or zero).
-
- The special character, `&', in the replacement string,
- REPLACEMENT_STRING, stands for the precise substring that was
- matched by REGEXP. (If the regexp can match more than one
- string, then this precise substring may vary.) For example:
-
- awk '{ sub(/candidate/, "& and his wife"); print }'
-
- will change the first occurrence of ``candidate'' to ``candidate
- and his wife'' on each input line.
-
- The effect of this special character can be turned off by
- preceding it with a backslash (`\&'). To include a backslash in
- the replacement string, it too must be preceded with a (second)
- backslash.
-
- Note: if you use `sub' with a third argument that is not a
- variable, field or array element reference, then it will still
- search for the pattern and return 0 or 1, but the modified
- string is thrown away because there is no place to put it. For
- example:
-
- sub(/USA/, "United States", "the USA and Canada")
-
- will indeed produce a string `"the United States and Canada"',
- but there will be no way to use that string!
-
-`gsub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)'
- This is similar to the `sub' function, except `gsub' replaces
- *all* of the longest, leftmost, *non--overlapping* matching
- substrings it can find. The ``g'' in `gsub' stands for
- "global", which means replace *everywhere*. For example:
-
- awk '{ gsub(/Britain/, "United Kingdom"); print }'
-
- replaces all occurrences of the string `Britain' with `United
- Kingdom' for all input records.
-
- The `gsub' function returns the number of substitutions made.
- If the variable to be searched and altered, TARGET_VARIABLE, is
- omitted, then the entire input record, `$0', is used.
-
- The characters `&' and `\' are special in `gsub' as they are in
- `sub' (see immediately above).
-
-`substr(STRING, START, LENGTH)'
- This returns a LENGTH--character--long substring of STRING,
- starting at character number START. The first character of a
- string is character number one. For example,
- `substr("washington", 5, 3)' returns `"ing"'.
-
- If LENGTH is not present, this function returns the whole suffix
- of STRING that begins at character number START. For example,
- `substr("washington", 5)' returns `"ington"'.
-
-
-
-File: gawk-info, Node: I/O Functions, Prev: String Functions, Up: Built-in
-
-Built--in Functions for I/O to Files and Commands
-=================================================
-
-`close(FILENAME)'
- Close the file FILENAME. The argument may alternatively be a
- shell command that was used for redirecting to or from a pipe;
- then the pipe is closed.
-
- *Note Close Input::, regarding closing input files and pipes.
- *Note Close Output::, regarding closing output files and pipes.
-
-`system(COMMAND)'
- The system function allows the user to execute operating system
- commands and then return to the `awk' program. The `system'
- function executes the command given by the string value of
- COMMAND. It returns, as its value, the status returned by the
- command that was executed. This is known as returning the "exit
- status".
-
- For example, if the following fragment of code is put in your
- `awk' program:
-
- END {
- system("mail -s 'awk run done' operator < /dev/null")
- }
-
- the system operator will be sent mail when the `awk' program
- finishes processing input and begins its end--of--input
- processing.
-
- Note that much the same result can be obtained by redirecting
- `print' or `printf' into a pipe. However, if your `awk' program
- is interactive, this function is useful for cranking up large
- self--contained programs, such as a shell or an editor.
-
-
-
-File: gawk-info, Node: User-defined, Next: Special, Prev: Built-in, Up: Top
-
-User--defined Functions
-***********************
-
-Complicated `awk' programs can often be simplified by defining your
-own functions. User--defined functions can be called just like
-built--in ones (*note Function Calls::.), but it is up to you to
-define them--to tell `awk' what they should do.
-
-* Menu:
-
-* Definition Syntax:: How to write definitions and what they mean.
-* Function Example:: An example function definition and what it does.
-* Function Caveats:: Things to watch out for.
-* Return Statement:: Specifying the value a function returns.
-
-
-
-File: gawk-info, Node: Definition Syntax, Next: Function Example, Up: User-defined
-
-Syntax of Function Definitions
-==============================
-
-The definition of a function named NAME looks like this:
-
- function NAME (PARAMETER-LIST) {
- BODY-OF-FUNCTION
- }
-
-A valid function name is like a valid variable name: a sequence of
-letters, digits and underscores, not starting with a digit.
-
-Such function definitions can appear anywhere between the rules of
-the `awk' program. The general format of an `awk' program, then, is
-now modified to include sequences of rules *and* user--defined
-function definitions.
-
-The function definition need not precede all the uses of the function.
-This is because `awk' reads the entire program before starting to
-execute any of it.
-
-The PARAMETER-LIST is a list of the function's "local" variable
-names, separated by commas. Within the body of the function, local
-variables refer to arguments with which the function is called. If
-the function is called with fewer arguments than it has local
-variables, this is not an error; the extra local variables are simply
-set to the null string.
-
-The local variable values hide or "shadow" any variables of the same
-names used in the rest of the program. The shadowed variables are
-not accessible in the function definition, because there is no way to
-name them while their names have been taken away for the local
-variables. All other variables used in the `awk' program can be
-referenced or set normally in the function definition.
-
-The local variables last only as long as the function is executing.
-Once the function finishes, the shadowed variables come back.
-
-The BODY-OF-FUNCTION part of the definition is the most important
-part, because this is what says what the function should actually *do*.
-The local variables exist to give the body a way to talk about the
-arguments.
-
-Functions may be "recursive", i.e., they can call themselves, either
-directly, or indirectly (via calling a second function that calls the
-first again).
-
-The keyword `function' may also be written `func'.
-
-
-
-File: gawk-info, Node: Function Example, Next: Function Caveats, Prev: Definition Syntax, Up: User-defined
-
-Function Definition Example
-===========================
-
-Here is an example of a user--defined function, called `myprint',
-that takes a number and prints it in a specific format.
-
- function myprint(num)
- {
- printf "%6.3g\n", num
- }
-
-To illustrate, let's use the following `awk' rule to use, or "call",
-our `myprint' function:
-
- $3 > 0 { myprint($3) }
-
-This program prints, in our special format, all the third fields that
-contain a positive number in our input. Therefore, when given:
-
- 1.2 3.4 5.6 7.8
- 9.10 11.12 13.14 15.16
- 17.18 19.20 21.22 23.24
-
-this program, using our function to format the results, will print:
-
- 5.6
- 13.1
- 21.2
-
-Here is a rather contrived example of a recursive function. It
-prints a string backwards:
-
- function rev (str, len) {
- if (len == 0) {
- printf "\n"
- return
- }
- printf "%c", substr(str, len, 1)
- rev(str, len - 1)
- }
-
-
-
-File: gawk-info, Node: Function Caveats, Next: Return Statement, Prev: Function Example, Up: User-defined
-
-Caveats of Function Calling
-===========================
-
-*Note* that there cannot be any blanks between the function name and
-the left parenthesis of the argument list, when calling a user--defined function.
-This is so `awk' can tell you are not trying to concatenate the value
-of a variable with the value of an expression inside the parentheses.
-
-When a function is called, it is given a *copy* of the values of its
-arguments. This is called "passing by value". The caller may use a
-variable as the expression for the argument, but the called function
-does not know this: all it knows is what value the argument had. For
-example, if you write this code:
-
- foo = "bar"
- z = myfunc(foo)
-
-then you should not think of the argument to `myfunc' as being ``the
-variable `foo'''. Instead, think of the argument as the string
-value, `"bar"'.
-
-If the function `myfunc' alters the values of its local variables,
-this has no effect on any other variables. In particular, if
-`myfunc' does this:
-
- function myfunc (win) {
- print win
- win = "zzz"
- print win
- }
-
-to change its first argument variable `win', this *does not* change
-the value of `foo' in the caller. The role of `foo' in calling
-`myfunc' ended when its value, `"bar"', was computed. If `win' also
-exists outside of `myfunc', this definition will not change it--that
-value is shadowed during the execution of `myfunc' and cannot be seen
-or changed from there.
-
-However, when arrays are the parameters to functions, they are *not*
-copied. Instead, the array itself is made available for direct
-manipulation by the function. This is usually called "passing by
-reference". Changes made to an array parameter inside the body of a
-function *are* visible outside that function. *This can be very
-dangerous if you don't watch what you are doing.* For example:
-
- function changeit (array, ind, nvalue) {
- array[ind] = nvalue
- }
-
- BEGIN {
- a[1] = 1 ; a[2] = 2 ; a[3] = 3
- changeit(a, 2, "two")
- printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
- }
-
-will print `a[1] = 1, a[2] = two, a[3] = 3', because the call to
-`changeit' stores `"two"' in the second element of `a'.
-
-
-
-File: gawk-info, Node: Return Statement, Prev: Function Caveats, Up: User-defined
-
-The `return' statement
-======================
-
-The body of a user--defined function can contain a `return' statement.
-This statement returns control to the rest of the `awk' program. It
-can also be used to return a value for use in the rest of the `awk'
-program. It looks like:
-
- `return EXPRESSION'
-
-The EXPRESSION part is optional. If it is omitted, then the returned
-value is undefined and, therefore, unpredictable.
-
-A `return' statement with no value expression is assumed at the end
-of every function definition. So if control reaches the end of the
-function definition, then the function returns an unpredictable value.
-
-Here is an example of a user--defined function that returns a value
-for the largest number among the elements of an array:
-
- function maxelt (vec, i, ret) {
- for (i in vec) {
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- }
- return ret
- }
-
-You call `maxelt' with one argument, an array name. The local
-variables `i' and `ret' are not intended to be arguments; while there
-is nothing to stop you from passing two or three arguments to
-`maxelt', the results would be strange.
-
-When writing a function definition, it is conventional to separate
-the parameters from the local variables with extra spaces, as shown
-above in the definition of `maxelt'.
-
-Here is a program that uses, or calls, our `maxelt' function. This
-program loads an array, calls `maxelt', and then reports the maximum
-number in that array:
-
- awk '
- function maxelt (vec, i, ret) {
- for (i in vec) {
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- }
- return ret
- }
-
- # Load all fields of each record into nums.
- {
- for(i = 1; i <= NF; i++)
- nums[NR, i] = $i
- }
-
- END {
- print maxelt(nums)
- }'
-
-Given the following input:
-
- 1 5 23 8 16
- 44 3 5 2 8 26
- 256 291 1396 2962 100
- -6 467 998 1101
- 99385 11 0 225
-
-our program tells us (predictably) that:
-
- 99385
-
-is the largest number in our array.
-
-
-
-File: gawk-info, Node: Special, Next: Sample Program, Prev: User-defined, Up: Top
-
-Special Variables
-*****************
-
-Most `awk' variables are available for you to use for your own
-purposes; they will never change except when your program assigns
-them, and will never affect anything except when your program
-examines them.
-
-A few variables have special meanings. Some of them `awk' examines
-automatically, so that they enable you to tell `awk' how to do
-certain things. Others are set automatically by `awk', so that they
-carry information from the internal workings of `awk' to your program.
-
-Most of these variables are also documented in the chapters where
-their areas of activity are described.
-
-* Menu:
-
-* User-modified:: Special variables that you change to control `awk'.
-
-* Auto-set:: Special variables where `awk' gives you information.
-
-
-
-File: gawk-info, Node: User-modified, Next: Auto-set, Up: Special
-
-Special Variables That Control `awk'
-====================================
-
-This is a list of the variables which you can change to control how
-`awk' does certain things.
-
-`FS'
- `FS' is the input field separator (*note Field Separators::.).
- The value is a regular expression that matches the separations
- between fields in an input record.
-
- The default value is `" "', a string consisting of a single
- space. As a special exception, this value actually means that
- any sequence of spaces and tabs is a single separator. It also
- causes spaces and tabs at the beginning or end of a line to be
- ignored.
-
- You can set the value of `FS' on the command line using the `-F'
- option:
-
- awk -F, 'PROGRAM' INPUT-FILES
-
-`OFMT'
- This string is used by `awk' to control conversion of numbers to
- strings (*note Conversion::.). It works by being passed, in
- effect, as the first argument to the `sprintf' function. Its
- default value is `"%.6g"'.
-
-`OFS'
- This is the output field separator (*note Output Separators::.).
- It is output between the fields output by a `print' statement.
- Its default value is `" "', a string consisting of a single space.
-
-`ORS'
- This is the output record separator (*note Output
- Separators::.). It is output at the end of every `print'
- statement. Its default value is the newline character, often
- represented in `awk' programs as `\n'.
-
-`RS'
- This is `awk''s record separator (*note Records::.). Its
- default value is a string containing a single newline character,
- which means that an input record consists of a single line of
- text.
-
-`SUBSEP'
- `SUBSEP' is a subscript separator (*note Multi-dimensional::.).
- It has the default value of `"\034"', and is used to separate
- the parts of the index of a multi-dimensional array. Thus, if
- you access `foo[12,3]', it really accesses `foo["12\0343"]'.
-
-
-
-File: gawk-info, Node: Auto-set, Prev: User-modified, Up: Special
-
-Special Variables That Convey Information to You
-================================================
-
-This is a list of the variables that are set automatically by `awk'
-on certain occasions so as to provide information for your program.
-
-`ARGC'
-`ARGV'
- The command--line arguments available to `awk' are stored in an
- array called `ARGV'. `ARGC' is the number of command--line
- arguments present. `ARGV' is indexed from zero to `ARGC' - 1.
- For example:
-
- awk '{ print ARGV[$1] }' inventory-shipped BBS-list
-
- In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains
- `"inventory-shipped"', and `ARGV[2]' contains `"BBS-list"'.
- `ARGC' is 3, one more than the index of the last element in
- `ARGV' since the elements are numbered from zero.
-
- Notice that the `awk' program is not treated as an argument.
- The `-f' `FILENAME' option, and the `-F' option, are also not
- treated as arguments for this purpose.
-
- Variable assignments on the command line *are* treated as
- arguments, and do show up in the `ARGV' array.
-
- Your program can alter `ARGC' and the elements of `ARGV'. Each time
- `awk' reaches the end of an input file, it uses the next element
- of `ARGV' as the name of the next input file. By storing a
- different string there, your program can change which files are
- read. You can use `-' to represent the standard input. By
- storing additional elements and incrementing `ARGC' you can
- cause additional files to be read.
-
- If you decrease the value of `ARGC', that eliminates input files
- from the end of the list. By recording the old value of `ARGC'
- elsewhere, your program can treat the eliminated arguments as
- something other than file names.
-
- To eliminate a file from the middle of the list, store the null
- string (`""') into `ARGV' in place of the file's name. As a
- special feature, `awk' ignores file names that have been
- replaced with the null string.
-
-`ENVIRON'
- This is an array that contains the values of the environment.
- The array indices are the environment variable names; the values
- are the values of the particular environment variables. For
- example, `ENVIRON["HOME"]' might be `/u/close'. Changing this
- array does not affect the environment passed on to any programs
- that `awk' may spawn via redirection or the `system' function.
- (This may not work under operating systems other than MS-DOS,
- Unix, or GNU.)
-
-`FILENAME'
- This is the name of the file that `awk' is currently reading.
- If `awk' is reading from the standard input (in other words,
- there are no files listed on the command line), `FILENAME' is
- set to `"-"'. `FILENAME' is changed each time a new file is
- read (*note Reading Files::.).
-
-`FNR'
- `FNR' is the current record number in the current file. `FNR'
- is incremented each time a new record is read (*note Getline::.).
- It is reinitialized to 0 each time a new input file is started.
-
-`NF'
- `NF' is the number of fields in the current input record. `NF'
- is set each time a new record is read, when a new field is
- created, or when $0 changes (*note Fields::.).
-
-`NR'
- This is the number of input records `awk' has processed since
- the beginning of the program's execution (*note Records::.).
- `NR' is set each time a new record is read.
-
-`RLENGTH'
- `RLENGTH' is the length of the string matched by the `match'
- function (*note String Functions::.). `RLENGTH' is set by
- invoking the `match' function. Its value is the length of the
- matched string, or -1 if no match was found.
-
-`RSTART'
- `RSTART' is the start of the string matched by the `match'
- function (*note String Functions::.). `RSTART' is set by
- invoking the `match' function. Its value is the position in the
- string where the matched substring starts, or 0 if no match was
- found.
-
-
-
-File: gawk-info, Node: Sample Program, Next: Notes, Prev: Special, Up: Top
-
-Sample Program
-**************
-
-The following example is a complete `awk' program, which prints the
-number of occurrences of each word in its input. It illustrates the
-associative nature of `awk' arrays by using strings as subscripts.
-It also demonstrates the `for X in ARRAY' construction. Finally, it
-shows how `awk' can be used in conjunction with other utility
-programs to do a useful task of some complexity with a minimum of
-effort. Some explanations follow the program listing.
-
- awk '
- # Print list of word frequencies
- {
- for (i = 1; i <= NF; i++)
- freq[$i]++
- }
-
- END {
- for (word in freq)
- printf "%s\t%d\n", word, freq[word]
- }'
-
-The first thing to notice about this program is that it has two
-rules. The first rule, because it has an empty pattern, is executed
-on every line of the input. It uses `awk''s field--accessing
-mechanism (*note Fields::.) to pick out the individual words from the
-line, and the special variable `NF' (*note Special::.) to know how
-many fields are available.
-
-For each input word, an element of the array `freq' is incremented to
-reflect that the word has been seen an additional time.
-
-The second rule, because it has the pattern `END', is not executed
-until the input has been exhausted. It prints out the contents of
-the `freq' table that has been built up inside the first action.
-
-Note that this program has several problems that would prevent it
-from being useful by itself on real text files:
-
- * Words are detected using the `awk' convention that fields are
- separated by whitespace and that other characters in the input
- (except newlines) don't have any special meaning to `awk'. This
- means that punctuation characters count as part of words.
-
- * The `awk' language considers upper and lower case characters to
- be distinct. Therefore, `foo' and `Foo' will not be treated by
- this program as the same word. This is undesirable since in
- normal text, words are capitalized if they begin sentences, and
- a frequency analyzer should not be sensitive to that.
-
- * The output does not come out in any useful order. You're more
- likely to be interested in which words occur most frequently, or
- having an alphabetized table of how frequently each word occurs.
-
-The way to solve these problems is to use other operating system
-utilities to process the input and output of the `awk' script.
-Suppose the script shown above is saved in the file `frequency.awk'.
-Then the shell command:
-
- tr A-Z a-z < file1 | tr -cd 'a-z\012' \
- | awk -f frequency.awk \
- | sort +1 -nr
-
-produces a table of the words appearing in `file1' in order of
-decreasing frequency.
-
-The first `tr' command in this pipeline translates all the upper case
-characters in `file1' to lower case. The second `tr' command deletes
-all the characters in the input except lower case characters and
-newlines. The second argument to the second `tr' is quoted to
-protect the backslash in it from being interpreted by the shell. The
-`awk' program reads this suitably massaged data and produces a word
-frequency table, which is not ordered.
-
-The `awk' script's output is now sorted by the `sort' command and
-printed on the terminal. The options given to `sort' in this example
-specify that the sort key is the second field of each input line
-(skipping one field), that the sort keys should be treated as numeric
-quantities (otherwise `15' would come before `5'), and that the
-sorting should be done in descending (reverse) order.
-
-See the general operating system documentation for more information
-on how to use the `tr' and `sort' commands.
-
-
-
-File: gawk-info, Node: Notes, Next: Glossary, Prev: Sample Program, Up: Top
-
-Implementation Notes
-********************
-
-This appendix contains information mainly of interest to implementors
-and maintainers of `gawk'. Everything in it applies specifically to
-`gawk', and not to other implementations.
-
-* Menu:
-
-* Extensions:: Things `gawk' does that Unix `awk' does not.
-
-* Future Extensions:: Things likely to appear in a future release.
-
-* Improvements:: Suggestions for future improvements.
-
-* Manual Improvements:: Suggestions for improvements to this manual.
-
-
-
-File: gawk-info, Node: Extensions, Next: Future Extensions, Up: Notes
-
-GNU Extensions to the AWK Language
-==================================
-
-Several new features are in a state of flux. They are described here
-merely to document them somewhat, but they will probably change. We
-hope they will be incorporated into other versions of `awk', too.
-
-All of these features can be turned off either by compiling `gawk'
-with `-DSTRICT', or by invoking `gawk' as `awk'.
-
-The `AWKPATH' environment variable
- When opening a file supplied via the `-f' option, if the
- filename does not contain a `/', `gawk' will perform a "path
- search" for the file, similar to that performed by the shell.
- `gawk' gets its search path from the `AWKPATH' environment
- variable. If that variable does not exist, it uses the default
- path `".:/usr/lib/awk:/usr/local/lib/awk"'.
-
-Case Independent Matching
- Two new operators have been introduced: `~~' and `!~~'. These
- perform regular expression match and no-match operations that
- are case independent. In other words, `A' and `a' would both
- match `/a/'.
-
-The `-i' option
- This option causes the `~' and `!~' operators to behave like the
- `~~' and `!~~' operators described above.
-
-The `-v' option
- This option prints version information for this particular copy
- of `gawk'. This is so you can determine if your copy of `gawk'
- is up to date with respect to whatever the Free Software
- Foundation is currently distributing. It may disappear in a
- future version of `gawk'.
-
-
-
-File: gawk-info, Node: Future Extensions, Next: Improvements, Prev: Extensions, Up: Notes
-
-Extensions Likely To Appear In A Future Release
-===============================================
-
-Here are some more extensions that indicate the directions we are
-currently considering for `gawk'. Like the previous section, this
-section is also subject to change. None of these are implemented yet.
-
-The `IGNORECASE' special variable
- If `IGNORECASE' is non--zero, then *all* regular expression
- matching will be done in a case--independent fashion. The `-i'
- option and the `~~' and `!~~' operators will go away, as this
- mechanism generalizes those facilities.
-
-More Escape Sequences
- The ANSI C `\a' and `\x' escape sequences will be recognized.
- Unix `awk' does not recognize `\v', although `gawk' does.
-
-`RS' as a regexp
- The meaning of `RS' will be generalized along the lines of `FS'.
-
-Transliteration Functions
- We are planning on adding `toupper' and `tolower' functions
- which will take string arguments, and return strings where the
- case of each letter has been transformed to upper-- or
- lower--case respectively.
-
-Access To System File Descriptors
- `gawk' will recognize the special file names `/dev/stdin',
- `/dev/stdout', `/dev/stderr', and `/dev/fd/N' internally. These
- will allow access to inherited file descriptors from within an
- `awk' program.
-
-
-
-File: gawk-info, Node: Improvements, Next: Manual Improvements, Prev: Future Extensions, Up: Notes
-
-Suggestions for Future Improvements
-===================================
-
-Here are some projects that would--be `gawk' hackers might like to
-take on. They vary in size from a few days to a few weeks of
-programming, depending on which one you choose and how fast a
-programmer you are. Please send any improvements you write to the
-maintainers at the GNU project.
-
- 1. State machine regexp matcher: At present, `gawk' uses the
- backtracking regular expression matcher from the GNU subroutine
- library. If a regexp is really going to be used a lot of times,
- it is faster to convert it once to a description of a finite
- state machine, then run a routine simulating that machine every
- time you want to match the regexp. You could use the matching
- routines used by GNU `egrep'.
-
- 2. Compilation of `awk' programs: `gawk' uses a `Bison'
- (YACC--like) parser to convert the script given it into a syntax
- tree; the syntax tree is then executed by a simple recursive
- evaluator. Both of these steps incur a lot of overhead, since
- parsing can be slow (especially if you also do the previous
- project and convert regular expressions to finite state machines
- at compile time) and the recursive evaluator performs many
- procedure calls to do even the simplest things.
-
- It should be possible for `gawk' to convert the script's parse
- tree into a C program which the user would then compile, using
- the normal C compiler and a special `gawk' library to provide
- all the needed functions (regexps, fields, associative arrays,
- type coercion, and so on).
-
- An easier possibility might be for an intermediate phase of
- `awk' to convert the parse tree into a linear byte code form
- like the one used in GNU Emacs Lisp. The recursive evaluator
- would then be replaced by a straight line byte code interpreter
- that would be intermediate in speed between running a compiled
- program and doing what `gawk' does now.
-
-
-
-File: gawk-info, Node: Manual Improvements, Prev: Improvements, Up: Notes
-
-Suggestions For Future Improvements of This Manual
-==================================================
-
- 1. An error message section has not been included in this version
- of the manual. Perhaps some nice beta testers will document
- some of the messages for the future.
-
- 2. A summary page has not been included, as the ``man'', or help,
- page that comes with the `gawk' code should suffice.
-
- GNU only supports Info, so this manual itself should contain
- whatever forms of information it would be useful to have on an
- Info summary page.
-
- 3. A function and variable index has not been included as we are
- not sure what to put in it.
-
- 4. A section summarizing the differences between V7 `awk' and
- System V Release 4 `awk' would be useful for long--time `awk'
- hackers.
-
-
-
-File: gawk-info, Node: Glossary, Next: Index, Prev: Notes, Up: Top
-
-Glossary
-********
-
-Action
- A series of `awk' statements attached to a rule. If the rule's
- pattern matches an input record, the `awk' language executes the
- rule's action. Actions are always enclosed in curly braces.
-
-Amazing `awk' assembler
- Henry Spencer at the University of Toronto wrote a retargetable
- assembler completely as `awk' scripts. It is thousands of lines
- long, including machine descriptions for several 8--bit
- microcomputers. It is distributed with `gawk' and is a good
- example of a program that would have been better written in
- another language.
-
-Assignment
- An `awk' expression that changes the value of some `awk'
- variable or data object. An object that you can assign to is
- called an "lvalue".
-
-Built-in function
- The `awk' language provides built-in functions that perform
- various numerical and string computations. Examples are `sqrt'
- (for the square root of a number) and `substr' (for a substring
- of a string).
-
-C
- The system programming language that most of GNU is written in.
- The `awk' programming language has C--like syntax, and this
- manual points out similarities between `awk' and C when
- appropriate.
-
-Compound statement
- A series of `awk' statements, enclosed in curly braces.
- Compound statements may be nested.
-
-Concatenation
- Concatenating two strings means sticking them together, one
- after another, giving a new string. For example, the string
- `foo' concatenated with the string `bar' gives the string
- `foobar'.
-
-Conditional expression
- A relation that is either true or false, such as `(a < b)'.
- Conditional expressions are used in `if' and `while' statements,
- and in patterns to select which input records to process.
-
-Curly braces
- The characters `{' and `}'. Curly braces are used in `awk' for
- delimiting actions, compound statements, and function bodies.
-
-Data objects
- These are numbers and strings of characters. Numbers are
- converted into strings and vice versa, as needed.
-
-Escape Sequences
- A special sequence of characters used for describing
- non--printable characters, such as `\n' for newline, or `\033'
- for the ASCII ESC (escape) character.
-
-Field
- When `awk' reads an input record, it splits the record into
- pieces separated by whitespace (or by a separator regexp which
- you can change by setting the special variable `FS'). Such
- pieces are called fields.
-
-Format
- Format strings are used to control the appearance of output in
- the `printf' statement. Also, data conversions from numbers to
- strings are controlled by the format string contained in the
- special variable `OFMT'.
-
-Function
- A specialized group of statements often used to encapsulate
- general or program-specific tasks. `awk' has a number of
- built-in functions, and also allows you to define your own.
-
-`gawk'
- The GNU implementation of `awk'.
-
-`awk' language
- The language in which `awk' programs are written.
-
-`awk' program
- An `awk' program consists of a series of "patterns" and
- "actions", collectively known as "rules". For each input record
- given to the program, the program's rules are all processed in
- turn. `awk' programs may also contain function definitions.
-
-`awk' script
- Another name for an `awk' program.
-
-Input record
- A single chunk of data read in by `awk'. Usually, an `awk'
- input record consists of one line of text.
-
-Keyword
- In the `awk' language, a keyword is a word that has special
- meaning. Keywords are reserved and may not be used as variable
- names.
-
- The keywords are: `if', `else', `while', `do...while', `for',
- `for...in', `break', `continue', `delete', `next', `function',
- `func', and `exit'.
-
-Lvalue
- An expression that can appear on the left side of an assignment
- operator. In most languages, lvalues can be variables or array
- elements. In `awk', a field designator can also be used as an
- lvalue.
-
-Number
- A numeric valued data object. The `gawk' implementation uses
- double precision floating point to represent numbers.
-
-Pattern
- Patterns tell `awk' which input records are interesting to which
- rules.
-
- A pattern is an arbitrary conditional expression against which
- input is tested. If the condition is satisfied, the pattern is
- said to "match" the input record. A typical pattern might
- compare the input record against a regular expression.
-
-Range (of input lines)
- A sequence of consecutive lines from the input file. A pattern
- can specify ranges of input lines for `awk' to process, or it
- can specify single lines.
-
-Recursion
- When a function calls itself, either directly or indirectly. If
- this isn't clear, refer to the entry for ``recursion''.
-
-Redirection
- Redirection means performing input from other than the standard
- input stream, or output to other than the standard output stream.
-
- You can redirect the output of the `print' and `printf'
- statements to a file or a system command, using the `>', `>>',
- and `|' operators. You can redirect input to the `getline'
- statement using the `<' and `|' operators.
-
-Regular Expression
- See ``regexp''.
-
-Regexp
- Short for "regular expression". A regexp is a pattern that
- denotes a set of strings, possibly an infinite set. For
- example, the regexp `R.*xp' matches any string starting with the
- letter `R' and ending with the letters `xp'. In `awk', regexps
- are used in patterns and in conditional expressions.
-
-Rule
- A segment of an `awk' program, that specifies how to process
- single input records. A rule consists of a "pattern" and an
- "action". `awk' reads an input record; then, for each rule, if
- the input record satisfies the rule's pattern, `awk' executes
- the rule's action. Otherwise, the rule does nothing for that
- input record.
-
-Special Variable
- The variables `ARGC', `ARGV', `ENVIRON', `FILENAME', `FNR',
- `FS', `NF', `NR', `OFMT', `OFS', `ORS', `RLENGTH', `RSTART',
- `RS', and `SUBSEP' have special meaning to `awk'. Changing some of
- them affects `awk''s running environment.
-
-Stream Editor
- A program that reads records from an input stream and processes
- them one or more at a time. This is in contrast with batch
- programs, which may expect to read their input files in entirety
- before starting to do anything, and with interactive programs,
- which require input from the user.
-
-String
- A datum consisting of a sequence of characters, such as `I am a
- string'. Constant strings are written with double--quotes in
- the `awk' language, and may contain "escape sequences".
-
-Whitespace
- A sequence of blank or tab characters occurring inside an input
- record or a string.
-
-
-
-File: gawk-info, Node: Index, Prev: Glossary, Up: Top
-
-Index
-*****
-
-* Menu:
-
-* #!: Executable Scripts.
-* -f option: Long.
-* `$NF', last field in record: Fields.
-* `$' (field operator): Fields.
-* `>>': Redirection.
-* `>': Redirection.
-* `BEGIN', special pattern: BEGIN/END.
-* `END', special pattern: BEGIN/END.
-* `awk' language: This Manual.
-* `awk' program: This Manual.
-* `break' statement: Break.
-* `close' statement for input: Close Input.
-* `close' statement for output: Close Output.
-* `continue' statement: Continue.
-* `delete' statement: Delete.
-* `exit' statement: Exit.
-* `for (x in ...)': Scanning an Array.
-* `for' statement: For.
-* `if' statement: If.
-* `next' statement: Next.
-* `print $0': Very Simple.
-* `printf' statement, format of: Basic Printf.
-* `printf', format-control characters: Format-Control.
-* `printf', modifiers: Modifiers.
-* `print' statement: Print.
-* `return' statement: Return Statement.
-* `while' statement: While.
-* `|': Redirection.
-* `BBS-list' file: The Files.
-* `inventory-shipped' file: The Files.
-* Accessing fields: Fields.
-* Acronym: History.
-* Action, curly braces: Actions.
-* Action, curly braces: Getting Started.
-* Action, default: Very Simple.
-* Action, definition of: Getting Started.
-* Action, general: Actions.
-* Action, separating statements: Actions.
-* Applications of `awk': When.
-* Arguments in function call: Function Calls.
-* Arguments, Command Line: Command Line.
-* Arithmetic operators: Arithmetic Ops.
-* Array assignment: Assigning Elements.
-* Array reference: Reference to Elements.
-* Arrays: Array Intro.
-* Arrays, definition of: Array Intro.
-* Arrays, deleting an element: Delete.
-* Arrays, determining presence of elements: Reference to Elements.
-* Arrays, multi-dimensional subscripts: Multi-dimensional.
-* Arrays, special `for' statement: Scanning an Array.
-* Assignment operators: Assignment Ops.
-* Associative arrays: Array Intro.
-* Backslash Continuation: Statements/Lines.
-* Basic function of `gawk': Getting Started.
-* Body of a loop: While.
-* Boolean expressions: Boolean Ops.
-* Boolean operators: Boolean Ops.
-* Boolean patterns: Boolean.
-* Built-in functions, list of: Built-in.
-* Built-in variables: Variables.
-* Calling a function: Function Calls.
-* Case sensitivity and gawk: Read Terminal.
-* Changing contents of a field: Changing Fields.
-* Changing the record separator: Records.
-* Closing files and pipes: Close Output.
-* Command Line: Command Line.
-* Command line formats: Running gawk.
-* Command line, setting `FS' on: Field Separators.
-* Comments: Comments.
-* Comparison expressions: Comparison Ops.
-* Comparison expressions as patterns: Comparison Patterns.
-* Compound statements: Actions.
-* Computed Regular Expressions: Regexp Usage.
-* Concatenation: Concatenation.
-* Conditional Patterns: Conditional Patterns.
-* Conditional expression: Conditional Exp.
-* Constants, types of: Constants.
-* Continuing statements on the next line: Statements/Lines.
-* Conversion of strings and numbers: Conversion.
-* Curly braces: Actions.
-* Curly braces: Getting Started.
-* Default action: Very Simple.
-* Default pattern: Very Simple.
-* Deleting elements of arrays: Delete.
-* Differences between `gawk' and `awk': Arithmetic Ops.
-* Differences between `gawk' and `awk': Constants.
-* Documenting `awk' programs: Comments.
-* Dynamic Regular Expressions: Regexp Usage.
-* Element assignment: Assigning Elements.
-* Element of array: Reference to Elements.
-* Emacs Lisp: When.
-* Empty pattern: Empty.
-* Escape sequence notation: Constants.
-* Examining fields: Fields.
-* Executable Scripts: Executable Scripts.
-* Expression, conditional: Conditional Exp.
-* Expressions: Actions.
-* Expressions, boolean: Boolean Ops.
-* Expressions, comparison: Comparison Ops.
-* Field separator, `FS': Field Separators.
-* Field separator, choice of: Field Separators.
-* Field separator, setting on command line: Field Separators.
-* Field, changing contents of: Changing Fields.
-* Fields: Fields.
-* Fields, negative-numbered: Non-Constant Fields.
-* Fields, semantics of: Field Separators.
-* Fields, separating: Field Separators.
-* Format specifier: Format-Control.
-* Format string: Basic Printf.
-* Formatted output: Printf.
-* Function call: Function Calls.
-* Function definitions: Actions.
-* Functions, user-defined: User-defined.
-* General input: Reading Files.
-* History of `awk': History.
-* How gawk works: Two Rules.
-* Increment operators: Increment Ops.
-* Input file, sample: The Files.
-* Input, `getline' function: Getline.
-* Input, general: Reading Files.
-* Input, multiple line records: Multiple.
-* Input, standard: Read Terminal.
-* Input, standard: Reading Files.
-* Interaction of `awk' with other programs: I/O Functions.
-* Invocation of `gawk': Command Line.
-* Language, `awk': This Manual.
-* Loop: While.
-* Loops, breaking out of: Break.
-* Lvalue: Assignment Ops.
-* Manual, using this: This Manual.
-* Metacharacters: Regexp Operators.
-* Mod function, semantics of: Arithmetic Ops.
-* Modifiers (in format specifiers): Modifiers.
-* Multiple line records: Multiple.
-* Multiple passes over data: Command Line.
-* Multiple statements on one line: Statements/Lines.
-* Negative-numbered fields: Non-Constant Fields.
-* Number of fields, `NF': Fields.
-* Number of records, `FNR': Records.
-* Number of records, `NR': Records.
-* Numerical constant: Constants.
-* Numerical value: Constants.
-* One-liners: One-liners.
-* Operator, Ternary: Conditional Patterns.
-* Operators, `$': Fields.
-* Operators, arithmetic: Arithmetic Ops.
-* Operators, assignment: Assignment Ops.
-* Operators, boolean: Boolean Ops.
-* Operators, increment: Increment Ops.
-* Operators, regular expression matching: Regexp Usage.
-* Operators, relational: Comparison Ops.
-* Operators, relational: Comparison Patterns.
-* Operators, string: Concatenation.
-* Operators, string-matching: Regexp Usage.
-* Options, Command Line: Command Line.
-* Output: Printing.
-* Output field separator, `OFS': Output Separators.
-* Output record separator, `ORS': Output Separators.
-* Output redirection: Redirection.
-* Output, formatted: Printf.
-* Output, piping: Redirection.
-* Passes, Multiple: Command Line.
-* Pattern, case sensitive: Read Terminal.
-* Pattern, comparison expressions: Comparison Patterns.
-* Pattern, default: Very Simple.
-* Pattern, definition of: Getting Started.
-* Pattern, empty: Empty.
-* Pattern, regular expressions: Regexp.
-* Patterns, `BEGIN': BEGIN/END.
-* Patterns, `END': BEGIN/END.
-* Patterns, Conditional: Conditional Patterns.
-* Patterns, boolean: Boolean.
-* Patterns, definition of: Patterns.
-* Patterns, types of: Patterns.
-* Pipes for output: Redirection.
-* Printing, general: Printing.
-* Program, `awk': This Manual.
-* Program, Self contained: Executable Scripts.
-* Program, definition of: Getting Started.
-* Programs, documenting: Comments.
-* Range pattern: Ranges.
-* Reading files, `getline' function: Getline.
-* Reading files, general: Reading Files.
-* Reading files, multiple line records: Multiple.
-* Record separator, `RS': Records.
-* Records, multiple line: Multiple.
-* Redirection of output: Redirection.
-* Reference to array: Reference to Elements.
-* Regexp: Regexp.
-* Regular Expressions, Computed: Regexp Usage.
-* Regular Expressions, Dynamic: Regexp Usage.
-* Regular expression matching operators: Regexp Usage.
-* Regular expression, metacharacters: Regexp Operators.
-* Regular expressions as patterns: Regexp.
-* Regular expressions, field separators and: Field Separators.
-* Relational operators: Comparison Patterns.
-* Relational operators: Comparison Ops.
-* Removing elements of arrays: Delete.
-* Rule, definition of: Getting Started.
-* Running gawk programs: Running gawk.
-* Sample input file: The Files.
-* Scanning an array: Scanning an Array.
-* Script, definition of: Getting Started.
-* Scripts, Executable: Executable Scripts.
-* Scripts, Shell: Executable Scripts.
-* Self contained Programs: Executable Scripts.
-* Separator character, choice of: Field Separators.
-* Shell Scripts: Executable Scripts.
-* Single quotes, why they are needed: One-shot.
-* Special variables, user modifiable: User-modified.
-* Standard input: Read Terminal.
-* Standard input: Reading Files.
-* Statements: Statements.
-* Statements: Actions.
-* String constants: Constants.
-* String operators: Concatenation.
-* String value: Constants.
-* String-matching operators: Regexp Usage.
-* Subscripts, multi-dimensional in arrays: Multi-dimensional.
-* Ternary Operator: Conditional Patterns.
-* Use of comments: Comments.
-* User-defined functions: User-defined.
-* User-defined variables: Variables.
-* Uses of `awk': Preface.
-* Using this manual: This Manual.
-* Variables, built-in: Variables.
-* Variables, user-defined: Variables.
-* What is `awk': Preface.
-* When to use `awk': When.
-* file, `awk' program: Long.
-* patterns, range: Ranges.
-* program file: Long.
-* regexp search operators: Regexp Usage.
-* running long programs: Long.
-
-
- 
-Tag Table:
-Node: Top918
-Node: Preface2804
-Node: History4267
-Node: License5644
-Node: This Manual18989
-Node: The Files20330
-Node: Getting Started22914
-Node: Very Simple24249
-Node: Two Rules26030
-Node: More Complex28066
-Node: Running gawk30908
-Node: One-shot31827
-Node: Read Terminal32945
-Node: Long33862
-Node: Executable Scripts34991
-Node: Command Line36534
-Node: Comments40168
-Node: Statements/Lines41067
-Node: When43498
-Node: Reading Files45420
-Node: Records47119
-Node: Fields49902
-Node: Non-Constant Fields52789
-Node: Changing Fields54591
-Node: Field Separators57302
-Node: Multiple62004
-Node: Assignment Options64393
-Node: Getline65608
-Node: Close Input74958
-Node: Printing76023
-Node: Print76748
-Node: Print Examples78712
-Node: Output Separators80751
-Node: Redirection82417
-Node: Close Output85886
-Node: Printf88132
-Node: Basic Printf88908
-Node: Format-Control90261
-Node: Modifiers91806
-Node: Printf Examples93108
-Node: One-liners95707
-Node: Patterns97642
-Node: Empty100130
-Node: Regexp100402
-Node: Regexp Usage101173
-Node: Regexp Operators102947
-Node: Comparison Patterns107890
-Node: Ranges109336
-Node: BEGIN/END110722
-Node: Boolean113151
-Node: Conditional Patterns115605
-Node: Actions116105
-Node: Expressions117435
-Node: Constants119124
-Node: Variables121097
-Node: Arithmetic Ops122454
-Node: Concatenation123840
-Node: Comparison Ops124569
-Node: Boolean Ops125973
-Node: Assignment Ops128266
-Node: Increment Ops131817
-Node: Conversion134112
-Node: Conditional Exp136066
-Node: Function Calls137384
-Node: Statements139939
-Node: If141253
-Node: While142627
-Node: Do144232
-Node: For145265
-Node: Break148306
-Node: Continue149848
-Node: Next151476
-Node: Exit152985
-Node: Arrays154514
-Node: Array Intro155624
-Node: Reference to Elements159227
-Node: Assigning Elements161115
-Node: Array Example161615
-Node: Scanning an Array163336
-Node: Delete165642
-Node: Multi-dimensional166529
-Node: Multi-scanning169746
-Node: Built-in171303
-Node: Numeric Functions172806
-Node: String Functions176601
-Node: I/O Functions183717
-Node: User-defined185189
-Node: Definition Syntax185834
-Node: Function Example187928
-Node: Function Caveats189034
-Node: Return Statement191386
-Node: Special193612
-Node: User-modified194478
-Node: Auto-set196511
-Node: Sample Program200558
-Node: Notes204316
-Node: Extensions204909
-Node: Future Extensions206490
-Node: Improvements207922
-Node: Manual Improvements210034
-Node: Glossary210928
-Node: Index217934
-
-End Tag Table
diff --git a/gawk-info-1 b/gawk-info-1
deleted file mode 100644
index b40278a4..00000000
--- a/gawk-info-1
+++ /dev/null
@@ -1,1231 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: Top, Next: Preface, Prev: (dir), Up: (dir)
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them; it
-contains the following chapters:
-
-* Menu:
-
-* Preface:: What you can do with `awk'; brief history
- and acknowledgements.
-
-* License:: Your right to copy and distribute `gawk'.
-
-* This Manual:: Using this manual.
-
- Includes sample input files that you can use.
-
-* Getting Started:: A basic introduction to using `awk'.
- How to run an `awk' program. Command line syntax.
-
-* Reading Files:: How to read files and manipulate fields.
-
-* Printing:: How to print using `awk'. Describes the
- `print' and `printf' statements.
- Also describes redirection of output.
-
-* One-liners:: Short, sample `awk' programs.
-
-* Patterns:: The various types of patterns explained in detail.
-
-* Actions:: The various types of actions are introduced here.
- Describes expressions and the various operators in
- detail. Also describes comparison expressions.
-
-* Statements:: The various control statements are described in
- detail.
-
-* Arrays:: The description and use of arrays. Also includes
- array--oriented control statements.
-
-* User-defined:: User--defined functions are described in detail.
-
-* Built-in:: The built--in functions are summarized here.
-
-* Special:: The special variables are summarized here.
-
-* Sample Program:: A sample `awk' program with a complete explanation.
-
-* Notes:: Something about the implementation of `gawk'.
-
-* Glossary:: An explanation of some unfamiliar terms.
-
-* Index::
-
-
-
-File: gawk-info, Node: Preface, Next: License, Prev: Top, Up: Top
-
-Preface
-*******
-
-If you are like many computer users, you frequently would like to
-make changes in various text files wherever certain patterns appear,
-or extract data from parts of certain lines while discarding the
-rest. To write a program to do this in a language such as C or
-Pascal is a time--consuming inconvenience that may take many lines of
-code. The job may be easier with `awk'.
-
-The `awk' utility interprets a special--purpose programming language
-that makes it possible to handle simple data--reformatting jobs
-easily with just a few lines of code.
-
-The GNU implementation of `awk' is called `gawk'; it is fully upward
-compatible with the System V Release 3.1 and later versions of `awk'.
-All properly written `awk' programs should work with `gawk'. So we
-usually don't distinguish between `gawk' and other `awk'
-implementations in this manual.
-
-This manual teaches you what `awk' does and how you can use `awk'
-effectively. You should already be familiar with basic,
-general--purpose, operating system commands such as `ls'. Using
-`awk' you can:
-
- * manage small, personal databases,
-
- * generate reports,
-
- * validate data,
-
- * produce indexes, and perform other document preparation tasks,
-
- * even experiment with algorithms that can be adapted later to
- other computer languages!
-
-* Menu:
-
-* History:: The history of gawk and awk. Acknowledgements.
-
-
-
-File: gawk-info, Node: History, Up: Preface
-
-History of `awk' and `gawk'
-===========================
-
-The name `awk' comes from the initials of its designers: Alfred V.
-Aho, Peter J. Weinberger, and Brian W. Kernighan. The original
-version of `awk' was written in 1977. In 1985 a new version made the
-programming language more powerful, introducing user--defined
-functions, multiple input streams, and computed regular expressions.
-
-The GNU implementation, `gawk', was written in 1986 by Paul Rubin and
-Jay Fenlason, with advice from Richard Stallman. John Woods
-contributed parts of the code as well. In 1988, David Trueman, with
-help from Arnold Robbins, reworked `gawk' for compatibility with the
-newer `awk'.
-
-Many people need to be thanked for their assistance in producing this
-manual. Jay Fenlason contributed many ideas and sample programs.
-Richard Mlynarik and Robert Chassell gave helpful comments on drafts
-of this manual. The paper ``A Supplemental Document for `awk''' by
-John W. Pierce of the Chemistry Department at UC San Diego
-pinpointed several issues relevant both to `awk' implementation and
-to this manual that would otherwise have escaped us.
-
-Finally, we would like to thank Brian Kernighan of Bell Labs for
-invaluable assistance during the testing and debugging of `gawk', and
-for help in clarifying several points about the language.
-
-
-
-File: gawk-info, Node: License, Next: This Manual, Prev: Preface, Up: Top
-
-GNU GENERAL PUBLIC LICENSE
-**************************
-
- Version 1, February 1989
-
- Copyright (C) 1989 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-=========
-
- The license agreements of most software companies try to keep users
-at the mercy of those companies. By contrast, our General Public
-License is intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-The General Public License applies to the Free Software Foundation's
-software and to any other program whose authors commit to using it.
-You can use it for your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Specifically, the General Public License is designed to make
-sure that you have the freedom to give away or sell copies of free
-software, that you receive source code or can get it if you want it,
-that you can change the software or use pieces of it in new free
-programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if
-you distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of a such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must tell them their rights.
-
- We protect your rights with two steps: (1) copyright the software,
-and (2) offer you this license which gives you legal permission to
-copy, distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on,
-we want its recipients to know that what they have is not the
-original, so that any problems introduced by others will not reflect
-on the original authors' reputations.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 1. This License Agreement applies to any program or other work
- which contains a notice placed by the copyright holder saying it
- may be distributed under the terms of this General Public
- License. The ``Program'', below, refers to any such program or
- work, and a ``work based on the Program'' means either the
- Program or any work containing the Program or a portion of it,
- either verbatim or with modifications. Each licensee is
- addressed as ``you''.
-
- 2. You may copy and distribute verbatim copies of the Program's
- source code as you receive it, in any medium, provided that you
- conspicuously and appropriately publish on each copy an
- appropriate copyright notice and disclaimer of warranty; keep
- intact all the notices that refer to this General Public License
- and to the absence of any warranty; and give any other
- recipients of the Program a copy of this General Public License
- along with the Program. You may charge a fee for the physical
- act of transferring a copy.
-
- 3. You may modify your copy or copies of the Program or any portion
- of it, and copy and distribute such modifications under the
- terms of Paragraph 1 above, provided that you also do the
- following:
-
- * cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change; and
-
- * cause the whole of any work that you distribute or publish,
- that in whole or in part contains the Program or any part
- thereof, either with or without modifications, to be
- licensed at no charge to all third parties under the terms
- of this General Public License (except that you may choose
- to grant warranty protection to some or all third parties,
- at your option).
-
- * If the modified program normally reads commands
- interactively when run, you must cause it, when started
- running for such interactive use in the simplest and most
- usual way, to print or display an announcement including an
- appropriate copyright notice and a notice that there is no
- warranty (or else, saying that you provide a warranty) and
- that users may redistribute the program under these
- conditions, and telling the user how to view a copy of this
- General Public License.
-
- * You may charge a fee for the physical act of transferring a
- copy, and you may at your option offer warranty protection
- in exchange for a fee.
-
- Mere aggregation of another independent work with the Program
- (or its derivative) on a volume of a storage or distribution
- medium does not bring the other work under the scope of these
- terms.
-
- 4. You may copy and distribute the Program (or a portion or
- derivative of it, under Paragraph 2) in object code or
- executable form under the terms of Paragraphs 1 and 2 above
- provided that you also do one of the following:
-
- * accompany it with the complete corresponding
- machine-readable source code, which must be distributed
- under the terms of Paragraphs 1 and 2 above; or,
-
- * accompany it with a written offer, valid for at least three
- years, to give any third party free (except for a nominal
- charge for the cost of distribution) a complete
- machine-readable copy of the corresponding source code, to
- be distributed under the terms of Paragraphs 1 and 2 above;
- or,
-
- * accompany it with the information you received as to where
- the corresponding source code may be obtained. (This
- alternative is allowed only for noncommercial distribution
- and only if you received the program in object code or
- executable form alone.)
-
- Source code for a work means the preferred form of the work for
- making modifications to it. For an executable file, complete
- source code means all the source code for all modules it
- contains; but, as a special exception, it need not include
- source code for modules which are standard libraries that
- accompany the operating system on which the executable file
- runs, or for standard header files or definitions files that
- accompany that operating system.
-
- 5. You may not copy, modify, sublicense, distribute or transfer the
- Program except as expressly provided under this General Public
- License. Any attempt otherwise to copy, modify, sublicense,
- distribute or transfer the Program is void, and will
- automatically terminate your rights to use the Program under
- this License. However, parties who have received copies, or
- rights to use copies, from you under this General Public License
- will not have their licenses terminated so long as such parties
- remain in full compliance.
-
- 6. By copying, distributing or modifying the Program (or any work
- based on the Program) you indicate your acceptance of this
- license to do so, and all its terms and conditions.
-
- 7. Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from
- the original licensor to copy, distribute or modify the Program
- subject to these terms and conditions. You may not impose any
- further restrictions on the recipients' exercise of the rights
- granted herein.
-
- 8. The Free Software Foundation may publish revised and/or new
- versions of the General Public License from time to time. Such
- new versions will be similar in spirit to the present version,
- but may differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
- Program specifies a version number of the license which applies
- to it and ``any later version'', you have the option of
- following the terms and conditions either of that version or of
- any later version published by the Free Software Foundation. If
- the Program does not specify a version number of the license,
- you may choose any version ever published by the Free Software
- Foundation.
-
- 9. If you wish to incorporate parts of the Program into other free
- programs whose distribution conditions are different, write to
- the author to ask for permission. For software which is
- copyrighted by the Free Software Foundation, write to the Free
- Software Foundation; we sometimes make exceptions for this. Our
- decision will be guided by the two goals of preserving the free
- status of all derivatives of our free software and of promoting
- the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 10. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
- WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
- LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
- HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS''
- WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
- INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
- ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE
- COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 11. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
- WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
- MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
- LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
- INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS
- OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH
- ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
- ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
-Appendix: How to Apply These Terms to Your New Programs
-=======================================================
-
- If you develop a new program, and you want it to be of the greatest
-possible use to humanity, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these
-terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the ``copyright'' line and a pointer to where the full notice is found.
-
- ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
- Copyright (C) 19YY NAME OF AUTHOR
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Also add information on how to contact you by electronic and paper
-mail.
-
-If the program is interactive, make it output a short notice like
-this when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
- The hypothetical commands `show w' and `show c' should show the
-appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than `show w' and
-`show c'; they could even be mouse-clicks or menu items--whatever
-suits your program.
-
-You should also get your employer (if you work as a programmer) or
-your school, if any, to sign a ``copyright disclaimer'' for the
-program, if necessary. Here a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the
- program `Gnomovision' (a program to direct compilers to make passes
- at assemblers) written by James Hacker.
-
- SIGNATURE OF TY COON, 1 April 1989
- Ty Coon, President of Vice
-
-That's all there is to it!
-
-
-
-File: gawk-info, Node: This Manual, Next: Getting Started, Prev: License, Up: Top
-
-Using This Manual
-*****************
-
-The term `gawk' refers to a program (a version of `awk') developed by
-the Free Software Foundation, and to the language you use to tell it
-what to do. When we need to be careful, we call the program ``the
-`awk' utility'' and the language ``the `awk' language''. The purpose
-of this manual is to explain the `awk' language and how to run the
-`awk' utility.
-
-The term "`awk' program" refers to a program written by you in the
-`awk' programming language.
-
-*Note Getting Started::, for the bare essentials you need to know to
-start using `awk'.
-
-Useful ``one--liners'' are included to give you a feel for the `awk'
-language (*note One-liners::.).
-
-A sizable sample `awk' program has been provided for you (*note
-Sample Program::.).
-
-If you find terms that you aren't familiar with, try looking them up
-in the glossary (*note Glossary::.).
-
-Most of the time complete `awk' programs are used as examples, but in
-some of the more advanced sections, only the part of the `awk'
-program that illustrates the concept being described is shown.
-
-* Menu:
-
-This chapter contains the following sections:
-
-* The Files:: Sample data files for use in the `awk' programs
- illustrated in this manual.
-
-
-
-File: gawk-info, Node: The Files, Up: This Manual
-
-Input Files for the Examples
-============================
-
-This manual contains many sample programs. The data for many of
-those programs comes from two files. The first file, called
-`BBS-list', represents a list of computer bulletin board systems and
-information about those systems.
-
-Each line of this file is one "record". Each record contains the
-name of a computer bulletin board, its phone number, the board's baud
-rate, and a code for the number of hours it is operational. An `A'
-in the last column means the board operates 24 hours all week. A `B'
-in the last column means the board operates only during evening and
-weekend hours. A `C' means the board operates only on weekends.
-
- aardvark 555-5553 1200/300 B
- alpo-net 555-3412 2400/1200/300 A
- barfly 555-7685 1200/300 A
- bites 555-1675 2400/1200/300 A
- camelot 555-0542 300 C
- core 555-2912 1200/300 C
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sdace 555-3430 2400/1200/300 A
- sabafoo 555-2127 1200/300 C
-
-The second data file, called `inventory-shipped', represents
-information about shipments during the year. Each line of this file
-is also one record. Each record contains the month of the year, the
-number of green crates shipped, the number of red boxes shipped, the
-number of orange bags shipped, and the number of blue packages
-shipped, respectively.
-
- Jan 13 25 15 115
- Feb 15 32 24 226
- Mar 15 24 34 228
- Apr 31 52 63 420
- May 16 34 29 208
- Jun 31 42 75 492
- Jul 24 34 67 436
- Aug 15 34 47 316
- Sep 13 55 37 277
- Oct 29 54 68 525
- Nov 20 87 82 577
- Dec 17 35 61 401
-
- Jan 21 36 64 620
- Feb 26 58 80 652
- Mar 24 75 70 495
- Apr 21 70 74 514
-
-If you are reading this in GNU Emacs using Info, you can copy the
-regions of text showing these sample files into your own test files.
-This way you can try out the examples shown in the remainder of this
-document. You do this by using the command `M-x write-region' to
-copy text from the Info file into a file for use with `awk' (see your
-``GNU Emacs Manual'' for more information). Using this information,
-create your own `BBS-list' and `inventory-shipped' files, and
-practice what you learn in this manual.
-
-
-
-File: gawk-info, Node: Getting Started, Next: Reading Files, Prev: This Manual, Up: Top
-
-Getting Started With `awk'
-**************************
-
-The basic function of `awk' is to search files for lines (or other
-units of text) that contain certain patterns. When a line matching
-any of those patterns is found, `awk' performs specified actions on
-that line. Then `awk' keeps processing input lines until the end of
-the file is reached.
-
-An `awk' "program" or "script" consists of a series of "rules".
-(A program may also contain "function definitions", but that is an
-advanced feature, so let's ignore it for now. *Note User-defined::.)
-
-A rule contains a "pattern", an "action", or both. Actions are
-enclosed in curly braces to distinguish them from patterns.
-Therefore, an `awk' program is a sequence of rules in the form:
-
- PATTERN { ACTION }
- PATTERN { ACTION }
- ...
-
-* Menu:
-
-* Very Simple:: A very simple example.
-* Two Rules:: A less simple one--line example with two rules.
-* More Complex:: A more complex example.
-* Running gawk:: How to run gawk programs; includes command line syntax.
-* Comments:: Adding documentation to gawk programs.
-* Statements/Lines:: Subdividing or combining statements into lines.
-
-* When:: When to use gawk and when to use other things.
-
-
-
-File: gawk-info, Node: Very Simple, Next: Two Rules, Up: Getting Started
-
-A Very Simple Example
-=====================
-
-The following command runs a simple `awk' program that searches the
-input file `BBS-list' for the string of characters: `foo'. (A string
-of characters is usually called, quite simply, a "string".)
-
- awk '/foo/ { print $0 }' BBS-list
-
-When lines containing `foo' are found, they are printed, because
-`print $0' means print the current line. (Just `print' by itself
-also means the same thing, so we could have written that instead.)
-
-You will notice that slashes, `/', surround the string `foo' in the
-actual `awk' program. The slashes indicate that `foo' is a pattern
-to search for. This type of pattern is called a "regular
-expression", and is covered in more detail later (*note Regexp::.).
-There are single quotes around the `awk' program so that the shell
-won't interpret any of it as special shell characters.
-
-Here is what this program prints:
-
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sabafoo 555-2127 1200/300 C
-
-In an `awk' rule, either the pattern or the action can be omitted,
-but not both.
-
-If the pattern is omitted, then the action is performed for *every*
-input line.
-
-If the action is omitted, the default action is to print all lines
-that match the pattern. We could leave out the action (the print
-statement and the curly braces) in the above example, and the result
-would be the same: all lines matching the pattern `foo' would be
-printed. (By comparison, omitting the print statement but retaining
-the curly braces makes an empty action that does nothing; then no
-lines would be printed.)
-
-
-
-File: gawk-info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up: Getting Started
-
-An Example with Two Rules
-=========================
-
-The `awk' utility reads the input files one line at a time. For each
-line, `awk' tries the patterns of all the rules. If several patterns
-match then several actions are run, in the order in which they appear
-in the `awk' program. If no patterns match, then no actions are run.
-
-After processing all the rules (perhaps none) that match the line,
-`awk' reads the next line (however, *note Next::.). This continues
-until the end of the file is reached.
-
-For example, the `awk' program:
-
- /12/ { print $0 }
- /21/ { print $0 }
-
-contains two rules. The first rule has the string `12' as the
-pattern and `print $0' as the action. The second rule has the string
-`21' as the pattern and also has `print $0' as the action. Each
-rule's action is enclosed in its own pair of braces.
-
-This `awk' program prints every line that contains the string `12'
-*or* the string `21'. If a line contains both strings, it is printed
-twice, once by each rule.
-
-If we run this program on our two sample data files, `BBS-list' and
-`inventory-shipped', as shown here:
-
- awk '/12/ { print $0 }
- /21/ { print $0 }' BBS-list inventory-shipped
-
-we get the following output:
-
- aardvark 555-5553 1200/300 B
- alpo-net 555-3412 2400/1200/300 A
- barfly 555-7685 1200/300 A
- bites 555-1675 2400/1200/300 A
- core 555-2912 1200/300 C
- fooey 555-1234 2400/1200/300 B
- foot 555-6699 1200/300 B
- macfoo 555-6480 1200/300 A
- sdace 555-3430 2400/1200/300 A
- sabafoo 555-2127 1200/300 C
- sabafoo 555-2127 1200/300 C
- Jan 21 36 64 620
- Apr 21 70 74 514
-
-Note how the line in `BBS-list' beginning with `sabafoo' was printed
-twice, once for each rule.
-
-
-
-File: gawk-info, Node: More Complex, Next: Running gawk, Prev: Two Rules, Up: Getting Started
-
-A More Complex Example
-======================
-
-Here is an example to give you an idea of what typical `awk' programs
-do. This example shows how `awk' can be used to summarize, select,
-and rearrange the output of another utility. It uses features that
-haven't been covered yet, so don't worry if you don't understand all
-the details.
-
- ls -l | awk '$5 == "Nov" { sum += $4 }
- END { print sum }'
-
-This command prints the total number of bytes in all the files in the
-current directory that were last modified in November (of any year).
-(In the C shell you would need to type a semicolon and then a
-backslash at the end of the first line; in the Bourne shell you can
-type the example as shown.)
-
-The `ls -l' part of this example is a command that gives you a full
-listing of all the files in a directory, including file size and date.
-Its output looks like this:
-
- -rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
- -rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
- -rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
- -rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
- -rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
- -rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
- -rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
- -rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
-
-The first field contains read--write permissions, the second field
-contains the number of links to the file, and the third field
-identifies the owner of the file. The fourth field contains the size
-of the file in bytes. The fifth, sixth, and seventh fields contain
-the month, day, and time, respectively, that the file was last
-modified. Finally, the eighth field contains the name of the file.
-
-The `$5 == "Nov"' in our `awk' program is an expression that tests
-whether the fifth field of the output from `ls -l' matches the string
-`Nov'. Each time a line has the string `Nov' in its fifth field, the
-action `{ sum += $4 }' is performed. This adds the fourth field (the
-file size) to the variable `sum'. As a result, when `awk' has
-finished reading all the input lines, `sum' will be the sum of the
-sizes of files whose lines matched the pattern.
-
-After the last line of output from `ls' has been processed, the `END'
-rule is executed, and the value of `sum' is printed. In this
-example, the value of `sum' would be 80600.
-
-These more advanced `awk' techniques are covered in later sections
-(*note Actions::.). Before you can move on to more advanced `awk'
-programming, you have to know how `awk' interprets your input and
-displays your output. By manipulating "fields" and using special
-"print" statements, you can produce some very useful and
-spectacular-looking reports.
-
-
-
-File: gawk-info, Node: Running gawk, Next: Comments, Prev: More Complex, Up: Getting Started
-
-How to Run `awk' Programs
-=========================
-
-There are several ways to run an `awk' program. If the program is
-short, it is easiest to include it in the command that runs `awk',
-like this:
-
- awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
-
- where PROGRAM consists of a series of PATTERNS and ACTIONS, as
-described earlier.
-
-When the program is long, you would probably prefer to put it in a
-file and run it with a command like this:
-
- awk -f PROGRAM-FILE INPUT-FILE1 INPUT-FILE2 ...
-
-* Menu:
-
-* One-shot:: Running a short throw--away `awk' program.
-* Read Terminal:: Using no input files (input from terminal instead).
-* Long:: Putting permanent `awk' programs in files.
-* Executable Scripts:: Making self--contained `awk' programs.
-* Command Line:: How the `awk' command line is laid out.
-
-
-
-File: gawk-info, Node: One-shot, Next: Read Terminal, Up: Running gawk
-
-One--shot Throw--away `awk' Programs
-------------------------------------
-
-Once you are familiar with `awk', you will often type simple programs
-at the moment you want to use them. Then you can write the program
-as the first argument of the `awk' command, like this:
-
- awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
-
- where PROGRAM consists of a series of PATTERNS and ACTIONS, as
-described earlier.
-
-This command format tells the shell to start `awk' and use the
-PROGRAM to process records in the input file(s). There are single
-quotes around the PROGRAM so that the shell doesn't interpret any
-`awk' characters as special shell characters. They cause the shell
-to treat all of PROGRAM as a single argument for `awk'. They also
-allow PROGRAM to be more than one line long.
-
-This format is also useful for running short or medium--sized `awk'
-programs from shell scripts, because it avoids the need for a
-separate file for the `awk' program. A self--contained shell script
-is more reliable since there are no other files to misplace.
-
-
-
-File: gawk-info, Node: Read Terminal, Next: Long, Prev: One-shot, Up: Running gawk
-
-Running `awk' without Input Files
----------------------------------
-
-You can also use `awk' without any input files. If you type the
-command line:
-
- awk 'PROGRAM'
-
-then `awk' applies the PROGRAM to the "standard input", which usually
-means whatever you type on the terminal. This continues until you
-indicate end--of--file by typing `Control-d'.
-
-For example, if you type:
-
- awk '/th/'
-
-whatever you type next will be taken as data for that `awk' program.
-If you go on to type the following data,
-
- Kathy
- Ben
- Tom
- Beth
- Seth
- Karen
- Thomas
- `Control-d'
-
-then `awk' will print
-
- Kathy
- Beth
- Seth
-
-as matching the pattern `th'. Notice that it did not recognize
-`Thomas' as matching the pattern. The `awk' language is "case
-sensitive", and matches patterns *exactly*.
-
-
-
-File: gawk-info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
-
-Running Long Programs
----------------------
-
-Sometimes your `awk' programs can be very long. In this case it is
-more convenient to put the program into a separate file. To tell
-`awk' to use that file for its program, you type:
-
- awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
-
- The `-f' tells the `awk' utility to get the `awk' program from the
-file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For
-example, you could put the program:
-
- /th/
-
-into the file `th-prog'. Then the command:
-
- awk -f th-prog
-
-does the same thing as this one:
-
- awk '/th/'
-
-which was explained earlier (*note Read Terminal::.). Note that you
-don't usually need single quotes around the file name that you
-specify with `-f', because most file names don't contain any of the
-shell's special characters.
-
-If you want to identify your `awk' program files clearly as such, you
-can add the extension `.awk' to the filename. This doesn't affect
-the execution of the `awk' program, but it does make ``housekeeping''
-easier.
-
-
-
-File: gawk-info, Node: Executable Scripts, Next: Command Line, Prev: Long, Up: Running gawk
-
-Executable `awk' Programs
--------------------------
-
-(The following section assumes that you are already somewhat familiar
-with `awk'.)
-
-Once you have learned `awk', you may want to write self--contained
-`awk' scripts, using the `#!' script mechanism. You can do this on
-BSD Unix systems and GNU.
-
-For example, you could create a text file named `hello', containing
-the following (where `BEGIN' is a feature we have not yet discussed):
-
- #! /bin/awk -f
-
- # a sample awk program
-
- BEGIN { print "hello, world" }
-
-After making this file executable (with the `chmod' command), you can
-simply type:
-
- hello
-
-at the shell, and the system will arrange to run `awk' as if you had
-typed:
-
- awk -f hello
-
-Self--contained `awk' scripts are particularly useful for putting
-`awk' programs into production on your system, without your users
-having to know that they are actually using an `awk' program.
-
-If your system does not support the `#!' mechanism, you can get a
-similar effect using a regular shell script. It would look something
-like this:
-
- : a sample awk program
-
- awk 'PROGRAM' "$@"
-
-Using this technique, it is *vital* to enclose the PROGRAM in single
-quotes to protect it from interpretation by the shell. If you omit
-the quotes, only a shell wizard can predict the result.
-
-The `"$@"' causes the shell to forward all the command line arguments
-to the `awk' program, without interpretation.
-
-
-
-File: gawk-info, Node: Command Line, Prev: Executable Scripts, Up: Running gawk
-
-Details of the `awk' Command Line
----------------------------------
-
-(The following section assumes that you are already familiar with
-`awk'.)
-
-There are two ways to run `awk'. Here are templates for both of
-them; items enclosed in `[' and `]' in these templates are optional.
-
- awk [ -FFS ] [ -- ] 'PROGRAM' FILE ...
- awk [ -FFS ] -f SOURCE-FILE [ -f SOURCE-FILE ... ] [ -- ] FILE ...
-
- Options begin with a minus sign, and consist of a single character.
-The options and their meanings are as follows:
-
-`-FFS'
- This sets the `FS' variable to FS (*note Special::.). As a
- special case, if FS is `t', then `FS' will be set to the tab
- character (`"\t"').
-
-`-f SOURCE-FILE'
- Indicates that the `awk' program is to be found in SOURCE-FILE
- instead of in the first non--option argument.
-
-`--'
- This signals the end of the command line options. If you wish
- to specify an input file named `-f', you can precede it with the
- `--' argument to prevent the `-f' from being interpreted as an
- option. This handling of `--' follows the POSIX argument
- parsing conventions.
-
-Any other options will be flagged as invalid with a warning message,
-but are otherwise ignored.
-
-If the `-f' option is *not* used, then the first non--option command
-line argument is expected to be the program text.
-
-The `-f' option may be used more than once on the command line.
-`awk' will read its program source from all of the named files, as if
-they had been concatenated together into one big file. This is
-useful for creating libraries of `awk' functions. Useful functions
-can be written once, and then retrieved from a standard place,
-instead of having to be included into each individual program. You
-can still type in a program at the terminal and use library
-functions, by specifying `/dev/tty' as one of the arguments to a
-`-f'. Type your program, and end it with the keyboard end--of--file
-character `Control-d'.
-
-Any additional arguments on the command line are made available to
-your `awk' program in the `ARGV' array (*note Special::.). These
-arguments are normally treated as input files to be processed in the
-order specified. However, an argument of the form VAR`='VALUE means
-to assign the value VALUE to the variable VAR--it does not specify a
-file at all.
-
-Command line options and the program text (if present) are omitted
-from the `ARGV' array. All other arguments, including variable
-assignments, are included (*note Special::.).
-
-The distinction between file name arguments and variable--assignment
-arguments is made when `awk' is about to open the next input file.
-At that point in execution, it checks the ``file name'' to see
-whether it is really a variable assignment; if so, instead of trying
-to read a file it will, *at that point in the execution*, assign the
-variable.
-
-Therefore, the variables actually receive the specified values after
-all previously specified files have been read. In particular, the
-values of variables assigned in this fashion are *not* available
-inside a `BEGIN' rule (*note BEGIN/END::.), since such rules are run
-before `awk' begins scanning the argument list.
-
-The variable assignment feature is most useful for assigning to
-variables such as `RS', `OFS', and `ORS', which control input and
-output formats, before listing the data files. It is also useful for
-controlling state if multiple passes are needed over a data file.
-For example:
-
- awk 'pass == 1 { PASS 1 STUFF }
- pass == 2 { PASS 2 STUFF }' pass=1 datafile pass=2 datafile
-
-
-
-File: gawk-info, Node: Comments, Next: Statements/Lines, Prev: Running gawk, Up: Getting Started
-
-Comments in `awk' Programs
-==========================
-
-When you write a complicated `awk' program, you can put "comments" in
-the program file to help you remember what the program does, and how
-it works.
-
-A comment starts with the sharp sign character, `#', and
-continues to the end of the line. The `awk' language ignores the
-rest of a line following a sharp sign. For example, we could have
-put the following into `th-prog':
-
- # This program finds records containing the pattern `th'. This is how
- # you continue comments on additional lines.
- /th/
-
-You can put comment lines into keyboard--composed throw--away `awk'
-programs also, but this usually isn't very useful; the purpose of a
-comment is to help yourself or another person understand the program
-at another time.
-
-
-
-File: gawk-info, Node: Statements/Lines, Next: When, Prev: Comments, Up: Getting Started
-
-`awk' Statements versus Lines
-=============================
-
-Most often, each line in an `awk' program is a separate statement or
-separate rule, like this:
-
- awk '/12/ { print $0 }
- /21/ { print $0 }' BBS-list inventory-shipped
-
-But sometimes statements can be more than one line, and lines can
-contain several statements.
-
-You can split a statement into multiple lines by inserting a newline
-after any of the following:
-
- , { ? : || &&
-
-Lines ending in `do' or `else' automatically have their statements
-continued on the following line(s). A newline at any other point
-ends the statement.
-
-If you would like to split a single statement into two lines at a
-point where a newline would terminate it, you can "continue" it by
-ending the first line with a backslash character, `\'. This is
-allowed absolutely anywhere in the statement, even in the middle of a
-string or regular expression. For example:
-
- awk '/This program is too long, so continue it\
- on the next line/ { print $1 }'
-
-We have generally not used backslash continuation in the sample
-programs in this manual. Since there is no limit on the length of a
-line, it is never strictly necessary; it just makes programs
-prettier. We have preferred to make them even more pretty by keeping
-the statements short. Backslash continuation is most useful when
-your `awk' program is in a separate source file, instead of typed in
-on the command line.
-
-*Warning: this does not work if you are using the C shell.*
-Continuation with backslash works for `awk' programs in files, and
-also for one--shot programs *provided* you are using the Bourne
-shell, the Korn shell, or the Bourne--again shell. But the C shell
-used on Berkeley Unix behaves differently! There, you must use two
-backslashes in a row, followed by a newline.
-
-When `awk' statements within one rule are short, you might want to
-put more than one of them on a line. You do this by separating the
-statements with semicolons, `;'. This also applies to the rules
-themselves. Thus, the above example program could have been written:
-
- /12/ { print $0 } ; /21/ { print $0 }
-
-*Note:* It is a new requirement that rules on the same line require
-semicolons as a separator in the `awk' language; it was done for
-consistency with the statements in the action part of rules.
-
-
-
-File: gawk-info, Node: When, Prev: Statements/Lines, Up: Getting Started
-
-When to Use `awk'
-=================
-
-What use is all of this to me, you might ask? Using additional
-operating system utilities, more advanced patterns, field separators,
-arithmetic statements, and other selection criteria, you can produce
-much more complex output. The `awk' language is very useful for
-producing reports from large amounts of raw data, like summarizing
-information from the output of standard operating system programs
-such as `ls'. (*Note A More Complex Example: More Complex.)
-
-Programs written with `awk' are usually much smaller than they would
-be in other languages. This makes `awk' programs easy to compose and
-use. Often `awk' programs can be quickly composed at your terminal,
-used once, and thrown away. Since `awk' programs are interpreted,
-you can avoid the usually lengthy edit--compile--test--debug cycle of
-software development.
-
-Complex programs have been written in `awk', including a complete
-retargetable assembler for 8--bit microprocessors (*note Glossary::.
-for more information) and a microcode assembler for a special purpose
-Prolog computer. However, `awk''s capabilities are strained by tasks
-of such complexity.
-
-If you find yourself writing `awk' scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated
-string or pattern matching capabilities. The shell is also good at
-string and pattern matching; in addition it allows powerful use of
-the standard utilities. More conventional languages like C, C++, or
-Lisp offer better facilities for system programming and for managing
-the complexity of large programs. Programs in these languages may
-require more lines of source code than the equivalent `awk' programs,
-but they will be easier to maintain and usually run more efficiently.
-
-
-
-File: gawk-info, Node: Reading Files, Next: Printing, Prev: Getting Started, Up: Top
-
-Reading Files (Input)
-*********************
-
-In the typical `awk' program, all input is read either from the
-standard input (usually the keyboard) or from files whose names you
-specify on the `awk' command line. If you specify input files, `awk'
-reads data from the first one until it reaches the end; then it reads
-the second file until it reaches the end, and so on. The name of the
-current input file can be found in the special variable `FILENAME'
-(*note Special::.).
-
-The input is split automatically into "records", and processed by the
-rules one record at a time. (Records are the units of text mentioned
-in the introduction; by default, a record is a line of text.) Each
-record read is split automatically into "fields", to make it more
-convenient for a rule to work on parts of the record under
-consideration.
-
-On rare occasions you will need to use the `getline' command, which
-can do explicit input from any number of files.
-
-* Menu:
-
-* Records:: Controlling how data is split into records.
-* Fields:: An introduction to fields.
-* Field Separators:: The field separator and how to change it.
-* Multiple:: Reading multi--line records.
-
-* Assignment Options:: Setting variables on the command line and a summary
- of command line syntax. This is an advanced method
- of input.
-
-* Getline:: Reading files under explicit program control
- using the `getline' function.
-* Close Input:: Closing an input file (so you can read from
- the beginning once more).
-
-
-
-File: gawk-info, Node: Records, Next: Fields, Up: Reading Files
-
-How Input is Split into Records
-===============================
-
-The `awk' language divides its input into records and fields.
-Records are separated from each other by the "record separator". By
-default, the record separator is the "newline" character. Therefore,
-normally, a record is a line of text.
-
-Sometimes you may want to use a different character to separate your
-records. You can use different characters by changing the special
-variable `RS'.
-
-The value of `RS' is a string that says how to separate records; the
-default value is `"\n"', the string of just a newline character.
-This is why lines of text are the default record. Although `RS' can
-have any string as its value, only the first character of the string
-will be used as the record separator. The other characters are
-ignored. `RS' is exceptional in this regard; `awk' uses the full
-value of all its other special variables.
-
-The value of `RS' is changed by "assigning" it a new value (*note
-Assignment Ops::.). One way to do this is at the beginning of your
-`awk' program, before any input has been processed, using the special
-`BEGIN' pattern (*note BEGIN/END::.). This way, `RS' is changed to
-its new value before any input is read. The new value of `RS' is
-enclosed in quotation marks. For example:
-
- awk 'BEGIN { RS = "/" } ; { print $0 }' BBS-list
-
-changes the value of `RS' to `/', the slash character, before reading
-any input. Records are now separated by a slash. The second rule in
-the `awk' program (the action with no pattern) will proceed to print
-each record. Since each `print' statement adds a newline at the end
-of its output, the effect of this `awk' program is to copy the input
-with each slash changed to a newline.
-
-Another way to change the record separator is on the command line,
-using the variable--assignment feature (*note Command Line::.).
-
- awk '...' RS="/" SOURCE-FILE
-
-`RS' will be set to `/' before processing SOURCE-FILE.
-
-The empty string (a string of no characters) has a special meaning as
-the value of `RS': it means that records are separated only by blank
-lines. *Note Multiple::, for more details.
-
-The `awk' utility keeps track of the number of records that have been
-read so far from the current input file. This value is stored in a
-special variable called `FNR'. It is reset to zero when a new file
-is started. Another variable, `NR', is the total number of input
-records read so far from all files. It starts at zero but is never
-automatically reset to zero.
-
-If you change the value of `RS' in the middle of an `awk' run, the
-new value is used to delimit subsequent records, but the record
-currently being processed (and records already finished) are not
-affected.
-
-
diff --git a/gawk-info-2 b/gawk-info-2
deleted file mode 100644
index a228c5b9..00000000
--- a/gawk-info-2
+++ /dev/null
@@ -1,1265 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: Fields, Next: Non-Constant Fields, Prev: Records, Up: Reading Files
-
-Examining Fields
-================
-
-When `awk' reads an input record, the record is automatically
-separated or "parsed" by the interpreter into pieces called "fields".
-By default, fields are separated by whitespace, like words in a line.
-Whitespace in `awk' means any string of one or more spaces and/or
-tabs; other characters such as newline, formfeed, and so on, that are
-considered whitespace by other languages are *not* considered
-whitespace by `awk'.
-
-The purpose of fields is to make it more convenient for you to refer
-to these pieces of the record. You don't have to use them--you can
-operate on the whole record if you wish--but fields are what make
-simple `awk' programs so powerful.
-
-To refer to a field in an `awk' program, you use a dollar--sign, `$',
-followed by the number of the field you want. Thus, `$1' refers to
-the first field, `$2' to the second, and so on. For example, suppose
-the following is a line of input:
-
- This seems like a pretty nice example.
-
- Here the first field, or `$1', is `This'; the second field, or `$2',
-is `seems'; and so on. Note that the last field, `$7', is
-`example.'. Because there is no space between the `e' and the `.',
-the period is considered part of the seventh field.
-
-No matter how many fields there are, the last field in a record can
-be represented by `$NF'. So, in the example above, `$NF' would be
-the same as `$7', which is `example.'. Why this works is explained
-below (*note Non-Constant Fields::.). If you try to refer to a field
-beyond the last one, such as `$8' when the record has only 7 fields,
-you get the empty string.
-
-Plain `NF', with no `$', is a special variable whose value is the
-number of fields in the current record.
-
-`$0', which looks like an attempt to refer to the zeroth field, is a
-special case: it represents the whole input record. This is what you
-would use when you aren't interested in fields.
-
-Here are some more examples:
-
- awk '$1 ~ /foo/ { print $0 }' BBS-list
-
-This example contains the "matching" operator `~' (*note Comparison
-Ops::.). Using this operator, all records in the file `BBS-list'
-whose first field contains the string `foo' are printed.
-
-By contrast, the following example:
-
- awk '/foo/ { print $1, $NF }' BBS-list
-
-looks for the string `foo' in *the entire record* and prints the
-first field and the last field for each input record containing the
-pattern.
-
-The following program will search the system password file, and print
-the entries for users who have no password.
-
- awk -F: '$2 == ""' /etc/passwd
-
-This program uses the `-F' option on the command line to set the field
-separator. (Fields in `/etc/passwd' are separated by colons. The
-second field represents a user's encrypted password, but if the field
-is empty, that user has no password.)
-
-
-
-File: gawk-info, Node: Non-Constant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files
-
-Non-constant Field Numbers
-==========================
-
-The number of a field does not need to be a constant. Any expression
-in the `awk' language can be used after a `$' to refer to a field.
-The `awk' utility evaluates the expression and uses the "numeric
-value" as a field number. Consider this example:
-
- awk '{ print $NR }'
-
-Recall that `NR' is the number of records read so far: 1 in the first
-record, 2 in the second, etc. So this example will print the first
-field of the first record, the second field of the second record, and
-so on. For the twentieth record, field number 20 will be printed;
-most likely this will make a blank line, because the record will not
-have 20 fields.
-
-Here is another example of using expressions as field numbers:
-
- awk '{ print $(2*2) }' BBS-list
-
-The `awk' language must evaluate the expression `(2*2)' and use its
-value as the field number to print. The `*' sign represents
-multiplication, so the expression `2*2' evaluates to 4. This
-example, then, prints the hours of operation (the fourth field) for
-every line of the file `BBS-list'.
-
-When you use non--constant field numbers, you may ask for a field
-with a negative number. This always results in an empty string, just
-like a field whose number is too large for the input record. For
-example, `$(1-4)' would try to examine field number -3; it would
-result in an empty string.
-
-If the field number you compute is zero, you get the entire record.
-
-The number of fields in the current record is stored in the special
-variable `NF' (*note Special::.). The expression `$NF' is not a
-special feature: it is the direct consequence of evaluating `NF' and
-using its value as a field number.
-
-
-
-File: gawk-info, Node: Changing Fields, Next: Field Separators, Prev: Non-Constant Fields, Up: Reading Files
-
-Changing the Contents of a Field
-================================
-
-You can change the contents of a field as seen by `awk' within an
-`awk' program; this changes what `awk' perceives as the current input
-record. (The actual input is untouched: `awk' never modifies the
-input file.)
-
-Look at this example:
-
- awk '{ $3 = $2 - 10; print $2, $3 }' inventory-shipped
-
-The `-' sign represents subtraction, so this program reassigns field
-three, `$3', to be the value of field two minus ten, ``$2' - 10'.
-(*Note Arithmetic Ops::.) Then field two, and the new value for
-field three, are printed.
-
-In order for this to work, the text in field `$2' must make sense as
-a number; the string of characters must be converted to a number in
-order for the computer to do arithmetic on it. The number resulting
-from the subtraction is converted back to a string of characters
-which then becomes field 3. *Note Conversion::.
-
-When you change the value of a field (as perceived by `awk'), the
-text of the input record is recalculated to contain the new field
-where the old one was. `$0' will from that time on reflect the
-altered field. Thus,
-
- awk '{ $2 = $2 - 10; print $0 }' inventory-shipped
-
-will print a copy of the input file, with 10 subtracted from the
-second field of each line.
-
-You can also assign contents to fields that are out of range. For
-example:
-
- awk '{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 }' inventory-shipped
-
-We've just created `$6', whose value is the average of fields `$2',
-`$3', `$4', and `$5'. The `+' sign represents addition, and the `/'
-sign represents division. For the file `inventory-shipped' `$6'
-represents the average number of parcels shipped for a particular
-month.
-
-Creating a new field changes what `awk' interprets as the current
-input record. The value of `$0' will be recomputed. This
-recomputation affects and is affected by features not yet discussed,
-in particular, the "Output Field Separator", `OFS', which is used to
-separate the fields (*note Output Separators::.), and `NF' (the
-number of fields; *note Fields::.). For example, the value of `NF'
-will be set to the number of the highest out--of--range field you
-create.
-
-Note, however, that merely *referencing* an out--of--range field will
-*not* change the value of either `$0' or `NF'. Referencing an
-out--of--range field merely produces a null string. For example:
-
- if ($(NF+1) != "")
- print "can't happen"
- else
- print "everything is normal"
-
-should print `everything is normal'. (*Note If::, for more
-information about `awk''s `if-else' statements.)
-
-
-
-File: gawk-info, Node: Field Separators, Next: Multiple, Prev: Changing Fields, Up: Reading Files
-
-Specifying How Fields Are Separated
-===================================
-
-You can change the way `awk' splits a record into fields by changing
-the value of the "field separator". The field separator is
-represented by the special variable `FS' in an `awk' program, and can
-be set by `-F' on the command line. The `awk' language scans each
-input line for the field separator character to determine the
-positions of fields within that line. Shell programmers take note!
-`awk' uses the variable `FS', not `IFS'.
-
-The default value of the field separator is a string containing a
-single space. This value is actually a special case; as you know, by
-default, fields are separated by whitespace sequences, not by single
-spaces: two spaces in a row do not delimit an empty field.
-``Whitespace'' is defined as sequences of one or more spaces or tab
-characters.
-
-You change the value of `FS' by "assigning" it a new value. You can
-do this using the special `BEGIN' pattern (*note BEGIN/END::.). This
-pattern allows you to change the value of `FS' before any input is
-read. The new value of `FS' is enclosed in quotation marks. For example,
-set the value of `FS' to the string `","':
-
- awk 'BEGIN { FS = "," } ; { print $2 }'
-
-and use the input line:
-
- John Q. Smith, 29 Oak St., Walamazoo, MI 42139
-
-This `awk' program will extract the string `29 Oak St.'.
-
-Sometimes your input data will contain separator characters that
-don't separate fields the way you thought they would. For instance,
-the person's name in the example we've been using might have a title
-or suffix attached, such as `John Q. Smith, LXIX'. If you assigned
-`FS' to be `,' then:
-
- awk 'BEGIN { FS = "," } ; { print $2 }'
-
-would extract `LXIX', instead of `29 Oak St.'. If you were expecting
-the program to print the address, you would be surprised. So, choose
-your data layout and separator characters carefully to prevent
-problems like this from happening.
-
-You can assign `FS' to be a series of characters. For example, the
-assignment:
-
- FS = ", \t"
-
-makes every area of an input line that consists of a comma followed
-by a space and a tab, into a field separator. (`\t' stands for a tab.)
-
-If `FS' is any single character other than a blank, then that
-character is used as the field separator, and two successive
-occurrences of that character do delimit an empty field.
-
-If you assign `FS' to a string longer than one character, that string
-is evaluated as a "regular expression" (*note Regexp::.). The value
-of the regular expression is used as a field separator.
-
-`FS' can be set on the command line. You use the `-F' argument to do
-so. For example:
-
- awk -F, 'PROGRAM' INPUT-FILES
-
-sets `FS' to be the `,' character. Notice that the argument uses a
-capital `F'. Contrast this with `-f', which specifies a file
-containing an `awk' program. Case is significant in command options:
-the `-F' and `-f' options have nothing to do with each other. You
-can use both options at the same time to set the `FS' argument *and*
-get an `awk' program from a file.
-
-As a special case, if the argument to `-F' is `t', then `FS' is set
-to the tab character. (This is because if you type `-F\t', without
-the quotes, at the shell, the `\' gets deleted, so `awk' figures that
-you really want your fields to be separated with tabs, and not `t's.
-Use `FS="t"' if you really do want to separate your fields with `t's.)
-
-For example, let's use an `awk' program file called `baud.awk' that
-contains the pattern `/300/', and the action `print $1'. We'll use
-the operating system utility `cat' to ``look'' at our program:
-
- % cat baud.awk
- /300/ { print $1 }
-
-Let's also set `FS' to be the `-' character. We will apply all this
-information to the file `BBS-list'. This `awk' program will now
-print a list of the names of the bulletin boards that operate at 300
-baud and the first three digits of their phone numbers.
-
- awk -F- -f baud.awk BBS-list
-
-produces this output:
-
- aardvark 555
- alpo
- barfly 555
- bites 555
- camelot 555
- core 555
- fooey 555
- foot 555
- macfoo 555
- sdace 555
- sabafoo 555
-
-Note the second line of output. If you check the original file, you
-will see that the second line looked like this:
-
- alpo-net 555-3412 2400/1200/300 A
-
-The `-' as part of the system's name was used as the field separator,
-instead of the `-' in the phone number that was originally intended.
-This demonstrates why you have to be careful in choosing your field
-and record separators.
-
-
-
-File: gawk-info, Node: Multiple, Next: Assignment Options, Prev: Field Separators, Up: Reading Files
-
-Multiple--Line Records
-======================
-
-In some data bases, a single line cannot conveniently hold all the
-information in one entry. Then you will want to use multi--line
-records.
-
-The first step in doing this is to choose your data format: when
-records are not defined as single lines, how will you want to define
-them? What should separate records?
-
-One technique is to use an unusual character or string to separate
-records. For example, you could use the formfeed character (written
-`\f' in `awk', as in C) to separate them, making each record a page
-of the file. To do this, just set the variable `RS' to `"\f"' (a
-string containing the formfeed character), or whatever string you
-prefer to use.
-
-Another technique is to have blank lines separate records. By a
-special dispensation, a null string as the value of `RS' indicates
-that records are separated by one or more blank lines. If you set
-`RS' to the null string, a record will always end at the first blank
-line encountered. And the next record won't start until the first
-nonblank line that follows--no matter how many blank lines appear in
-a row, they will be considered one record--separator.
-
-The second step is to separate the fields in the record. One way to
-do this is to put each field on a separate line: to do this, just set
-the variable `FS' to the string `"\n"'. (This simple regular
-expression matches a single newline.) Another idea is to divide each
-of the lines into fields in the normal manner; the regular expression
-`"[ \t\n]+"' will do this nicely by treating the newlines inside the
-record just like spaces.
-
-When `RS' is set to the null string, the newline character *always*
-acts as a field separator. This is in addition to whatever value
-`FS' has. The probable reason for this rule is so that you get
-rational behavior in the default case (i.e. `FS == " "'). This can
-be a problem if you really don't want the newline character to
-separate fields, since there is no way to do that. However, you can
-work around this by using the `split' function to manually break up
-your data (*note String Functions::.).
-
-Here is how to use records separated by blank lines and break each
-line into fields normally:
-
- awk 'BEGIN { RS = ""; FS = "[ \t\n]+" } ; { print $0 }' BBS-list
-
-
-
-File: gawk-info, Node: Assignment Options, Next: Getline, Prev: Multiple, Up: Reading Files
-
-Assigning Variables on the Command Line
-=======================================
-
-You can include variable "assignments" among the file names on the
-command line used to invoke `awk' (*note Command Line::.). Such
-assignments have the form:
-
- VARIABLE=TEXT
-
-and allow you to change variables either at the beginning of the
-`awk' run or in between input files. The variable assignment is
-performed at a time determined by its position among the input file
-arguments: after the processing of the preceding input file argument.
-For example:
-
- awk '{ print $n }' n=4 inventory-shipped n=2 BBS-list
-
-prints the value of field number `n' for all input records. Before
-the first file is read, the command line sets the variable `n' equal
-to 4. This causes the fourth field of the file `inventory-shipped'
-to be printed. After the first file has finished, but before the
-second file is started, `n' is set to 2, so that the second field of
-the file `BBS-list' will be printed.
-
-Command line arguments are made available for explicit examination by
-the `awk' program in an array named `ARGV' (*note Special::.).
-
-
-
-File: gawk-info, Node: Getline, Prev: Assignment Options, Up: Reading Files
-
-Explicit Input with `getline'
-=============================
-
-So far we have been getting our input files from `awk''s main input
-stream--either the standard input (usually your terminal) or the
-files specified on the command line. The `awk' language has a
-special built--in function called `getline' that can be used to read
-input under your explicit control.
-
-This command is quite complex and should *not* be used by beginners.
-The command (and its variations) is covered here because this is the
-section about input. The examples that follow the explanation of the
-`getline' command include material that has not been covered yet.
-Therefore, come back and attempt the `getline' command *after* you
-have reviewed the rest of this manual and have a good knowledge of
-how `awk' works.
-
-When retrieving input, `getline' returns a 1 if it found a record,
-and a 0 if the end of the file was encountered. If there was some
-error in getting a record, such as a file that could not be opened,
-then `getline' returns a -1.
-
-In the following examples, COMMAND stands for a string value that
-represents a shell command.
-
-`getline'
- The `getline' function can be used by itself, in an `awk'
- program, to read input from the current input. All it does in
- this case is read the next input record and split it up into
- fields. This is useful if you've finished processing the
- current record, but you want to do some special processing
- *right now* on the next record. Here's an example:
-
- awk '{
- if (t = index($0, "/*")) {
- if(t > 1)
- tmp = substr($0, 1, t - 1)
- else
- tmp = ""
- u = index(substr($0, t + 2), "*/")
- while (! u) {
- getline
- t = -1
- u = index($0, "*/")
- }
- if(u <= length($0) - 2)
- $0 = tmp substr($0, t + u + 3)
- else
- $0 = tmp
- }
- print $0
- }'
-
- This `awk' program deletes all comments, `/* ... */', from the
- input. By replacing the `print $0' with other statements, you
- could perform more complicated processing on the de--commented
- input, such as searching it for matches of a regular expression.
-
- This form of the `getline' command sets `NF' (the number of
- fields; *note Fields::.), `NR' (the number of records read so
- far), the `FNR' variable (*note Records::.), and the value of
- `$0'.
-
- *Note:* The new value of `$0' will be used in testing the
- patterns of any subsequent rules. The original value of `$0'
- that triggered the rule which executed `getline' is lost. By
- contrast, the `next' statement reads a new record but
- immediately begins processing it normally, starting with the
- first rule in the program. *Note Next::.
-
-`getline VAR'
- This form of `getline' reads a record into the variable VAR.
- This is useful when you want your program to read the next
- record from the input file, but you don't want to subject the
- record to the normal input processing.
-
- For example, suppose the next line is a comment, or a special
- string, and you want to read it, but you must make certain that
- it won't accidentally trigger any rules. This version of
- `getline' will allow you to read that line and store it in a
- variable so that the main read--a--line--and--check--each--rule
- loop of `awk' never sees it.
-
- The following example swaps every two lines of input. For
- example, given:
-
- wan
- tew
- free
- phore
-
- it outputs:
-
- tew
- wan
- phore
- free
-
- Here's the program:
-
- awk '{
- if ((getline tmp) > 0) {
- print tmp
- print $0
- } else
- print $0
- }'
-
- The `getline' function used in this way sets only `NR' and `FNR'
- (and of course, VAR). The record is not split into fields, so
- the values of the fields (including `$0') and the value of `NF'
- do not change.
-
-`getline < FILE'
- This form of the `getline' function takes its input from the
- file FILE. Here FILE is a string--valued expression that
- specifies the file name.
-
- This form is useful if you want to read your input from a
- particular file, instead of from the main input stream. For
- example, the following program reads its input record from the
- file `foo.input' when it encounters a first field with a value
- equal to 10 in the current input file.
-
- awk '{
- if ($1 == 10) {
- getline < "foo.input"
- print
- } else
- print
- }'
-
- Since the main input stream is not used, the values of `NR' and
- `FNR' are not changed. But the record read is split into fields
- in the normal manner, so the values of `$0' and other fields are
- changed. So is the value of `NF'.
-
- This does not cause the record to be tested against all the
- patterns in the `awk' program, in the way that would happen if
- the record were read normally by the main processing loop of
- `awk'. However, the new record is tested against any subsequent
- rules, just as when `getline' is used without a redirection.
-
-`getline VAR < FILE'
- This form of the `getline' function takes its input from the
- file FILE and puts it in the variable VAR. As above, FILE is a
- string--valued expression that specifies the file to read from.
-
- In this version of `getline', none of the built--in variables
- are changed, and the record is not split into fields. The only
- variable changed is VAR.
-
- For example, the following program copies all the input files to
- the output, except for records that say `@include FILENAME'.
- Such a record is replaced by the contents of the file FILENAME.
-
- awk '{
- if (NF == 2 && $1 == "@include") {
- while ((getline line < $2) > 0)
- print line
- close($2)
- } else
- print
- }'
-
- Note here how the name of the extra input file is not built into
- the program; it is taken from the data, from the second field on
- the `@include' line.
-
- The `close' command is used to ensure that if two identical
- `@include' lines appear in the input, the entire specified file
- is included twice. *Note Close Input::.
-
- One deficiency of this program is that it does not process
- nested `@include' statements the way a true macro preprocessor
- would.
-
-`COMMAND | getline'
- You can "pipe" the output of a command into `getline'. A pipe
- is simply a way to link the output of one program to the input
- of another. In this case, the string COMMAND is run as a shell
- command and its output is piped into `awk' to be used as input.
- This form of `getline' reads one record from the pipe.
-
- For example, the following program copies input to output,
- except for lines that begin with `@execute', which are replaced
- by the output produced by running the rest of the line as a
- shell command:
-
- awk '{
- if ($1 == "@execute") {
- tmp = substr($0, 10)
- while ((tmp | getline) > 0)
- print
- close(tmp)
- } else
- print
- }'
-
- The `close' command is used to ensure that if two identical
- `@execute' lines appear in the input, the command is run again
- for each one. *Note Close Input::.
-
- Given the input:
-
- foo
- bar
- baz
- @execute who
- bletch
-
- the program might produce:
-
- foo
- bar
- baz
- hack ttyv0 Jul 13 14:22
- hack ttyp0 Jul 13 14:23 (gnu:0)
- hack ttyp1 Jul 13 14:23 (gnu:0)
- hack ttyp2 Jul 13 14:23 (gnu:0)
- hack ttyp3 Jul 13 14:23 (gnu:0)
- bletch
-
- Notice that this program ran the command `who' and printed the
- result. (If you try this program yourself, you will get
- different results, showing you who is logged in.)
-
- This variation of `getline' splits the record into fields, sets
- the value of `NF' and recomputes the value of `$0'. The values
- of `NR' and `FNR' are not changed.
-
-`COMMAND | getline VAR'
- The output of the command COMMAND is sent through a pipe to
- `getline' and into the variable VAR. For example, the following
- program reads the current date and time into the variable
- `current_time', using the utility called `date', and then prints
- it.
-
- awk 'BEGIN {
- "date" | getline current_time
- close("date")
- print "Report printed on " current_time
- }'
-
- In this version of `getline', none of the built--in variables
- are changed, and the record is not split into fields.
-
-
-
-File: gawk-info, Node: Close Input, Up: Getline
-
-Closing Input Files
--------------------
-
-If the same file name or the same shell command is used with
-`getline' more than once during the execution of the `awk' program,
-the file is opened (or the command is executed) only the first time.
-At that time, the first record of input is read from that file or
-command. The next time the same file or command is used in
-`getline', another record is read from it, and so on.
-
-What this implies is that if you want to start reading the same file
-again from the beginning, or if you want to rerun a shell command
-(rather than reading more output from the command), you must take
-special steps. What you can do is use the `close' statement:
-
- close (FILENAME)
-
-This statement closes a file or pipe, represented here by FILENAME.
-The string value of FILENAME must be the same value as the string
-used to open the file or pipe to begin with.
-
-Once this statement is executed, the next `getline' from that file or
-command will reopen the file or rerun the command.
-
-
-
-File: gawk-info, Node: Printing, Next: One-liners, Prev: Reading Files, Up: Top
-
-Printing Output
-***************
-
-One of the most common things that actions do is to output or "print"
-some or all of the input. For simple output, use the `print'
-statement. For fancier formatting use the `printf' statement. Both
-are described in this chapter.
-
-* Menu:
-
-* Print:: The `print' statement.
-* Print Examples:: Simple examples of `print' statements.
-* Output Separators:: The output separators and how to change them.
-
-* Redirection:: How to redirect output to multiple files and pipes.
-* Close Output:: How to close output files and pipes.
-
-* Printf:: The `printf' statement.
-
-
-
-File: gawk-info, Node: Print, Next: Print Examples, Up: Printing
-
-The `print' Statement
-=====================
-
-The `print' statement does output with simple, standardized
-formatting. You specify only the strings or numbers to be printed,
-in a list separated by commas. They are output, separated by single
-spaces, followed by a newline. The statement looks like this:
-
- print ITEM1, ITEM2, ...
-
- The entire list of items may optionally be enclosed in parentheses.
-The parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a
-redirection (*note Redirection::.). The relational operators are
-`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison
-Ops::.).
-
-The items printed can be constant strings or numbers, fields of the
-current record (such as `$1'), variables, or any `awk' expressions.
-The `print' statement is completely general for computing *what*
-values to print. With one exception (*note Output Separators::.),
-what you can't do is specify *how* to print them--how many columns to
-use, whether to use exponential notation or not, and so on. For
-that, you need the `printf' statement (*note Printf::.).
-
-To print a fixed piece of text, write a string constant as one item,
-such as `"Hello there"'. If you forget to use the double--quote
-characters, your text will be taken as an `awk' expression, and you
-will probably get an error. Keep in mind that a space will be
-printed between any two items.
-
-The simple statement `print' with no items is equivalent to `print
-$0': it prints the entire current record. To print a blank line, use
-`print ""', where `""' is the null, or empty, string.
-
-Most often, each `print' statement makes one line of output. But it
-isn't limited to one line. If an item value is a string that
-contains a newline, the newline is output along with the rest of the
-string. A single `print' can make any number of lines this way.
-
-
-
-File: gawk-info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing
-
-Examples of `print' Statements
-==============================
-
-Here is an example that prints the first two fields of each input
-record, with a space between them:
-
- awk '{ print $1, $2 }' inventory-shipped
-
-Its output looks like this:
-
- Jan 13
- Feb 15
- Mar 15
- ...
-
- A common mistake in using the `print' statement is to omit the comma
-between two items. This often has the effect of making the items run
-together in the output, with no space. The reason for this is that
-juxtaposing two string expressions in `awk' means to concatenate
-them. For example, without the comma:
-
- awk '{ print $1 $2 }' inventory-shipped
-
-prints:
-
- Jan13
- Feb15
- Mar15
- ...
-
- Neither example's output makes much sense to someone unfamiliar with
-the file `inventory-shipped'. A heading line at the beginning would
-make it clearer. Let's add some headings to our table of months
-(`$1') and green crates shipped (`$2'). We do this using the BEGIN
-pattern (*note BEGIN/END::.) to cause the headings to be printed only
-once:
-
- awk 'BEGIN { print "Month Crates"
- print "---- -----" }
- { print $1, $2 }' inventory-shipped
-
-Did you already guess what will happen? This program prints the
-following:
-
- Month Crates
- ---- -----
- Jan 13
- Feb 15
- Mar 15
- ...
-
- The headings and the table data don't line up! We can fix this by
-printing some spaces between the two fields:
-
- awk 'BEGIN { print "Month Crates"
- print "---- -----" }
- { print $1, " ", $2 }' inventory-shipped
-
-You can imagine that this way of lining up columns can get pretty
-complicated when you have many columns to fix. Counting spaces for
-two or three columns can be simple, but more than this and you can
-get ``lost'' quite easily. This is why the `printf' statement was
-created (*note Printf::.); one of its specialties is lining up
-columns of data.
-
-
-
-File: gawk-info, Node: Output Separators, Next: Redirection, Prev: Print Examples, Up: Printing
-
-Output Separators
-=================
-
-As mentioned previously, a `print' statement contains a list of
-items, separated by commas. In the output, the items are normally
-separated by single spaces. But they do not have to be spaces; a
-single space is only the default. You can specify any string of
-characters to use as the "output field separator", by setting the
-special variable `OFS'. The initial value of this variable is the
-string `" "'.
-
-The output from an entire `print' statement is called an "output
-record". Each `print' statement outputs one output record and then
-outputs a string called the "output record separator". The special
-variable `ORS' specifies this string. The initial value of the
-variable is the string `"\n"' containing a newline character; thus,
-normally each `print' statement makes a separate line.
-
-You can change how output fields and records are separated by
-assigning new values to the variables `OFS' and/or `ORS'. The usual
-place to do this is in the `BEGIN' rule (*note BEGIN/END::.), so that
-it happens before any input is processed. You may also do this with
-assignments on the command line, before the names of your input files.
-
-The following example prints the first and second fields of each
-input record separated by a semicolon, with a blank line added after
-each line:
-
- awk 'BEGIN { OFS = ";"; ORS = "\n\n" }
- { print $1, $2 }' BBS-list
-
-If the value of `ORS' does not contain a newline, all your output
-will be run together on a single line, unless you output newlines
-some other way.
-
-
-
-File: gawk-info, Node: Redirection, Next: Printf, Prev: Output Separators, Up: Printing
-
-Redirecting Output of `print' and `printf'
-==========================================
-
-So far we have been dealing only with output that prints to the
-standard output, usually your terminal. Both `print' and `printf'
-can be told to send their output to other places. This is called
-"redirection".
-
-A redirection appears after the `print' or `printf' statement.
-Redirections in `awk' are written just like redirections in shell
-commands, except that they are written inside the `awk' program.
-
-Here are the three forms of output redirection. They are all shown
-for the `print' statement, but they work for `printf' also.
-
-`print ITEMS > OUTPUT-FILE'
- This type of redirection prints the items onto the output file
- OUTPUT-FILE. The file name OUTPUT-FILE can be any expression.
- Its value is changed to a string and then used as a filename
- (*note Expressions::.).
-
- When this type of redirection is used, the OUTPUT-FILE is erased
- before the first output is written to it. Subsequent writes do
- not erase OUTPUT-FILE, but append to it. If OUTPUT-FILE does
- not exist, then it is created.
-
- For example, here is how one `awk' program can write a list of
- BBS names to a file `name-list' and a list of phone numbers to a
- file `phone-list'. Each output file contains one name or number
- per line.
-
- awk '{ print $2 > "phone-list"
- print $1 > "name-list" }' BBS-list
-
-`print ITEMS >> OUTPUT-FILE'
- This type of redirection prints the items onto the output file
- OUTPUT-FILE. The difference between this and the single--`>'
- redirection is that the old contents (if any) of OUTPUT-FILE are
- not erased. Instead, the `awk' output is appended to the file.
-
-`print ITEMS | COMMAND'
- It is also possible to send output through a "pipe" instead of
- into a file. This type of redirection opens a pipe to COMMAND
- and writes the values of ITEMS through this pipe, to another
- process created to execute COMMAND.
-
- The redirection argument COMMAND is actually an `awk'
- expression. Its value is converted to a string, whose contents
- give the shell command to be run.
-
- For example, this produces two files, one unsorted list of BBS
- names and one list sorted in reverse alphabetical order:
-
- awk '{ print $1 > "names.unsorted"
- print $1 | "sort -r > names.sorted" }' BBS-list
-
- Here the unsorted list is written with an ordinary redirection
- while the sorted list is written by piping through the `sort'
- utility.
-
- Here is an example that uses redirection to mail a message to a
- mailing list `bug-system'. This might be useful when trouble is
- encountered in an `awk' script run periodically for system
- maintenance.
-
- print "Awk script failed:", $0 | "mail bug-system"
- print "processing record number", FNR, "of", FILENAME | "mail bug-system"
- close ("mail bug-system")
-
- We use a `close' statement here because it's a good idea to
- close the pipe as soon as all the intended output has been sent
- to it. *Note Close Output::, for more information on this.
-
-Redirecting output using `>', `>>', or `|' asks the system to open a
-file or pipe only if the particular FILE or COMMAND you've specified
-has not already been written to by your program.
-
-
-
-File: gawk-info, Node: Close Output, Up: Redirection
-
-Closing Output Files and Pipes
-------------------------------
-
-When a file or pipe is opened, the filename or command associated
-with it is remembered by `awk' and subsequent writes to the same file
-or command are appended to the previous writes. The file or pipe
-stays open until `awk' exits. This is usually convenient.
-
-Sometimes there is a reason to close an output file or pipe earlier
-than that. To do this, use the `close' command, as follows:
-
- close (FILENAME)
-
-or
-
- close (COMMAND)
-
-The argument FILENAME or COMMAND can be any expression. Its value
-must exactly equal the string used to open the file or pipe to begin
-with--for example, if you open a pipe with this:
-
- print $1 | "sort -r > names.sorted"
-
-then you must close it with this:
-
- close ("sort -r > names.sorted")
-
-Here are some reasons why you might need to close an output file:
-
- * To write a file and read it back later on in the same `awk'
- program. Close the file when you are finished writing it; then
- you can start reading it with `getline' (*note Getline::.).
-
- * To write numerous files, successively, in the same `awk'
- program. If you don't close the files, eventually you will
- exceed the system limit on the number of open files in one
- process. So close each one when you are finished writing it.
-
- * To make a command finish. When you redirect output through a
- pipe, the command reading the pipe normally continues to try to
- read input as long as the pipe is open. Often this means the
- command cannot really do its work until the pipe is closed. For
- example, if you redirect output to the `mail' program, the
- message will not actually be sent until the pipe is closed.
-
- * To run the same subprogram a second time, with the same arguments.
- This is not the same thing as giving more input to the first run!
-
- For example, suppose you pipe output to the `mail' program. If
- you output several lines redirected to this pipe without closing
- it, they make a single message of several lines. By contrast,
- if you close the pipe after each line of output, then each line
- makes a separate message.
-
-
-
-File: gawk-info, Node: Printf, Prev: Redirection, Up: Printing
-
-Using `printf' Statements For Fancier Printing
-==============================================
-
-If you want more precise control over the output format than `print'
-gives you, use `printf'. With `printf' you can specify the width to
-use for each item, and you can specify various stylistic choices for
-numbers (such as what radix to use, whether to print an exponent,
-whether to print a sign, and how many digits to print after the
-decimal point). You do this by specifying a "format string".
-
-* Menu:
-
-* Basic Printf:: Syntax of the `printf' statement.
-* Format-Control:: Format-control letters.
-* Modifiers:: Format--specification modifiers.
-* Printf Examples:: Several examples.
-
-
-
-File: gawk-info, Node: Basic Printf, Next: Format-Control, Up: Printf
-
-Introduction to the `printf' Statement
---------------------------------------
-
-The `printf' statement looks like this:
-
- printf FORMAT, ITEM1, ITEM2, ...
-
- The entire list of items may optionally be enclosed in parentheses.
-The parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a
-redirection (*note Redirection::.). The relational operators are
-`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison
-Ops::.).
-
-The difference between `printf' and `print' is the argument FORMAT.
-This is an expression whose value is taken as a string; its job is to
-say how to output each of the other arguments. It is called the
-"format string".
-
-The format string is essentially the same as in the C library
-function `printf'. Most of FORMAT is text to be output verbatim.
-Scattered among this text are "format specifiers", one per item.
-Each format specifier says to output the next item at that place in
-the format.
-
-The `printf' statement does not automatically append a newline to its
-output. It outputs nothing but what the format specifies. So if you
-want a newline, you must include one in the format. The output
-separator variables `OFS' and `ORS' have no effect on `printf'
-statements.
-
-
-
-File: gawk-info, Node: Format-Control, Next: Modifiers, Prev: Basic Printf, Up: Printf
-
-Format--Control Characters
---------------------------
-
-A format specifier starts with the character `%' and ends with a
-"format--control letter"; it tells the `printf' statement how to
-output one item. (If you actually want to output a `%', write `%%'.)
-The format--control letter specifies what kind of value to print.
-The rest of the format specifier is made up of optional "modifiers"
-which are parameters such as the field width to use.
-
-Here is a list of them:
-
-`c'
- This prints a number as an ASCII character. Thus, `printf "%c",
- 65' outputs the letter `A'. The output for a string value is
- the first character of the string.
-
-`d'
- This prints a decimal integer.
-
-`e'
- This prints a number in scientific (exponential) notation. For
- example,
-
- printf "%4.3e", 1950
-
- prints `1.950e+03', with a total of 4 significant figures of
- which 3 follow the decimal point. The `4.3' are "modifiers",
- discussed below.
-
-`f'
- This prints a number in floating point notation.
-
-`g'
- This prints either scientific notation or floating point
- notation, whichever is shorter.
-
-`o'
- This prints an unsigned octal integer.
-
-`s'
- This prints a string.
-
-`x'
- This prints an unsigned hexadecimal integer.
-
-`%'
- This isn't really a format--control letter, but it does have a
- meaning when used after a `%': the sequence `%%' outputs one
- `%'. It does not consume an argument.
-
-
-
-File: gawk-info, Node: Modifiers, Next: Printf Examples, Prev: Format-Control, Up: Printf
-
-Modifiers for `printf' Formats
-------------------------------
-
-A format specification can also include "modifiers" that can control
-how much of the item's value is printed and how much space it gets.
-The modifiers come between the `%' and the format--control letter.
-Here are the possible modifiers, in the order in which they may appear:
-
-`-'
- The minus sign, used before the width modifier, says to
- left--justify the argument within its specified width. Normally
- the argument is printed right--justified in the specified width.
-
-`WIDTH'
- This is a number representing the desired width of a field.
- Inserting any number between the `%' sign and the format control
- character forces the field to be expanded to this width. The
- default way to do this is to pad with spaces on the left.
-
-`.PREC'
- This is a number that specifies the precision to use when
- printing. This specifies the number of digits you want printed
- to the right of the decimal point.
-
-The C library `printf''s dynamic WIDTH and PREC capability (for
-example, `"%*.*s"') is not supported. However, it can be easily
-simulated using concatenation to dynamically build the format string.
-
-
-
-File: gawk-info, Node: Printf Examples, Prev: Modifiers, Up: Printf
-
-Examples of Using `printf'
---------------------------
-
-Here is how to use `printf' to make an aligned table:
-
- awk '{ printf "%-10s %s\n", $1, $2 }' BBS-list
-
-prints the names of bulletin boards (`$1') of the file `BBS-list' as
-a string of 10 characters, left justified. It also prints the phone
-numbers (`$2') afterward on the line. This will produce an aligned
-two--column table of names and phone numbers, like so:
-
- aardvark 555-5553
- alpo-net 555-3412
- barfly 555-7685
- bites 555-1675
- camelot 555-0542
- core 555-2912
- fooey 555-1234
- foot 555-6699
- macfoo 555-6480
- sdace 555-3430
- sabafoo 555-2127
-
-Did you notice that we did not specify that the phone numbers be
-printed as numbers? They had to be printed as strings because the
-numbers are separated by a dash. This dash would be interpreted as a
-"minus" sign if we had tried to print the phone numbers as numbers.
-This would have led to some pretty confusing results.
-
-We did not specify a width for the phone numbers because they are the
-last things on their lines. We don't need to put spaces after them.
-
-We could make our table look even nicer by adding headings to the
-tops of the columns. To do this, use the BEGIN pattern (*note
-BEGIN/END::.) to cause the header to be printed only once, at the
-beginning of the `awk' program:
-
- awk 'BEGIN { print "Name Number"
- print "--- -----" }
- { printf "%-10s %s\n", $1, $2 }' BBS-list
-
-Did you notice that we mixed `print' and `printf' statements in the
-above example? We could have used just `printf' statements to get
-the same results:
-
- awk 'BEGIN { printf "%-10s %s\n", "Name", "Number"
- printf "%-10s %s\n", "---", "-----" }
- { printf "%-10s %s\n", $1, $2 }' BBS-list
-
-By outputting each column heading with the same format specification
-used for the elements of the column, we have made sure that the
-headings will be aligned just like the columns.
-
-The fact that the same format specification is used can be emphasized
-by storing it in a variable, like so:
-
- awk 'BEGIN { format = "%-10s %s\n"
- printf format, "Name", "Number"
- printf format, "---", "-----" }
- { printf format, $1, $2 }' BBS-list
-
-See if you can use the `printf' statement to line up the headings and
-table data for our `inventory-shipped' example covered earlier in the
-section on the `print' statement (*note Print::.).
-
-
-
-File: gawk-info, Node: One-liners, Next: Patterns, Prev: Printing, Up: Top
-
-Useful ``One-liners''
-*********************
-
-Useful `awk' programs are often short, just a line or two. Here is a
-collection of useful, short programs to get you started. Some of
-these programs contain constructs that haven't been covered yet. The
-description of the program will give you a good idea of what is going
-on, but please read the rest of the manual to become an `awk' expert!
-
-`awk '{ num_fields = num_fields + NF }'
-`` END { print num_fields }'''
- This program prints the total number of fields in all input lines.
-
-`awk 'length($0) > 80''
- This program prints every line longer than 80 characters. The
- sole rule has a relational expression as its pattern, and has no
- action (so the default action, printing the record, is used).
-
-`awk 'NF > 0''
- This program prints every line that has at least one field.
- This is an easy way to delete blank lines from a file (or
- rather, to create a new file similar to the old file but from
- which the blank lines have been deleted).
-
-`awk '{ if (NF > 0) print }''
- This program also prints every line that has at least one field.
- Here we allow the rule to match every line, then decide in the
- action whether to print.
-
-`awk 'BEGIN { for (i = 1; i <= 7; i++)'
-`` print int(101 * rand()) }'''
- This program prints 7 random numbers from 0 to 100, inclusive.
-
-`ls -l FILES | awk '{ x += $4 } ; END { print "total bytes: " x }''
- This program prints the total number of bytes used by FILES.
-
-`expand FILE | awk '{ if (x < length()) x = length() }'
-`` END { print "maximum line length is " x }'''
- This program prints the maximum line length of FILE. The input
- is piped through the `expand' program to change tabs into
- spaces, so the widths compared are actually the right--margin
- columns.
-
-
diff --git a/gawk-info-3 b/gawk-info-3
deleted file mode 100644
index b333f57c..00000000
--- a/gawk-info-3
+++ /dev/null
@@ -1,1385 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: Patterns, Next: Actions, Prev: One-liners, Up: Top
-
-Patterns
-********
-
-Patterns control the execution of rules: a rule is executed when its
-pattern matches the input record. The `awk' language provides
-several special patterns that are described in the sections that
-follow. Patterns include:
-
-NULL
- The empty pattern, which matches every input record. (*Note The
- Empty Pattern: Empty.)
-
-/REGULAR EXPRESSION/
- A regular expression as a pattern. It matches when the text of
- the input record fits the regular expression. (*Note Regular
- Expressions as Patterns: Regexp.)
-
-CONDEXP
- A single comparison expression. It matches when it is true.
- (*Note Comparison Expressions as Patterns: Comparison Patterns.)
-
-`BEGIN'
-`END'
- Special patterns to supply start--up or clean--up information to
- `awk'. (*Note `BEGIN' and `END' Special Patterns: BEGIN/END.)
-
-PAT1, PAT2
- A pair of patterns separated by a comma, specifying a range of
- records. (*Note Specifying Record Ranges With Patterns: Ranges.)
-
-CONDEXP1 BOOLEAN CONDEXP2
- A "compound" pattern, which combines expressions with the
- operators ``and'' (`&&') and ``or'' (`||'). (*Note Boolean
- Operators and Patterns: Boolean.)
-
-! CONDEXP
- The pattern CONDEXP is evaluated. Then the `!' performs a
- boolean ``not'' or logical negation operation; if the input line
- matches the pattern in CONDEXP then the associated action is
- *not* executed. If the input line did not match that pattern,
- then the action *is* executed. (*Note Boolean Operators and
- Patterns: Boolean.)
-
-(EXPR)
- Parentheses may be used to control how operators nest.
-
-PAT1 ? PAT2 : PAT3
- The first pattern is evaluated. If it is true, the input line
- is tested against the second pattern, otherwise it is tested
- against the third. (*Note Conditional Patterns: Conditional
- Patterns.)
-
-* Menu:
-
-The following subsections describe these forms in detail:
-
-* Empty:: The empty pattern, which matches every record.
-
-* Regexp:: Regular expressions such as `/foo/'.
-
-* Comparison Patterns:: Comparison expressions such as `$1 > 10'.
-
-* Boolean:: Combining comparison expressions.
-
-* Ranges:: Using pairs of patterns to specify record ranges.
-
-* BEGIN/END:: Specifying initialization and cleanup rules.
-
-* Conditional Patterns:: Patterns such as `pat1 ? pat2 : pat3'.
-
-
-
-File: gawk-info, Node: Empty, Next: Regexp, Up: Patterns
-
-The Empty Pattern
-=================
-
-An empty pattern is considered to match *every* input record. For
-example, the program:
-
- awk '{ print $1 }' BBS-list
-
-prints just the first field of every record.
-
-
-
-File: gawk-info, Node: Regexp, Next: Comparison Patterns, Prev: Empty, Up: Patterns
-
-Regular Expressions as Patterns
-===============================
-
-A "regular expression", or "regexp", is a way of describing classes
-of strings. When enclosed in slashes (`/'), it makes an `awk'
-pattern that matches every input record that contains a match for the
-regexp.
-
-The simplest regular expression is a sequence of letters, numbers, or
-both. Such a regexp matches any string that contains that sequence.
-Thus, the regexp `foo' matches any string containing `foo'. (More
-complicated regexps let you specify classes of similar strings.)
-
-* Menu:
-
-* Usage: Regexp Usage. How regexps are used in patterns.
-* Operators: Regexp Operators. How to write a regexp.
-
-
-
-File: gawk-info, Node: Regexp Usage, Next: Regexp Operators, Up: Regexp
-
-How to use Regular Expressions
-------------------------------
-
-When you enclose `foo' in slashes, you get a pattern that matches a
-record that contains `foo'. For example, this prints the second
-field of each record that contains `foo' anywhere:
-
- awk '/foo/ { print $2 }' BBS-list
-
-Regular expressions can also be used in comparison expressions. Then
-you can specify the string to match against; it need not be the
-entire current input record. These comparison expressions can be
-used as patterns or in `if' and `while' statements.
-
-`EXP ~ /REGEXP/'
- This is true if the expression EXP (taken as a character string)
- is matched by REGEXP. The following example matches, or
- selects, all input records with the letter `J' in the first field:
-
- awk '$1 ~ /J/' inventory-shipped
-
- So does this:
-
- awk '{ if ($1 ~ /J/) print }' inventory-shipped
-
-`EXP !~ /REGEXP/'
- This is true if the expression EXP (taken as a character string)
- is *not* matched by REGEXP. The following example matches, or
- selects, all input records whose first field *does not* contain
- the letter `J':
-
- awk '$1 !~ /J/' inventory-shipped
-
-The right hand side of a `~' or `!~' operator need not be a constant
-regexp (i.e. a string of characters between `/'s). It can also be
-"computed", or "dynamic". For example:
-
- identifier = "[A-Za-z_][A-Za-z_0-9]*"
- $0 ~ identifier
-
-sets `identifier' to a regexp that describes `awk' variable names,
-and tests if the input record matches this regexp.
-
-A dynamic regexp may actually be any expression. The expression is
-evaluated, and the result is treated as a string that describes a
-regular expression.
-
-
-
-File: gawk-info, Node: Regexp Operators, Prev: Regexp Usage, Up: Regexp
-
-Regular Expression Operators
-----------------------------
-
-You can combine regular expressions with the following characters,
-called "regular expression operators", or "metacharacters", to
-increase the power and versatility of regular expressions. This is a
-table of metacharacters:
-
-`\'
- This is used to suppress the special meaning of a character when
- matching. For example:
-
- \$
-
- matches the character `$'.
-
-`^'
- This matches the beginning of the string or the beginning of a
- line within the string. For example:
-
- ^@chapter
-
- matches the `@chapter' at the beginning of a string, and can be
- used to identify chapter beginnings in Texinfo source files.
-
-`$'
- This is similar to `^', but it matches only at the end of a
- string or the end of a line within the string. For example:
-
- /p$/
-
- as a pattern matches a record that ends with a `p'.
-
-`.'
- This matches any single character except a newline. For example:
-
- .P
-
- matches any single character followed by a `P' in a string.
- Using concatenation we can make regular expressions like `U.A',
- which matches any three--character string that begins with `U'
- and ends with `A'.
-
-`[...]'
- This is called a "character set". It matches any one of a group
- of characters that are enclosed in the square brackets. For
- example:
-
- [MVX]
-
- matches any of the characters `M', `V', or `X' in a string.
-
- Ranges of characters are indicated by using a hyphen between the
- beginning and ending characters, and enclosing the whole thing
- in brackets. For example:
-
- [0-9]
-
- matches any string that contains a digit.
-
- Note that special patterns have to be followed to match the
- characters, `]', `-', and `^' when they are enclosed in the
- square brackets. To match a `]', make it the first character in
- the set. For example:
-
- []d]
-
- matches either `]', or `d'.
-
- To match `-', write it as `---', which is a range containing only
- `-'. You may also make the `-' be the first or last character
- in the set. To match `^', make it any character except the
- first one of a set.
-
-`[^ ...]'
- This is the "complemented character set". The first character
- after the `[' *must* be a `^'. This matches any characters
- *except* those in the square brackets. For example:
-
- [^0-9]
-
- matches any characters that are not digits.
-
-`|'
- This is the "alternation operator" and it is used to specify
- alternatives. For example:
-
- ^P|[0-9]
-
- matches any string that matches either `^P' or `[0-9]'. This
- means it matches any string that contains a digit or starts with
- `P'.
-
-`(...)'
- Parentheses are used for grouping in regular expressions as in
- arithmetic. They can be used to concatenate regular expressions
- containing the alternation operator, `|'.
-
-`*'
- This symbol means that the preceding regular expression is to be
- repeated as many times as possible to find a match. For example:
-
- ph*
-
- applies the `*' symbol to the preceding `h' and looks for
- matches to one `p' followed by any number of `h''s. This will
- also match just `p' if no `h''s are present.
-
- The `*' means repeat the *smallest* possible preceding
- expression in order to find a match. The `awk' language
- processes a `*' by matching as many repetitions as can be found.
- For example:
-
- awk '/\(c[ad][ad]*r x\)/ { print }' sample
-
- matches every record in the input containing a string of the
- form `(car x)', `(cdr x)', `(cadr x)', and so on.
-
-`+'
- This symbol is similar to `*', but the preceding expression must
- be matched at least once. This means that:
-
- wh+y
-
- would match `why' and `whhy' but not `wy', whereas `wh*y' would
- match all three of these strings. And this is a simpler way of
- writing the last `*' example:
-
- awk '/\(c[ad]+r x\)/ { print }' sample
-
-`?'
- This symbol is similar to `*', but the preceding expression can
- be matched once or not at all. For example:
-
- fe?d
-
- will match `fed' or `fd', but nothing else.
-
-In regular expressions, the `*', `+', and `?' operators have the
-highest precedence, followed by concatenation, and finally by `|'.
-As in arithmetic, parentheses can change how operators are grouped.
-
-Any other character stands for itself. However, it is important to
-note that case in regular expressions *is* significant, both when
-matching ordinary (i.e. non--metacharacter) characters, and inside
-character sets. Thus a `w' in a regular expression matches only a
-lowercase `w'; it does not match an uppercase `W'. When
-you want to do a case--independent match, you have to use a character
-set: `[Ww]'.
-
-
-
-File: gawk-info, Node: Comparison Patterns, Next: Ranges, Prev: Regexp, Up: Patterns
-
-Comparison Expressions as Patterns
-==================================
-
-"Comparison patterns" use "relational operators" to compare strings
-or numbers. The relational operators are the same as in C. Here is
-a table of them:
-
-`X < Y'
- True if X is less than Y.
-
-`X <= Y'
- True if X is less than or equal to Y.
-
-`X > Y'
- True if X is greater than Y.
-
-`X >= Y'
- True if X is greater than or equal to Y.
-
-`X == Y'
- True if X is equal to Y.
-
-`X != Y'
- True if X is not equal to Y.
-
-Comparison expressions can be used as patterns to control whether a
-rule is executed. The expression is evaluated for each input record
-read, and the pattern is considered matched if the condition is "true".
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (*note Conversion::.). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, `"10"' is less than `"9"'.
-
-The following example prints the second field of each input record
-whose first field is precisely `foo'.
-
- awk '$1 == "foo" { print $2 }' BBS-list
-
-Contrast this with the following regular expression match, which
-would accept any record with a first field that contains `foo':
-
- awk '$1 ~ "foo" { print $2 }' BBS-list
-
-
-
-File: gawk-info, Node: Ranges, Next: BEGIN/END, Prev: Comparison Patterns, Up: Patterns
-
-Specifying Record Ranges With Patterns
-======================================
-
-A "range pattern" is made of two patterns separated by a comma:
-`BEGPAT, ENDPAT'. It matches ranges of consecutive input records.
-The first pattern BEGPAT controls where the range begins, and the
-second one ENDPAT controls where it ends.
-
-They work as follows: BEGPAT is matched against every input record;
-when a record matches BEGPAT, the range pattern becomes "turned on".
-The range pattern matches this record. As long as it stays turned
-on, it automatically matches every input record read. But meanwhile,
-ENDPAT is matched against every input record, and when it matches,
-the range pattern is turned off again for the following record. Now
-we go back to checking BEGPAT against each record. For example:
-
- awk '$1 == "on", $1 == "off"'
-
-prints every record between on/off pairs, inclusive.
-
-The record that turns on the range pattern and the one that turns it
-off both match the range pattern. If you don't want to operate on
-these records, you can write `if' statements in the rule's action to
-distinguish them.
-
-It is possible for a pattern to be turned both on and off by the same
-record, if both conditions are satisfied by that record. Then the
-action is executed for just that record.
-
-
-
-File: gawk-info, Node: BEGIN/END, Next: Boolean, Prev: Ranges, Up: Patterns
-
-`BEGIN' and `END' Special Patterns
-==================================
-
-`BEGIN' and `END' are special patterns. They are not used to match
-input records. Rather, they are used for supplying start--up or
-clean--up information to your `awk' script. A `BEGIN' rule is
-executed, once, before the first input record has been read. An
-`END' rule is executed, once, after all the input has been read. For
-example:
-
- awk 'BEGIN { print "Analysis of ``foo'' program" }
- /foo/ { ++foobar }
- END { print "``foo'' appears " foobar " times." }' BBS-list
-
-This program finds out how many times the string `foo' appears in the
-input file `BBS-list'. The `BEGIN' pattern prints out a title for
-the report. There is no need to use the `BEGIN' pattern to
-initialize the counter `foobar' to zero, as `awk' does this for us
-automatically (*note Variables::.). The second rule increments the
-variable `foobar' every time a record containing the pattern `foo' is
-read. The last rule prints out the value of `foobar' at the end of
-the run.
-
-The special patterns `BEGIN' and `END' do not combine with other
-kinds of patterns.
-
-An `awk' program may have multiple `BEGIN' and/or `END' rules. The
-contents of multiple `BEGIN' or `END' rules are treated as if they
-had been enclosed in a single rule, in the order that the rules are
-encountered in the `awk' program. (This feature was introduced with
-the new version of `awk'.)
-
-Multiple `BEGIN' and `END' sections are also useful for writing
-library functions that need to do initialization and/or cleanup of
-their own. Note that the order in which library functions are named
-on the command line will affect the order in which their `BEGIN' and
-`END' rules will be executed. Therefore you have to be careful how
-you write your library functions. (*Note Command Line::, for more
-information on using library functions.)
-
-If an `awk' program only has a `BEGIN' rule, and no other rules, then
-the program will exit after the `BEGIN' rule has been run. Older
-versions of `awk' used to read their input until end of file was
-seen. However, if an `END' rule exists as well, then the input will
-be read, even if there are no other rules in the program.
-
-`BEGIN' and `END' rules must have actions; there is no default action
-for these rules since there is no current record when they run.
-
-
-
-File: gawk-info, Node: Boolean, Next: Conditional Patterns, Prev: BEGIN/END, Up: Patterns
-
-Boolean Operators and Patterns
-==============================
-
-A boolean pattern is a combination of other patterns using the
-boolean operators ``or'' (`||'), ``and'' (`&&'), and ``not'' (`!'),
-along with parentheses to control nesting. Whether the boolean
-pattern matches an input record is computed from whether its
-subpatterns match.
-
-The subpatterns of a boolean pattern can be regular expressions,
-matching expressions, comparisons, or other boolean combinations of
-such. Range patterns cannot appear inside boolean operators, since
-they don't make sense for classifying a single record, and neither
-can the special patterns `BEGIN' and `END', which never match any
-input record.
-
-Here are descriptions of the three boolean operators.
-
-`PAT1 && PAT2'
- Matches if both PAT1 and PAT2 match by themselves. For example,
- the following command prints all records in the input file
- `BBS-list' that contain both `2400' and `foo'.
-
- awk '/2400/ && /foo/' BBS-list
-
- Whether PAT2 matches is tested only if PAT1 succeeds. This can
- make a difference when PAT2 contains expressions that have side
- effects: in the case of `/foo/ && ($2 == bar++)', the variable
- `bar' is not incremented if there is no `foo' in the record.
-
-`PAT1 || PAT2'
- Matches if at least one of PAT1 and PAT2 matches the current
- input record. For example, the following command prints all
- records in the input file `BBS-list' that contain *either*
- `2400' or `foo', or both.
-
- awk '/2400/ || /foo/' BBS-list
-
- Whether PAT2 matches is tested only if PAT1 fails to match.
- This can make a difference when PAT2 contains expressions that
- have side effects.
-
-`!PAT'
- Matches if PAT does not match. For example, the following
- command prints all records in the input file `BBS-list' that do
- *not* contain the string `foo'.
-
- awk '! /foo/' BBS-list
-
-Note that boolean patterns are built from other patterns just as
-boolean expressions are built from other expressions (*note Boolean
-Ops::.). Any boolean expression is also a valid boolean pattern.
-But the converse is not true: simple regular expression patterns such
-as `/foo/' are not allowed in boolean expressions. Regular
-expressions can appear in boolean expressions only in conjunction
-with the matching operators, `~' and `!~'.
-
-
-
-File: gawk-info, Node: Conditional Patterns, Prev: Boolean, Up: Patterns
-
-Conditional Patterns
-====================
-
-Patterns may use a "conditional expression" much like the conditional
-expression of the C language. This takes the form:
-
- PAT1 ? PAT2 : PAT3
-
-The first pattern is evaluated. If it evaluates to TRUE, then the
-input record is tested against PAT2. Otherwise it is tested against
-PAT3. The conditional pattern matches if PAT2 or PAT3 (whichever one
-is selected) matches.
-
-
-
-File: gawk-info, Node: Actions, Next: Expressions, Prev: Patterns, Up: Top
-
-Actions: The Basics
-*******************
-
-The "action" part of an `awk' rule tells `awk' what to do once a
-match for the pattern is found. An action consists of one or more
-`awk' "statements", enclosed in curly braces (`{' and `}'). The
-curly braces must be used even if the action contains only one
-statement, or even if it contains no statements at all. Action
-statements are separated by newlines or semicolons.
-
-Besides the print statements already covered (*note Printing::.),
-there are four kinds of action statements: expressions, control
-statements, compound statements, and function definitions.
-
- * "Expressions" include assignments, arithmetic, function calls,
- and more (*note Expressions::.).
-
- * "Control statements" specify the control flow of `awk' programs.
- The `awk' language gives you C--like constructs (`if', `for',
- `while', and so on) as well as a few special ones (*note
- Statements::.).
-
- * A "compound statement" is just one or more `awk' statements
- enclosed in curly braces. This way you can group several
- statements to form the body of an `if' or similar statement.
-
- * You can define "user--defined functions" for use elsewhere in
- the `awk' program (*note User-defined::.).
-
-
-
-File: gawk-info, Node: Expressions, Next: Statements, Prev: Actions, Up: Top
-
-Actions: Expressions
-********************
-
-Expressions are the basic building block of `awk' actions. An
-expression evaluates to a value, which you can print, test, store in
-a variable or pass to a function.
-
-But, beyond that, an expression can assign a new value to a variable
-or a field, with an assignment operator.
-
-An expression can serve as a statement on its own. Most other action
-statements are made up of various combinations of expressions. As in
-other languages, expressions in `awk' include variables, array
-references, constants, and function calls, as well as combinations of
-these with various operators.
-
-* Menu:
-
-* Constants:: String and numeric constants.
-* Variables:: Variables give names to values for future use.
-* Fields:: Field references such as `$1' are also expressions.
-* Arrays:: Array element references are expressions.
-
-* Arithmetic Ops:: Arithmetic operations (`+', `-', etc.)
-* Concatenation:: Concatenating strings.
-* Comparison Ops:: Comparison of numbers and strings with `<', etc.
-* Boolean Ops:: Combining comparison expressions using boolean operators
- `||' (``or''), `&&' (``and'') and `!' (``not'').
-
-* Assignment Ops:: Changing the value of a variable or a field.
-* Increment Ops:: Incrementing the numeric value of a variable.
-
-* Conversion:: The conversion of strings to numbers and vice versa.
-* Conditional Exp:: Conditional expressions select between two subexpressions
- under control of a third subexpression.
-* Function Calls:: A function call is an expression.
-
-
-
-File: gawk-info, Node: Constants, Next: Variables, Up: Expressions
-
-Constant Expressions
-====================
-
-There are two types of constants: numeric constants and string
-constants.
-
-The "numeric constant" is a number. This number can be an integer, a
-decimal fraction, or a number in scientific (exponential) notation.
-Note that all numeric values are represented within `awk' in
-double--precision floating point. Here are some examples of numeric
-constants, which all have the same value:
-
- 105
- 1.05e+2
- 1050e-1
-
-A string constant consists of a sequence of characters enclosed in
-double--quote marks. For example:
-
- "parrot"
-
-represents the string constant `parrot'. Strings in `gawk' can be of
-any length and they can contain all the possible 8--bit ASCII
-characters including ASCII NUL. Other `awk' implementations may have
-difficulty with some character codes.
-
-Some characters cannot be included literally in a string. You
-represent them instead with "escape sequences", which are character
-sequences beginning with a backslash (`\').
-
-One use of the backslash is to include double--quote characters in a
-string. Since a plain double--quote would end the string, you must
-use `\"'. Backslash itself is another character that can't be
-included normally; you write `\\' to put one backslash in the string.
-
-Another use of backslash is to represent unprintable characters such
-as newline. While there is nothing to stop you from writing these
-characters directly in an `awk' program, they may look ugly.
-
-`\b'
- Represents a backspace, `^H'.
-
-`\f'
- Represents a formfeed, `^L'.
-
-`\n'
- Represents a newline, `^J'.
-
-`\r'
- Represents a carriage return, `^M'.
-
-`\t'
- Represents a horizontal tab, `^I'.
-
-`\v'
- Represents a vertical tab, `^K'.
-
-`\NNN'
- Represents the octal value NNN, where NNN is one to three digits
- between 0 and 7. For example, the code for the ASCII ESC
- (escape) character is `\033'.
-
-
-
-File: gawk-info, Node: Variables, Next: Arithmetic Ops, Prev: Constants, Up: Expressions
-
-Variables
-=========
-
-Variables let you give names to values and refer to them later. You
-have already seen variables in many of the examples. The name of a
-variable must be a sequence of letters, digits and underscores, but
-it may not begin with a digit. Case is significant in variable
-names; `a' and `A' are distinct variables.
-
-A variable name is a valid expression by itself; it represents the
-variable's current value. Variables are given new values with
-"assignment operators" and "increment operators". *Note Assignment
-Ops::.
-
-A few variables have special built--in meanings, such as `FS', the
-field separator, and `NF', the number of fields in the current input
-record. *Note Special::, for a list of them. Special variables can
-be used and assigned just like all other variables, but their values
-are also used or changed automatically by `awk'. Each special
-variable's name is made entirely of upper case letters.
-
-Variables in `awk' can be assigned either numeric values or string
-values. By default, variables are initialized to the null string,
-which has the numeric value zero. So there is no need to
-``initialize'' each variable explicitly in `awk', the way you would
-need to do in C or most other traditional programming languages.
-
-
-
-File: gawk-info, Node: Arithmetic Ops, Next: Concatenation, Prev: Variables, Up: Expressions
-
-Arithmetic Operators
-====================
-
-The `awk' language uses the common arithmetic operators when
-evaluating expressions. All of these arithmetic operators follow
-normal precedence rules, and work as you would expect them to. This
-example divides field 3 by field 4, adds field 2, stores the result
-into field 1, and prints the results:
-
- awk '{ $1 = $2 + $3 / $4; print }' inventory-shipped
-
-The arithmetic operators in `awk' are:
-
-`X + Y'
- Addition.
-
-`X - Y'
- Subtraction.
-
-`- X'
- Negation.
-
-`X / Y'
- Division. Since all numbers in `awk' are double--precision
- floating point, the result is not rounded to an integer: `3 / 4'
- has the value 0.75.
-
-`X * Y'
- Multiplication.
-
-`X % Y'
- Remainder. The quotient is rounded toward zero to an integer,
- multiplied by Y and this result is subtracted from X. This
- operation is sometimes known as ``trunc--mod''. The following
- relation always holds:
-
- `b * int(a / b) + (a % b) == a'
-
- One undesirable effect of this definition of remainder is that X
- % Y is negative if X is negative. Thus,
-
- -17 % 8 = -1
-
-`X ^ Y'
-`X ** Y'
- Exponentiation: X raised to the Y power. `2 ^ 3' has the value
- 8. The character sequence `**' is equivalent to `^'.
-
-
-
-File: gawk-info, Node: Concatenation, Next: Comparison Ops, Prev: Arithmetic Ops, Up: Expressions
-
-String Concatenation
-====================
-
-There is only one string operation: concatenation. It does not have
-a specific operator to represent it. Instead, concatenation is
-performed by writing expressions next to one another, with no
-operator. For example:
-
- awk '{ print "Field number one: " $1 }' BBS-list
-
-produces, for the first record in `BBS-list':
-
- Field number one: aardvark
-
-If you hadn't put the space after the `:', the line would have run
-together. For example:
-
- awk '{ print "Field number one:" $1 }' BBS-list
-
-produces, for the first record in `BBS-list':
-
- Field number one:aardvark
-
-
-
-File: gawk-info, Node: Comparison Ops, Next: Boolean Ops, Prev: Concatenation, Up: Expressions
-
-Comparison Expressions
-======================
-
-"Comparison expressions" use "relational operators" to compare
-strings or numbers. The relational operators are the same as in C.
-Here is a table of them:
-
-`X < Y'
- True if X is less than Y.
-
-`X <= Y'
- True if X is less than or equal to Y.
-
-`X > Y'
- True if X is greater than Y.
-
-`X >= Y'
- True if X is greater than or equal to Y.
-
-`X == Y'
- True if X is equal to Y.
-
-`X != Y'
- True if X is not equal to Y.
-
-`X ~ REGEXP'
- True if regexp REGEXP matches the string X.
-
-`X !~ REGEXP'
- True if regexp REGEXP does not match the string X.
-
-`SUBSCRIPT in ARRAY'
- True if array ARRAY has an element with the subscript SUBSCRIPT.
-
-Comparison expressions have the value 1 if true and 0 if false.
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (*note Conversion::.). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, `"10"' is less than `"9"'.
-
-For example,
-
- $1 == "foo"
-
-has the value of 1, or is true, if the first field of the current
-input record is precisely `foo'. By contrast,
-
- $1 ~ /foo/
-
-has the value 1 if the first field contains `foo'.
-
-
-
-File: gawk-info, Node: Boolean Ops, Next: Assignment Ops, Prev: Comparison Ops, Up: Expressions
-
-Boolean Operators
-=================
-
-A boolean expression is a combination of comparison expressions or
-matching expressions, using the boolean operators ``or'' (`||'),
-``and'' (`&&'), and ``not'' (`!'), along with parentheses to control
-nesting. The truth of the boolean expression is computed by
-combining the truth values of the component expressions.
-
-Boolean expressions can be used wherever comparison and matching
-expressions can be used. They can be used in `if' and `while'
-statements. They have numeric values (1 if true, 0 if false).
-
-In addition, every boolean expression is also a valid boolean
-pattern, so you can use it as a pattern to control the execution of
-rules.
-
-Here are descriptions of the three boolean operators, with an example
-of each. It may be instructive to compare these examples with the
-analogous examples of boolean patterns (*note Boolean::.), which use
-the same boolean operators in patterns instead of expressions.
-
-`BOOLEAN1 && BOOLEAN2'
- True if both BOOLEAN1 and BOOLEAN2 are true. For example, the
- following statement prints the current input record if it
- contains both `2400' and `foo'.
-
- if ($0 ~ /2400/ && $0 ~ /foo/) print
-
- The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is
- true. This can make a difference when BOOLEAN2 contains
- expressions that have side effects: in the case of `$0 ~ /foo/
- && ($2 == bar++)', the variable `bar' is not incremented if
- there is no `foo' in the record.
-
-`BOOLEAN1 || BOOLEAN2'
- True if at least one of BOOLEAN1 and BOOLEAN2 is true. For
- example, the following command prints all records in the input
- file `BBS-list' that contain *either* `2400' or `foo', or both.
-
- awk '{ if ($0 ~ /2400/ || $0 ~ /foo/) print }' BBS-list
-
- The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is
- false. This can make a difference when BOOLEAN2 contains
- expressions that have side effects.
-
-`!BOOLEAN'
- True if BOOLEAN is false. For example, the following program
- prints all records in the input file `BBS-list' that do *not*
- contain the string `foo'.
-
- awk '{ if (! ($0 ~ /foo/)) print }' BBS-list
-
-
-
-File: gawk-info, Node: Assignment Ops, Next: Increment Ops, Prev: Boolean Ops, Up: Expressions
-
-Assignment Operators
-====================
-
-An "assignment" is an expression that stores a new value into a
-variable. For example, let's assign the value 1 to the variable `z':
-
- z = 1
-
-After this expression is executed, the variable `z' has the value 1.
-Whatever old value `z' had before the assignment is forgotten.
-
-The `=' sign is called an "assignment operator". It is the simplest
-assignment operator because the value of the right--hand operand is
-stored unchanged.
-
-The left--hand operand of an assignment can be a variable (*note
-Variables::.), a field (*note Changing Fields::.) or an array element
-(*note Arrays::.). These are all called "lvalues", which means they
-can appear on the left side of an assignment operator. The
-right--hand operand may be any expression; it produces the new value
-which the assignment stores in the specified variable, field or array
-element.
-
-Assignments can store string values also. For example, this would
-store the value `"this food is good"' in the variable `message':
-
- thing = "food"
- predicate = "good"
- message = "this " thing " is " predicate
-
-(This also illustrates concatenation of strings.)
-
-It is important to note that variables do *not* have permanent types.
-The type of a variable is simply the type of whatever value it
-happens to hold at the moment. In the following program fragment,
-the variable `foo' has a numeric value at first, and a string value
-later on:
-
- foo = 1
- print foo
- foo = "bar"
- print foo
-
-When the second assignment gives `foo' a string value, the fact that
-it previously had a numeric value is forgotten.
-
-An assignment is an expression, so it has a value: the same value
-that is assigned. Thus, `z = 1' as an expression has the value 1.
-One consequence of this is that you can write multiple assignments
-together:
-
- x = y = z = 0
-
-stores the value 0 in all three variables. It does this because the
-value of `z = 0', which is 0, is stored into `y', and then the value
-of `y = z = 0', which is 0, is stored into `x'.
-
-You can use an assignment anywhere an expression is called for. For
-example, it is valid to write `x != (y = 1)' to set `y' to 1 and then
-test whether `x' equals 1. But this style tends to make programs
-hard to read; except in a one--shot program, you should rewrite it to
-get rid of such nesting of assignments. This is never very hard.
-
-Aside from `=', there are several other assignment operators that do
-arithmetic with the old value of the variable. For example, the
-operator `+=' computes a new value by adding the right--hand value to
-the old value of the variable. Thus, the following assignment adds 5
-to the value of `foo':
-
- foo += 5
-
-This is precisely equivalent to the following:
-
- foo = foo + 5
-
-Use whichever one makes the meaning of your program clearer.
-
-Here is a table of the arithmetic assignment operators. In each
-case, the right--hand operand is an expression whose value is
-converted to a number.
-
-`LVALUE += INCREMENT'
- Adds INCREMENT to the value of LVALUE to make the new value of
- LVALUE.
-
-`LVALUE -= DECREMENT'
- Subtracts DECREMENT from the value of LVALUE.
-
-`LVALUE *= COEFFICIENT'
- Multiplies the value of LVALUE by COEFFICIENT.
-
-`LVALUE /= QUOTIENT'
- Divides the value of LVALUE by QUOTIENT.
-
-`LVALUE %= MODULUS'
- Sets LVALUE to its remainder by MODULUS.
-
-`LVALUE ^= POWER'
-`LVALUE **= POWER'
- Raises LVALUE to the power POWER.
-
-
-
-File: gawk-info, Node: Increment Ops, Next: Conversion, Prev: Assignment Ops, Up: Expressions
-
-Increment Operators
-===================
-
-"Increment operators" increase or decrease the value of a variable by
-1. You could do the same thing with an assignment operator, so the
-increment operators add no power to the `awk' language; but they are
-convenient abbreviations for something very common.
-
-The operator to add 1 is written `++'. There are two ways to use
-this operator: pre--incrementation and post--incrementation.
-
-To pre--increment a variable V, write `++V'. This adds 1 to the
-value of V and that new value is also the value of this expression.
-The assignment expression `V += 1' is completely equivalent.
-
-Writing the `++' after the variable specifies post--increment. This
-increments the variable value just the same; the difference is that
-the value of the increment expression itself is the variable's *old*
-value. Thus, if `foo' has value 4, then the expression `foo++' has
-the value 4, but it changes the value of `foo' to 5.
-
-The post--increment `foo++' is nearly equivalent to writing `(foo +=
-1) - 1'. It is not perfectly equivalent because all numbers in `awk'
-are floating point: in floating point, `foo + 1 - 1' does not
-necessarily equal `foo'. But the difference will be minute as long
-as you stick to numbers that are fairly small (less than a trillion).
-
-Any lvalue can be incremented. Fields and array elements are
-incremented just like variables.
-
-The decrement operator `--' works just like `++' except that it
-subtracts 1 instead of adding. Like `++', it can be used before the
-lvalue to pre--decrement or after it to post--decrement.
-
-Here is a summary of increment and decrement expressions.
-
-`++LVALUE'
- This expression increments LVALUE and the new value becomes the
- value of this expression.
-
-`LVALUE++'
- This expression causes the contents of LVALUE to be incremented.
- The value of the expression is the *old* value of LVALUE.
-
-`--LVALUE'
- Like `++LVALUE', but instead of adding, it subtracts. It
- decrements LVALUE and delivers the value that results.
-
-`LVALUE--'
- Like `LVALUE++', but instead of adding, it subtracts. It
- decrements LVALUE. The value of the expression is the *old*
- value of LVALUE.
-
-
-
-File: gawk-info, Node: Conversion, Next: Conditional Exp, Prev: Increment Ops, Up: Expressions
-
-Conversion of Strings and Numbers
-=================================
-
-Strings are converted to numbers, and numbers to strings, if the
-context of your `awk' statement demands it. For example, if the
-values of `foo' or `bar' in the expression `foo + bar' happen to be
-strings, they are converted to numbers before the addition is
-performed. If numeric values appear in string concatenation, they
-are converted to strings. Consider this:
-
- two = 2; three = 3
- print (two three) + 4
-
-This eventually prints the (numeric) value `27'. The numeric
-variables `two' and `three' are converted to strings and concatenated
-together, and the resulting string is converted back to a number
-before adding `4'. The resulting numeric value `27' is printed.
-
-If, for some reason, you need to force a number to be converted to a
-string, concatenate the null string with that number. To force a
-string to be converted to a number, add zero to that string. Strings
-that can't be interpreted as valid numbers are given the numeric
-value zero.
-
-The exact manner in which numbers are converted into strings is
-controlled by the `awk' special variable `OFMT' (*note Special::.).
-Numbers are converted using a special version of the `sprintf'
-function (*note Built-in::.) with `OFMT' as the format specifier.
-
-`OFMT''s default value is `"%.6g"', which prints a value with at
-most six significant digits. You might want to change it to specify
-more precision, if your version of `awk' uses double precision
-arithmetic. Double precision on most modern machines gives you 16 or
-17 decimal digits of precision.
-
-Strange results can happen if you set `OFMT' to a string that doesn't
-tell `sprintf' how to format floating point numbers in a useful way.
-For example, if you forget the `%' in the format, all numbers will be
-converted to the same constant string.
-
-
-
-File: gawk-info, Node: Conditional Exp, Next: Function Calls, Prev: Conversion, Up: Expressions
-
-Conditional Expressions
-=======================
-
-A "conditional expression" is a special kind of expression with three
-operands. It allows you to use one expression's value to select one
-of two other expressions.
-
-The conditional expression looks the same as in the C language:
-
- SELECTOR ? IF-TRUE-EXP : IF-FALSE-EXP
-
-There are three subexpressions. The first, SELECTOR, is always
-computed first. If it is ``true'' (not zero) then IF-TRUE-EXP is
-computed next and its value becomes the value of the whole expression.
-Otherwise, IF-FALSE-EXP is computed next and its value becomes the
-value of the whole expression.
-
-For example, this expression produces the absolute value of `x':
-
- x > 0 ? x : -x
-
-Each time the conditional expression is computed, exactly one of
-IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This
-is important when the expressions contain side effects. For example,
-this conditional expression examines element `i' of either array `a'
-or array `b', and increments `i'.
-
- x == y ? a[i++] : b[i++]
-
-This is guaranteed to increment `i' exactly once, because each time
-one or the other of the two increment expressions will be executed
-and the other will not be.
-
-
-
-File: gawk-info, Node: Function Calls, Prev: Conditional Exp, Up: Expressions
-
-Function Calls
-==============
-
-A "function" is a name for a particular calculation. Because it has
-a name, you can ask for it by name at any point in the program. For
-example, the function `sqrt' computes the square root of a number.
-
-A fixed set of functions are "built in", which means they are
-available in every `awk' program. The `sqrt' function is one of
-these. *Note Built-in::, for a list of built--in functions and their
-descriptions. In addition, you can define your own functions in the
-program for use elsewhere in the same program. *Note User-defined::,
-for how to do this.
-
-The way to use a function is with a "function call" expression, which
-consists of the function name followed by a list of "arguments" in
-parentheses. The arguments are expressions which give the raw
-materials for the calculation that the function will do. When there
-is more than one argument, they are separated by commas. If there
-are no arguments, write just `()' after the function name.
-
-*Do not put any space between the function name and the
-open--parenthesis!* A user--defined function name looks just like
-the name of a variable, and space would make the expression look like
-concatenation of a variable with an expression inside parentheses.
-Space before the parenthesis is harmless with built--in functions,
-but it is best not to get into the habit of using space, lest you do
-likewise for a user--defined function one day by mistake.
-
-Each function needs a particular number of arguments. For example,
-the `sqrt' function must be called with a single argument, like this:
-
- sqrt(ARGUMENT)
-
-The argument is the number to take the square root of.
-
-Some of the built--in functions allow you to omit the final argument.
-If you do so, they will use a reasonable default. *Note Built-in::,
-for full details. If arguments are omitted in calls to user--defined
-functions, then those arguments are treated as local variables,
-initialized to the null string (*note User-defined::.).
-
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of `sqrt(ARGUMENT)' is the square root of the
-argument. A function can also have side effects, such as assigning
-the values of certain variables or doing I/O.
-
-Here is a command to read numbers, one number per line, and print the
-square root of each one:
-
- awk '{ print "The square root of", $1, "is", sqrt($1) }'
-
-
-
-File: gawk-info, Node: Statements, Next: Arrays, Prev: Expressions, Up: Top
-
-Actions: Statements
-*******************
-
-"Control statements" such as `if', `while', and so on control the
-flow of execution in `awk' programs. Most of the control statements
-in `awk' are patterned on similar statements in C.
-
-The simplest kind of statement is an expression. The other kinds of
-statements start with special keywords such as `if' and `while', to
-distinguish them from simple expressions.
-
-In all the examples in this chapter, BODY can be either a single
-statement or a group of statements. Groups of statements are
-enclosed in braces, and separated by newlines or semicolons.
-
-* Menu:
-
-* Expressions:: One kind of statement simply computes an expression.
-
-* If:: Conditionally execute some `awk' statements.
-
-* While:: Loop until some condition is satisfied.
-
-* Do:: Do specified action while looping until some
- condition is satisfied.
-
-* For:: Another looping statement, that provides
- initialization and increment clauses.
-
-* Break:: Immediately exit the innermost enclosing loop.
-
-* Continue:: Skip to the end of the innermost enclosing loop.
-
-* Next:: Stop processing the current input record.
-
-* Exit:: Stop execution of `awk'.
-
-
-
-File: gawk-info, Node: If, Next: While, Up: Statements
-
-The `if' Statement
-==================
-
-The `if'-`else' statement is `awk''s decision--making statement. The
-`else' part of the statement is optional.
-
- `if (CONDITION) BODY1 else BODY2'
-
-Here CONDITION is an expression that controls what the rest of the
-statement will do. If CONDITION is true, BODY1 is executed;
-otherwise, BODY2 is executed (assuming that the `else' clause is
-present). The condition is considered true if it is nonzero or
-nonnull.
-
-Here is an example:
-
- awk '{ if (x % 2 == 0)
- print "x is even"
- else
- print "x is odd" }'
-
-In this example, if the expression `x % 2 == 0' is true (that is,
-the value of `x' is divisible by 2), then the first `print' statement
-is executed; otherwise the second `print' statement is performed.
-
-If the `else' appears on the same line as BODY1, and BODY1 is a
-single statement, then a semicolon must separate BODY1 from `else'.
-To illustrate this, let's rewrite the previous example:
-
- awk '{ if (x % 2 == 0) print "x is even"; else
- print "x is odd" }'
-
-If you forget the `;', `awk' won't be able to parse it, and you will
-get a syntax error.
-
-We would not actually write this example this way, because a human
-reader might fail to see the `else' if it were not the first thing on
-its line.
-
-
-
-File: gawk-info, Node: While, Next: Do, Prev: If, Up: Statements
-
-The `while' Statement
-=====================
-
-In programming, a loop means a part of a program that is (or at least
-can be) executed two or more times in succession.
-
-The `while' statement is the simplest looping statement in `awk'. It
-repeatedly executes a statement as long as a condition is true. It
-looks like this:
-
- while (CONDITION)
- BODY
-
-Here BODY is a statement that we call the "body" of the loop, and
-CONDITION is an expression that controls how long the loop keeps
-running.
-
-The first thing the `while' statement does is test CONDITION. If
-CONDITION is true, it executes the statement BODY. After BODY has
-been executed, CONDITION is tested again and this process is repeated
-until CONDITION is no longer true. If CONDITION is initially false,
-the body of the loop is never executed.
-
- awk '{ i = 1
- while (i <= 3) {
- print $i
- i++
- }
- }'
-
-This example prints the first three input fields, one per line.
-
-The loop works like this: first, the value of `i' is set to 1. Then,
-the `while' tests whether `i' is less than or equal to three. This
-is the case when `i' equals one, so the `i'-th field is printed.
-Then the `i++' increments the value of `i' and the loop repeats.
-
-When `i' reaches 4, the loop exits. Here BODY is a compound
-statement enclosed in braces. As you can see, a newline is not
-required between the condition and the body; but using one makes the
-program clearer unless the body is a compound statement or is very
-simple.
-
-
-
-File: gawk-info, Node: Do, Next: For, Prev: While, Up: Statements
-
-The `do'--`while' Statement
-===========================
-
-The `do' loop is a variation of the `while' looping statement. The
-`do' loop executes the BODY once, then repeats BODY as long as
-CONDITION is true. It looks like this:
-
- do
- BODY
- while (CONDITION)
-
-Even if CONDITION is false at the start, BODY is executed at least
-once (and only once, unless executing BODY makes CONDITION true).
-Contrast this with the corresponding `while' statement:
-
- while (CONDITION)
- BODY
-
-This statement will not execute BODY even once if CONDITION is false
-to begin with.
-
-Here is an example of a `do' statement:
-
- awk '{ i = 1
- do {
- print $0
- i++
- } while (i <= 10)
- }'
-
-prints each input record ten times. It isn't a very realistic
-example, since in this case an ordinary `while' would do just as
-well. But this is normal; there is only occasionally a real use for
-a `do' statement.
-
-
diff --git a/gawk-info-4 b/gawk-info-4
deleted file mode 100644
index c8e9b7ee..00000000
--- a/gawk-info-4
+++ /dev/null
@@ -1,1400 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: For, Next: Break, Prev: Do, Up: Statements
-
-The `for' Statement
-===================
-
-The `for' statement makes it more convenient to count iterations of a
-loop. The general form of the `for' statement looks like this:
-
- for (INITIALIZATION; CONDITION; INCREMENT)
- BODY
-
-This statement starts by executing INITIALIZATION. Then, as long as
-CONDITION is true, it repeatedly executes BODY and then INCREMENT.
-Typically INITIALIZATION sets a variable to either zero or one,
-INCREMENT adds 1 to it, and CONDITION compares it against the desired
-number of iterations.
-
-Here is an example of a `for' statement:
-
- awk '{ for (i = 1; i <= 3; i++)
- print $i
- }'
-
-This prints the first three fields of each input record, one field
-per line.
-
-In the `for' statement, BODY stands for any statement, but
-INITIALIZATION, CONDITION and INCREMENT are just expressions. You
-cannot set more than one variable in the INITIALIZATION part unless
-you use a multiple assignment statement such as `x = y = 0', which is
-possible only if all the initial values are equal. (But you can
-initialize additional variables by writing their assignments as
-separate statements preceding the `for' loop.)
-
-The same is true of the INCREMENT part; to increment additional
-variables, you must write separate statements at the end of the loop.
-The C compound expression, using C's comma operator, would be useful
-in this context, but it is not supported in `awk'.
-
-Most often, INCREMENT is an increment expression, as in the example
-above. But this is not required; it can be any expression whatever.
-For example, this statement prints odd numbers from 1 to 100:
-
- # print odd numbers from 1 to 100
- for (i = 1; i <= 100; i += 2)
- print i
-
-Any of the three expressions following `for' may be omitted if you
-don't want it to do anything. Thus, `for (;x > 0;)' is equivalent to
-`while (x > 0)'. If the CONDITION part is empty, it is treated as
-TRUE, effectively yielding an infinite loop.
-
-In most cases, a `for' loop is an abbreviation for a `while' loop, as
-shown here:
-
- INITIALIZATION
- while (CONDITION) {
- BODY
- INCREMENT
- }
-
-(The only exception is when the `continue' statement (*note
-Continue::.) is used inside the loop; changing a `for' statement to a
-`while' statement in this way can change the effect of the `continue'
-statement inside the loop.)
-
-The `awk' language has a `for' statement in addition to a `while'
-statement because often a `for' loop is both less work to type and
-more natural to think of. Counting the number of iterations is very
-common in loops. It can be easier to think of this counting as part
-of looping rather than as something to do inside the loop.
-
-The next section has more complicated examples of `for' loops.
-
-There is an alternate version of the `for' loop, for iterating over
-all the indices of an array:
-
- for (i in array)
- PROCESS array[i]
-
-*Note Arrays::, for more information on this version of the `for' loop.
-
-
-
-File: gawk-info, Node: Break, Next: Continue, Prev: For, Up: Statements
-
-The `break' Statement
-=====================
-
-The `break' statement jumps out of the innermost `for', `while', or
-`do'--`while' loop that encloses it. The following example finds the
-smallest divisor of any number, and also identifies prime numbers:
-
- awk '# find smallest divisor of num
- { num = $1
- for (div = 2; div*div <= num; div++)
- if (num % div == 0)
- break
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num }'
-
-When the remainder is zero in the first `if' statement, `awk'
-immediately "breaks" out of the containing `for' loop. This means
-that `awk' proceeds immediately to the statement following the loop
-and continues processing. (This is very different from the `exit'
-statement (*note Exit::.) which stops the entire `awk' program.)
-
-Here is another program equivalent to the previous one. It
-illustrates how the CONDITION of a `for' or `while' could just as
-well be replaced with a `break' inside an `if':
-
- awk '# find smallest divisor of num
- { num = $1
- for (div = 2; ; div++) {
- if (num % div == 0) {
- printf "Smallest divisor of %d is %d\n", num, div
- break
- }
- if (div*div > num) {
- printf "%d is prime\n", num
- break
- }
- }
- }'
-
-
-
-File: gawk-info, Node: Continue, Next: Next, Prev: Break, Up: Statements
-
-The `continue' Statement
-========================
-
-The `continue' statement, like `break', is used only inside `for',
-`while', and `do'--`while' loops. It skips over the rest of the loop
-body, causing the next cycle around the loop to begin immediately.
-Contrast this with `break', which jumps out of the loop altogether.
-Here is an example:
-
- # print names that don't contain the string "ignore"
-
- # first, save the text of each line
- { names[NR] = $0 }
-
- # print what we're interested in
- END {
- for (x in names) {
- if (names[x] ~ /ignore/)
- continue
- print names[x]
- }
- }
-
-If any of the input records contain the string `ignore', this example
-skips the print statement and continues back to the first statement
-in the loop.
-
-This isn't a practical example of `continue', since it would be just
-as easy to write the loop like this:
-
- for (x in names)
- if (names[x] !~ /ignore/)
- print names[x]
-
-The `continue' statement causes `awk' to skip the rest of what is
-inside a `for' loop, but it resumes execution with the increment part
-of the `for' loop. The following program illustrates this fact:
-
- awk 'BEGIN {
- for (x = 0; x <= 20; x++) {
- if (x == 5)
- continue
- printf ("%d ", x)
- }
- print ""
- }'
-
-This program prints all the numbers from 0 to 20, except for 5, for
-which the `printf' is skipped. Since the increment `x++' is not
-skipped, `x' does not remain stuck at 5.
-
-
-
-File: gawk-info, Node: Next, Next: Exit, Prev: Continue, Up: Statements
-
-The `next' Statement
-====================
-
-The `next' statement forces `awk' to immediately stop processing the
-current record and go on to the next record. This means that no
-further rules are executed for the current record. The rest of the
-current rule's action is not executed either.
-
-Contrast this with the effect of the `getline' function (*note
-Getline::.). That too causes `awk' to read the next record
-immediately, but it does not alter the flow of control in any way.
-So the rest of the current action executes with a new input record.
-
-At the grossest level, `awk' program execution is a loop that reads
-an input record and then tests each rule pattern against it. If you
-think of this loop as a `for' statement whose body contains the
-rules, then the `next' statement is analogous to a `continue'
-statement: it skips to the end of the body of the loop, and executes
-the increment (which reads another record).
-
-For example, if your `awk' program works only on records with four
-fields, and you don't want it to fail when given bad input, you might
-use the following rule near the beginning of the program:
-
- NF != 4 {
- printf ("line %d skipped: doesn't have 4 fields", FNR) > "/dev/tty"
- next
- }
-
-so that the following rules will not see the bad record. The error
-message is redirected to `/dev/tty' (the terminal), so that it won't
-get lost amid the rest of the program's regular output.
-
-
-
-File: gawk-info, Node: Exit, Prev: Next, Up: Statements
-
-The `exit' Statement
-====================
-
-The `exit' statement causes `awk' to immediately stop executing the
-current rule and to stop processing input; any remaining input is
-ignored.
-
-If an `exit' statement is executed from a `BEGIN' rule the program
-stops processing everything immediately. No input records will be
-read. However, if an `END' rule is present, it will be executed
-(*note BEGIN/END::.).
-
-If `exit' is used as part of an `END' rule, it causes the program to
-stop immediately.
-
-An `exit' statement that is part of an ordinary rule (that is, not part
-of a `BEGIN' or `END' rule) stops the execution of any further
-automatic rules, but the `END' rule is executed if there is one. If
-you don't want the `END' rule to do its job in this case, you can set
-a variable to nonzero before the `exit' statement, and check that
-variable in the `END' rule.
-
-If an argument is supplied to `exit', its value is used as the exit
-status code for the `awk' process. If no argument is supplied,
-`exit' returns status zero (success).
-
-For example, let's say you've discovered an error condition you
-really don't know how to handle. Conventionally, programs report
-this by exiting with a nonzero status. Your `awk' program can do
-this using an `exit' statement with a nonzero argument. Here's an
-example of this:
-
- BEGIN {
- if (("date" | getline date_now) < 0) {
- print "Can't get system date"
- exit 4
- }
- }
-
-
-
-File: gawk-info, Node: Arrays, Next: Built-in, Prev: Statements, Up: Top
-
-Actions: Using Arrays in `awk'
-******************************
-
-An "array" is a table of various values, called "elements". The
-elements of an array are distinguished by their "indices". Names of
-arrays in `awk' are strings of alphanumeric characters and
-underscores, just like regular variables.
-
-You cannot use the same identifier as both a variable and as an array
-name in one `awk' program.
-
-* Menu:
-
-* Intro: Array Intro. Basic facts about arrays in `awk'.
-* Reference to Elements:: How to examine one element of an array.
-* Assigning Elements:: How to change an element of an array.
-* Example: Array Example. Sample program explained.
-
-* Scanning an Array:: A variation of the `for' statement. It loops
- through the indices of an array's existing elements.
-
-* Delete:: The `delete' statement removes an element from an array.
-
-* Multi-dimensional:: Emulating multi--dimensional arrays in `awk'.
-* Multi-scanning:: Scanning multi--dimensional arrays.
-
-
-
-File: gawk-info, Node: Array Intro, Next: Reference to Elements, Up: Arrays
-
-Introduction to Arrays
-======================
-
-The `awk' language has one--dimensional "arrays" for storing groups
-of related strings or numbers. Each array must have a name; valid
-array names are the same as valid variable names, and they do
-conflict with variable names: you can't have both an array and a
-variable with the same name at any point in an `awk' program.
-
-Arrays in `awk' superficially resemble arrays in other programming
-languages; but there are fundamental differences. In `awk', you
-don't need to declare the size of an array before you start to use it.
-What's more, in `awk' any number or even a string may be used as an
-array index.
-
-In most other languages, you have to "declare" an array and specify
-how many elements or components it has. In such languages, the
-declaration causes a contiguous block of memory to be allocated for
-that many elements. An index in the array must be a nonnegative
-integer; for example, the index 0 specifies the first element in the
-array, which is actually stored at the beginning of the block of
-memory. Index 1 specifies the second element, which is stored in
-memory right after the first element, and so on. It is impossible to
-add more elements to the array, because it has room for only as many
-elements as you declared. (Some languages have arrays whose first
-index is 1, others require that you specify both the first and last
-index when you declare the array. In such a language, an array could
-be indexed, for example, from -3 to 17.) A contiguous array of four
-elements might look like this, conceptually, if the element values
-are 8, `"foo"', `""' and 30:
-
- +--------+--------+-------+--------+
- | 8 | "foo" | "" | 30 | value
- +--------+--------+-------+--------+
- 0 1 2 3 index
-
-Only the values are stored; the indices are implicit from the order
-of the values. 8 is the value at index 0, because 8 appears in the
-position with 0 elements before it.
-
-Arrays in `awk' are different: they are "associative". This means
-that each array is a collection of pairs: an index, and its
-corresponding array element value:
-
- Element 4 Value 30
- Element 2 Value "foo"
- Element 1 Value 8
- Element 3 Value ""
-
-We have shown the pairs in jumbled order because their order doesn't
-mean anything.
-
-One advantage of an associative array is that new pairs can be added
-at any time. For example, suppose we add to that array a tenth
-element whose value is `"number ten"'. The result is this:
-
- Element 10 Value "number ten"
- Element 4 Value 30
- Element 2 Value "foo"
- Element 1 Value 8
- Element 3 Value ""
-
-Now the array is "sparse" (i.e. some indices are missing): it has
-elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or 9.
-
-Another consequence of associative arrays is that the indices don't
-have to be positive integers. Any number, or even a string, can be
-an index. For example, here is an array which translates words from
-English into French:
-
- Element "dog" Value "chien"
- Element "cat" Value "chat"
- Element "one" Value "un"
- Element 1 Value "un"
-
-Here we decided to translate the number 1 in both spelled--out and
-numeral form--thus illustrating that a single array can have both
-numbers and strings as indices.
-
-When `awk' creates an array for you, e.g. with the `split' built--in
-function (*note String Functions::.), that array's indices start at
-the number one.
-
-
-
-File: gawk-info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays
-
-Referring to an Array Element
-=============================
-
-The principal way of using an array is to refer to one of its elements.
-An array reference is an expression which looks like this:
-
- ARRAY[INDEX]
-
-Here ARRAY is the name of an array. The expression INDEX is the
-index of the element of the array that you want. The value of the
-array reference is the current value of that array element.
-
-For example, `foo[4.3]' is an expression for the element of array
-`foo' at index 4.3.
-
-If you refer to an array element that has no recorded value, the
-value of the reference is `""', the null string. This includes
-elements to which you have not assigned any value, and elements that
-have been deleted (*note Delete::.). Such a reference automatically
-creates that array element, with the null string as its value. (In
-some cases, this is unfortunate, because it might waste memory inside
-`awk').
-
-You can find out if an element exists in an array at a certain index
-with the expression:
-
- INDEX in ARRAY
-
-This expression tests whether or not the particular index exists,
-without the side effect of creating that element if it is not present.
-The expression has the value 1 (true) if `ARRAY[INDEX]' exists,
-and 0 (false) if it does not exist.
-
-For example, to find out whether the array `frequencies' contains the
-subscript `"2"', you would ask:
-
- if ("2" in frequencies) print "Subscript \"2\" is present."
-
-Note that this is *not* a test of whether or not the array
-`frequencies' contains an element whose *value* is `"2"'. (There is
-no way to do that except to scan all the elements.) Also, this *does
-not* create `frequencies["2"]', while the following (incorrect)
-alternative would:
-
- if (frequencies["2"] != "") print "Subscript \"2\" is present."
-
-
-
-File: gawk-info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays
-
-Assigning Array Elements
-========================
-
-Array elements are lvalues: they can be assigned values just like
-`awk' variables:
-
- ARRAY[SUBSCRIPT] = VALUE
-
-Here ARRAY is the name of your array. The expression SUBSCRIPT is
-the index of the element of the array that you want to assign a
-value. The expression VALUE is the value you are assigning to that
-element of the array.
-
-
-
-File: gawk-info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays
-
-Basic Example of an Array
-=========================
-
-The following program takes a list of lines, each beginning with a
-line number, and prints them out in order of line number. The line
-numbers are not in order, however, when they are first read: they
-are scrambled. This program sorts the lines by making an array using
-the line numbers as subscripts. It then prints out the lines in
-sorted order of their numbers. It is a very simple program, and will
-get confused if it encounters repeated numbers, gaps, or lines that
-don't begin with a number.
-
- BEGIN {
- max=0
- }
-
- {
- if ($1 > max)
- max = $1
- arr[$1] = $0
- }
-
- END {
- for (x = 1; x <= max; x++)
- print arr[x]
- }
-
-The first rule just initializes the variable `max'. (This is not
-strictly necessary, since an uninitialized variable has the null
-string as its value, and the null string is effectively zero when
-used in a context where a number is required.)
-
-The second rule keeps track of the largest line number seen so far;
-it also stores each line into the array `arr', at an index that is
-the line's number.
-
-The third rule runs after all the input has been read, to print out
-all the lines.
-
-When this program is run with the following input:
-
- 5 I am the Five man
- 2 Who are you? The new number two!
- 4 . . . And four on the floor
- 1 Who is number one?
- 3 I three you.
-
-its output is this:
-
- 1 Who is number one?
- 2 Who are you? The new number two!
- 3 I three you.
- 4 . . . And four on the floor
- 5 I am the Five man
-
-
-
-File: gawk-info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays
-
-Scanning All Elements of an Array
-=================================
-
-In programs that use arrays, often you need a loop that will execute
-once for each element of an array. In other languages, where arrays
-are contiguous and indices are limited to nonnegative integers, this is
-easy: the largest index is one less than the length of the array, and
-you can find all the valid indices by counting from zero up to that
-value. This technique won't do the job in `awk', since any number or
-string may be an array index. So `awk' has a special kind of `for'
-statement for scanning an array:
-
- for (VAR in ARRAY)
- BODY
-
-This loop executes BODY once for each different value that your
-program has previously used as an index in ARRAY, with the variable
-VAR set to that index.
-
-Here is a program that uses this form of the `for' statement. The
-first rule scans the input records and notes which words appear (at
-least once) in the input, by storing a 1 into the array `used' with
-the word as index. The second rule scans the elements of `used' to
-find all the distinct words that appear in the input. It prints each
-word that is more than 10 characters long, and also prints the number
-of such words. *Note Built-in::, for more information on the
-built--in function `length'.
-
- # Record a 1 for each word that is used at least once.
- {
- for (i = 0; i < NF; i++)
- used[$i] = 1
- }
-
- # Find number of distinct words more than 10 characters long.
- END {
- num_long_words = 0
- for (x in used)
- if (length(x) > 10) {
- ++num_long_words
- print x
- }
- print num_long_words, "words longer than 10 characters"
- }
-
-*Note Sample Program::, for a more detailed example of this type.
-
-The order in which elements of the array are accessed by this
-statement is determined by the internal arrangement of the array
-elements within `awk' and cannot be controlled or changed. This can
-lead to problems if new elements are added to ARRAY by statements in
-BODY; you cannot predict whether or not the `for' loop will reach
-them. Similarly, changing VAR inside the loop can produce strange
-results. It is best to avoid such things.
-
-
-
-File: gawk-info, Node: Delete, Next: Multi-dimensional, Prev: Scanning an Array, Up: Arrays
-
-The `delete' Statement
-======================
-
-You can remove an individual element of an array using the `delete'
-statement:
-
- delete ARRAY[INDEX]
-
-When an array element is deleted, it is as if you had never referred
-to it and had never given it any value. Any value the element
-formerly had can no longer be obtained.
-
-Here is an example of deleting elements in an array:
-
- awk '{ for (i in frequencies)
- delete frequencies[i]
- }'
-
-This example removes all the elements from the array `frequencies'.
-
-If you delete an element, the `for' statement to scan the array will
-not report that element, and the `in' operator to check for the
-presence of that element will return 0:
-
- delete foo[4]
- if (4 in foo)
- print "This will never be printed"
-
-
-
-File: gawk-info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Delete, Up: Arrays
-
-Multi--dimensional arrays
-=========================
-
-A multi--dimensional array is an array in which an element is
-identified by a sequence of indices, not a single index. For
-example, a two--dimensional array requires two indices. The usual
-way (in most languages, including `awk') to refer to an element of a
-two--dimensional array named `grid' is with `grid[x,y]'.
-
-Multi--dimensional arrays are supported in `awk' through
-concatenation of indices into one string. What happens is that `awk'
-converts the indices into strings (*note Conversion::.) and
-concatenates them together, with a separator between them. This
-creates a single string that describes the values of the separate
-indices. The combined string is used as a single index into an
-ordinary, one--dimensional array. The separator used is the value of
-the special variable `SUBSEP'.
-
-For example, suppose the value of `SUBSEP' is `","' and the
-expression `foo[5,12]="value"' is executed. The numbers 5 and 12
-will be concatenated with a comma between them, yielding `"5,12"';
-thus, the array element `foo["5,12"]' will be set to `"value"'.
-
-Once the element's value is stored, `awk' has no record of whether it
-was stored with a single index or a sequence of indices. The two
-expressions `foo[5,12]' and `foo[5 SUBSEP 12]' always have the same
-value.
-
-The default value of `SUBSEP' is not a comma; it is the string
-`"\034"', which contains a nonprinting character that is unlikely to
-appear in an `awk' program or in the input data.
-
-The usefulness of choosing an unlikely character comes from the fact
-that index values that contain a string matching `SUBSEP' lead to
-combined strings that are ambiguous. Suppose that `SUBSEP' is a
-comma; then `foo["a,b", "c"]' and `foo["a", "b,c"]' will be
-indistinguishable because both are actually stored as `foo["a,b,c"]'.
-Because `SUBSEP' is `"\034"', such confusion can actually happen only
-when an index contains the character `"\034"', which is a rare event.
-
-You can test whether a particular index--sequence exists in a
-``multi--dimensional'' array with the same operator `in' used for
-one--dimensional arrays. Instead of a single index as the
-left--hand operand, write the whole sequence of indices, separated by
-commas, in parentheses:
-
- (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY
-
-The following example treats its input as a two--dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
-result. It assumes that all lines have the same number of elements.
-
- awk 'BEGIN {
- max_nf = max_nr = 0
- }
-
- {
- if (max_nf < NF)
- max_nf = NF
- max_nr = NR
- for (x = 1; x <= NF; x++)
- vector[x, NR] = $x
- }
-
- END {
- for (x = 1; x <= max_nf; x++) {
- for (y = max_nr; y >= 1; --y)
- printf("%s ", vector[x, y])
- printf("\n")
- }
- }'
-
-When given the input:
-
- 1 2 3 4 5 6
- 2 3 4 5 6 1
- 3 4 5 6 1 2
- 4 5 6 1 2 3
-
-it produces:
-
- 4 3 2 1
- 5 4 3 2
- 6 5 4 3
- 1 6 5 4
- 2 1 6 5
- 3 2 1 6
-
-
-
-File: gawk-info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays
-
-Scanning Multi--dimensional Arrays
-==================================
-
-There is no special `for' statement for scanning a
-``multi--dimensional'' array; there cannot be one, because in truth
-there are no multi--dimensional arrays or elements; there is only a
-multi--dimensional *way of accessing* an array.
-
-However, if your program has an array that is always accessed as
-multi--dimensional, you can get the effect of scanning it by
-combining the scanning `for' statement (*note Scanning an Array::.)
-with the `split' built--in function (*note String Functions::.). It
-works like this:
-
- for (combined in ARRAY) {
- split (combined, separate, SUBSEP)
- ...
- }
-
-This finds each concatenated, combined index in the array, and splits
-it into the individual indices by breaking it apart where the value
-of `SUBSEP' appears. The split--out indices become the elements of
-the array `separate'.
-
-Thus, suppose you have previously stored a value in `ARRAY[1, "foo"]';
-then an element with index `"1\034foo"' exists in ARRAY. (Recall that the
-default value of `SUBSEP' contains the character with code 034.)
-Sooner or later the `for' statement will find that index and do an
-iteration with `combined' set to `"1\034foo"'. Then the `split'
-function will be called as follows:
-
- split ("1\034foo", separate, "\034")
-
-The result of this is to set `separate[1]' to 1 and `separate[2]' to
-`"foo"'. Presto, the original sequence of separate indices has been
-recovered.
-
-
-
-File: gawk-info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top
-
-Built--in functions
-*******************
-
-"Built--in" functions are functions always available for your `awk'
-program to call. This chapter defines all the built--in functions
-that exist; some of them are mentioned in other sections, but they
-are summarized here for your convenience. (You can also define new
-functions yourself. *Note User-defined::.)
-
-In most cases, any extra arguments given to built--in functions are
-ignored. The defaults for omitted arguments vary from function to
-function and are described under the individual functions.
-
-The name of a built--in function need not be followed immediately by
-the opening left parenthesis of the arguments; whitespace is allowed.
-However, it is wise to write no space there, since user--defined
-functions do not allow space.
-
-When a function is called, expressions that create the function's
-actual parameters are evaluated completely before the function call
-is performed. For example, in the code fragment:
-
- i = 4
- j = myfunc(i++)
-
-the variable `i' will be set to 5 before `myfunc' is called with a
-value of 4 for its actual parameter.
-
-* Menu:
-
-* Numeric Functions:: Functions that work with numbers,
- including `int', `sin' and `rand'.
-
-* String Functions:: Functions for string manipulation,
- such as `split', `match', and `sprintf'.
-
-* I/O Functions:: Functions for files and shell commands
-
-
-
-File: gawk-info, Node: Numeric Functions, Next: String Functions, Up: Built-in
-
-Numeric Built--in Functions
-===========================
-
-The general syntax of the numeric built--in functions is the same for
-each. Here is an example of that syntax:
-
- awk '# Read input records containing a pair of points: x0, y0, x1, y1.
- # Print the points and the distance between them.
- { printf "%f %f %f %f %f\n", $1, $2, $3, $4,
- sqrt(($2-$1) * ($2-$1) + ($4-$3) * ($4-$3)) }'
-
-This calculates the square root of a calculation that uses the values
-of the fields. It then prints the first four fields of the input
-record and the result of the square root calculation.
-
-Here is the full list of numeric built--in functions:
-
-`int(X)'
- This gives you the integer part of X, truncated toward 0. This
- produces the nearest integer to X, located between X and 0.
-
- For example, `int(3)' is 3, `int(3.9)' is 3, `int(-3.9)' is -3,
- and `int(-3)' is -3 as well.
-
-`sqrt(X)'
- This gives you the positive square root of X. It reports an
- error if X is negative.
-
-`exp(X)'
- This gives you the exponential of X, or reports an error if X is
- out of range. The range of values X can have depends on your
- machine's floating point representation.
-
-`log(X)'
- This gives you the natural logarithm of X, if X is positive;
- otherwise, it reports an error.
-
-`sin(X)'
- This gives you the sine of X, with X in radians.
-
-`cos(X)'
- This gives you the cosine of X, with X in radians.
-
-`atan2(Y, X)'
- This gives you the arctangent of Y/X, with the result in radians.
-
-`rand()'
- This gives you a random number. The values of `rand()' are
- uniformly--distributed between 0 and 1. The value is never 0
- and never 1.
-
- Often you want random integers instead. Here is a user--defined
- function you can use to obtain a random nonnegative integer less
- than N:
-
- function randint(n) {
- return int(n * rand())
- }
-
- The multiplication produces a random real number at least 0, and
- less than N. We then make it an integer (using `int') between 0
- and `N-1'.
-
- Here is an example where a similar function is used to produce
- random integers between 1 and N:
-
- awk '
- # Function to roll a simulated die.
- function roll(n) { return 1 + int(rand() * n) }
-
- # Roll 3 six--sided dice and print total number of points.
- {
- printf("%d points\n", roll(6)+roll(6)+roll(6))
- }'
-
- *Note* that `rand()' starts generating numbers from the same
- point, or "seed", each time you run `awk'. This means that the
- same program will produce the same results each time you run it.
- The numbers are random within one `awk' run, but predictable
- from run to run. This is convenient for debugging, but if you
- want a program to do different things each time it is used, you
- must change the seed to a value that will be different in each
- run. To do this, use `srand'.
-
-`srand(X)'
- The function `srand(X)' sets the starting point, or "seed", for
- generating random numbers to the value X.
-
- Each seed value leads to a particular sequence of ``random''
- numbers. Thus, if you set the seed to the same value a second
- time, you will get the same sequence of ``random'' numbers again.
-
- If you omit the argument X, as in `srand()', then the current
- date and time of day are used for a seed. This is the way to
- get random numbers that are truly unpredictable.
-
- The return value of `srand()' is the previous seed. This makes
- it easy to keep track of the seeds for use in consistently
- reproducing sequences of random numbers.
-
-
-
-File: gawk-info, Node: String Functions, Next: I/O Functions, Prev: Numeric Functions, Up: Built-in
-
-Built--in Functions for String Manipulation
-===========================================
-
-`index(IN, FIND)'
- This searches the string IN for the first occurrence of the
- string FIND, and returns the position where that occurrence
- begins in the string IN. For example:
-
- awk 'BEGIN { print index("peanut", "an") }'
-
- prints `3'. If FIND is not found, `index' returns 0.
-
-`length(STRING)'
- This gives you the number of characters in STRING. If STRING is
- a number, the length of the digit string representing that
- number is returned. For example, `length("abcde")' is 5.
- By contrast, `length(15 * 35)' works out to 3. How? Well, 15 * 35
- = 525, and 525 is then converted to the string `"525"', which
- has three characters.
-
-`match(STRING, REGEXP)'
- The `match' function searches the string, STRING, for the
- longest, leftmost substring matched by the regular expression,
- REGEXP. It returns the character position, or "index", of where
- that substring begins (1, if it starts at the beginning of
- STRING). If no match is found, it returns 0.
-
- The `match' function sets the special variable `RSTART' to the
- index. It also sets the special variable `RLENGTH' to the
- length of the matched substring. If no match is found, `RSTART'
- is set to 0, and `RLENGTH' to -1.
-
- For example:
-
- awk '{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where)
- print "Match of", regex, "found at", where, "in", $0
- }
- }'
-
- This program looks for lines that match the regular expression
- stored in the variable `regex'. This regular expression can be
- changed. If the first word on a line is `FIND', `regex' is
- changed to be the second word on that line. Therefore, given:
-
- FIND fo*bar
- My program was a foobar
- But none of it would doobar
- FIND Melvin
- JF+KM
- This line is property of The Reality Engineering Co.
- This file was created by Melvin.
-
- `awk' prints:
-
- Match of fo*bar found at 18 in My program was a foobar
- Match of Melvin found at 26 in This file was created by Melvin.
-
-`split(STRING, ARRAY, FIELD_SEPARATOR)'
- This divides STRING up into pieces separated by FIELD_SEPARATOR,
- and stores the pieces in ARRAY. The first piece is stored in
- `ARRAY[1]', the second piece in `ARRAY[2]', and so forth. The
- string value of the third argument, FIELD_SEPARATOR, is used as
- a regexp to search for to find the places to split STRING. If
- the FIELD_SEPARATOR is omitted, the value of `FS' is used.
- `split' returns the number of elements created.
-
- The `split' function, then, splits strings into pieces in a
- manner similar to the way input lines are split into fields.
- For example:
-
- split("auto-da-fe", a, "-")
-
- splits the string `auto-da-fe' into three fields using `-' as
- the separator. It sets the contents of the array `a' as follows:
-
- a[1] = "auto"
- a[2] = "da"
- a[3] = "fe"
-
- The value returned by this call to `split' is 3.
-
-`sprintf(FORMAT, EXPRESSION1,...)'
- This returns (without printing) the string that `printf' would
- have printed out with the same arguments (*note Printf::.). For
- example:
-
- sprintf("pi = %.2f (approx.)", 22/7)
-
- returns the string `"pi = 3.14 (approx.)"'.
-
-`sub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)'
- The `sub' function alters the value of TARGET_VARIABLE. It
- searches this value, which should be a string, for the leftmost
- substring matched by the regular expression, REGEXP, extending
- this match as far as possible. Then the entire string is
- changed by replacing the matched text with REPLACEMENT_STRING.
- The modified string becomes the new value of TARGET_VARIABLE.
-
- This function is peculiar because TARGET_VARIABLE is not simply
- used to compute a value, and not just any expression will do: it
- must be a variable, field or array reference, so that `sub' can
- store a modified value there. If this argument is omitted, then
- the default is to use and alter `$0'.
-
- For example:
-
- str = "water, water, everywhere"
- sub(/at/, "ith", str)
-
- sets `str' to `"wither, water, everywhere"', by replacing the
- leftmost, longest occurrence of `at' with `ith'.
-
- The `sub' function returns the number of substitutions made
- (either one or zero).
-
- The special character, `&', in the replacement string,
- REPLACEMENT_STRING, stands for the precise substring that was
- matched by REGEXP. (If the regexp can match more than one
- string, then this precise substring may vary.) For example:
-
- awk '{ sub(/candidate/, "& and his wife"); print }'
-
- will change the first occurrence of ``candidate'' to ``candidate
- and his wife'' on each input line.
-
- The effect of this special character can be turned off by
- preceding it with a backslash (`\&'). To include a backslash in
- the replacement string, it too must be preceded with a (second)
- backslash.
-
- Note: if you use `sub' with a third argument that is not a
- variable, field or array element reference, then it will still
- search for the pattern and return 0 or 1, but the modified
- string is thrown away because there is no place to put it. For
- example:
-
- sub(/USA/, "United States", "the USA and Canada")
-
- will indeed produce a string `"the United States and Canada"',
- but there will be no way to use that string!
-
-`gsub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)'
- This is similar to the `sub' function, except `gsub' replaces
- *all* of the longest, leftmost, *non--overlapping* matching
- substrings it can find. The ``g'' in `gsub' stands for
- "global", which means replace *everywhere*. For example:
-
- awk '{ gsub(/Britain/, "United Kingdom"); print }'
-
- replaces all occurrences of the string `Britain' with `United
- Kingdom' for all input records.
-
- The `gsub' function returns the number of substitutions made.
- If the variable to be searched and altered, TARGET_VARIABLE, is
- omitted, then the entire input record, `$0', is used.
-
- The characters `&' and `\' are special in `gsub' as they are in
- `sub' (see immediately above).
-
-`substr(STRING, START, LENGTH)'
- This returns a LENGTH--character--long substring of STRING,
- starting at character number START. The first character of a
- string is character number one. For example,
- `substr("washington", 5, 3)' returns `"ing"'.
-
- If LENGTH is not present, this function returns the whole suffix
- of STRING that begins at character number START. For example,
- `substr("washington", 5)' returns `"ington"'.
-
-
-
-File: gawk-info, Node: I/O Functions, Prev: String Functions, Up: Built-in
-
-Built--in Functions for I/O to Files and Commands
-=================================================
-
-`close(FILENAME)'
- Close the file FILENAME. The argument may alternatively be a
- shell command that was used for redirecting to or from a pipe;
- then the pipe is closed.
-
- *Note Close Input::, regarding closing input files and pipes.
- *Note Close Output::, regarding closing output files and pipes.
-
-`system(COMMAND)'
- The `system' function allows the user to execute operating system
- commands and then return to the `awk' program. The `system'
- function executes the command given by the string value of
- COMMAND. It returns, as its value, the status returned by the
- command that was executed. This is known as returning the "exit
- status".
-
- For example, if the following fragment of code is put in your
- `awk' program:
-
- END {
- system("mail -s 'awk run done' operator < /dev/null")
- }
-
- the system operator will be sent mail when the `awk' program
- finishes processing input and begins its end--of--input
- processing.
-
- Note that much the same result can be obtained by redirecting
- `print' or `printf' into a pipe. However, if your `awk' program
- is interactive, this function is useful for cranking up large
- self--contained programs, such as a shell or an editor.
-
-
-
-File: gawk-info, Node: User-defined, Next: Special, Prev: Built-in, Up: Top
-
-User--defined Functions
-***********************
-
-Complicated `awk' programs can often be simplified by defining your
-own functions. User--defined functions can be called just like
-built--in ones (*note Function Calls::.), but it is up to you to
-define them--to tell `awk' what they should do.
-
-* Menu:
-
-* Definition Syntax:: How to write definitions and what they mean.
-* Function Example:: An example function definition and what it does.
-* Function Caveats:: Things to watch out for.
-* Return Statement:: Specifying the value a function returns.
-
-
-
-File: gawk-info, Node: Definition Syntax, Next: Function Example, Up: User-defined
-
-Syntax of Function Definitions
-==============================
-
-The definition of a function named NAME looks like this:
-
- function NAME (PARAMETER-LIST) {
- BODY-OF-FUNCTION
- }
-
-A valid function name is like a valid variable name: a sequence of
-letters, digits and underscores, not starting with a digit.
-
-Such function definitions can appear anywhere between the rules of
-the `awk' program. The general format of an `awk' program, then, is
-now modified to include sequences of rules *and* user--defined
-function definitions.
-
-The function definition need not precede all the uses of the function.
-This is because `awk' reads the entire program before starting to
-execute any of it.
-
-The PARAMETER-LIST is a list of the function's "local" variable
-names, separated by commas. Within the body of the function, local
-variables refer to arguments with which the function is called. If
-the function is called with fewer arguments than it has local
-variables, this is not an error; the extra local variables are simply
-set to the null string.
-
-The local variable values hide or "shadow" any variables of the same
-names used in the rest of the program. The shadowed variables are
-not accessible in the function definition, because there is no way to
-name them while their names have been taken away for the local
-variables. All other variables used in the `awk' program can be
-referenced or set normally in the function definition.
-
-The local variables last only as long as the function is executing.
-Once the function finishes, the shadowed variables come back.
-
-The BODY-OF-FUNCTION part of the definition is the most important
-part, because this is what says what the function should actually *do*.
-The local variables exist to give the body a way to talk about the
-arguments.
-
-Functions may be "recursive", i.e., they can call themselves, either
-directly, or indirectly (via calling a second function that calls the
-first again).
-
-The keyword `function' may also be written `func'.
-
-
-
-File: gawk-info, Node: Function Example, Next: Function Caveats, Prev: Definition Syntax, Up: User-defined
-
-Function Definition Example
-===========================
-
-Here is an example of a user--defined function, called `myprint',
-that takes a number and prints it in a specific format.
-
- function myprint(num)
- {
- printf "%6.3g\n", num
- }
-
-To illustrate, let's use the following `awk' rule to use, or "call",
-our `myprint' function:
-
- $3 > 0 { myprint($3) }'
-
-This program prints, in our special format, all the third fields that
-contain a positive number in our input. Therefore, when given:
-
- 1.2 3.4 5.6 7.8
- 9.10 11.12 13.14 15.16
- 17.18 19.20 21.22 23.24
-
-this program, using our function to format the results, will print:
-
- 5.6
- 13.1
- 21.2
-
-Here is a rather contrived example of a recursive function. It
-prints a string backwards:
-
- function rev (str, len) {
- if (len == 0) {
- printf "\n"
- return
- }
- printf "%c", substr(str, len, 1)
- rev(str, len - 1)
- }
-
-
-
-File: gawk-info, Node: Function Caveats, Next: Return Statement, Prev: Function Example, Up: User-defined
-
-Caveats of Function Calling
-===========================
-
-*Note* that there cannot be any blanks between the function name and
-the left parenthesis of the argument list, when calling a function.
-This is so `awk' can tell you are not trying to concatenate the value
-of a variable with the value of an expression inside the parentheses.
-
-When a function is called, it is given a *copy* of the values of its
-arguments. This is called "passing by value". The caller may use a
-variable as the expression for the argument, but the called function
-does not know this: all it knows is what value the argument had. For
-example, if you write this code:
-
- foo = "bar"
- z = myfunc(foo)
-
-then you should not think of the argument to `myfunc' as being ``the
-variable `foo'''. Instead, think of the argument as the string
-value, `"bar"'.
-
-If the function `myfunc' alters the values of its local variables,
-this has no effect on any other variables. In particular, if
-`myfunc' does this:
-
- function myfunc (win) {
- print win
- win = "zzz"
- print win
- }
-
-to change its first argument variable `win', this *does not* change
-the value of `foo' in the caller. The role of `foo' in calling
-`myfunc' ended when its value, `"bar"', was computed. If `win' also
-exists outside of `myfunc', this definition will not change it--that
-value is shadowed during the execution of `myfunc' and cannot be seen
-or changed from there.
-
-However, when arrays are the parameters to functions, they are *not*
-copied. Instead, the array itself is made available for direct
-manipulation by the function. This is usually called "passing by
-reference". Changes made to an array parameter inside the body of a
-function *are* visible outside that function. *This can be very
-dangerous if you don't watch what you are doing.* For example:
-
- function changeit (array, ind, nvalue) {
- array[ind] = nvalue
- }
-
- BEGIN {
- a[1] = 1 ; a[2] = 2 ; a[3] = 3
- changeit(a, 2, "two")
- printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
- }
-
-will print `a[1] = 1, a[2] = two, a[3] = 3', because the call to
-`changeit' stores `"two"' in the second element of `a'.
-
-
-
-File: gawk-info, Node: Return Statement, Prev: Function Caveats, Up: User-defined
-
-The `return' statement
-======================
-
-The body of a user--defined function can contain a `return' statement.
-This statement returns control to the rest of the `awk' program. It
-can also be used to return a value for use in the rest of the `awk'
-program. It looks like:
-
- `return EXPRESSION'
-
-The EXPRESSION part is optional. If it is omitted, then the returned
-value is undefined and, therefore, unpredictable.
-
-A `return' statement with no value expression is assumed at the end
-of every function definition. So if control reaches the end of the
-function definition, then the function returns an unpredictable value.
-
-Here is an example of a user--defined function that returns a value
-for the largest number among the elements of an array:
-
- function maxelt (vec, i, ret) {
- for (i in vec) {
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- }
- return ret
- }
-
-You call `maxelt' with one argument, an array name. The local
-variables `i' and `ret' are not intended to be arguments; while there
-is nothing to stop you from passing two or three arguments to
-`maxelt', the results would be strange.
-
-When writing a function definition, it is conventional to separate
-the parameters from the local variables with extra spaces, as shown
-above in the definition of `maxelt'.
-
-Here is a program that uses, or calls, our `maxelt' function. This
-program loads an array, calls `maxelt', and then reports the maximum
-number in that array:
-
- awk '
- function maxelt (vec, i, ret) {
- for (i in vec) {
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- }
- return ret
- }
-
- # Load all fields of each record into nums.
- {
- for(i = 1; i <= NF; i++)
- nums[NR, i] = $i
- }
-
- END {
- print maxelt(nums)
- }'
-
-Given the following input:
-
- 1 5 23 8 16
- 44 3 5 2 8 26
- 256 291 1396 2962 100
- -6 467 998 1101
- 99385 11 0 225
-
-our program tells us (predictably) that:
-
- 99385
-
-is the largest number in our array.
-
-
-
-File: gawk-info, Node: Special, Next: Sample Program, Prev: User-defined, Up: Top
-
-Special Variables
-*****************
-
-Most `awk' variables are available for you to use for your own
-purposes; they will never change except when your program assigns
-them, and will never affect anything except when your program
-examines them.
-
-A few variables have special meanings. Some of them `awk' examines
-automatically, so that they enable you to tell `awk' how to do
-certain things. Others are set automatically by `awk', so that they
-carry information from the internal workings of `awk' to your program.
-
-Most of these variables are also documented in the chapters where
-their areas of activity are described.
-
-* Menu:
-
-* User-modified:: Special variables that you change to control `awk'.
-
-* Auto-set:: Special variables where `awk' gives you information.
-
- \ No newline at end of file
diff --git a/gawk-info-5 b/gawk-info-5
deleted file mode 100644
index fd8d7eec..00000000
--- a/gawk-info-5
+++ /dev/null
@@ -1,960 +0,0 @@
-Info file gawk-info, produced by Makeinfo, -*- Text -*- from input
-file gawk.texinfo.
-
-This file documents `awk', a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of
-this manual under the conditions for verbatim copying, provided that
-the entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be stated in a
-translation approved by the Foundation.
-
-
-
-File: gawk-info, Node: User-modified, Next: Auto-set, Up: Special
-
-Special Variables That Control `awk'
-====================================
-
-This is a list of the variables which you can change to control how
-`awk' does certain things.
-
-`FS'
- `FS' is the input field separator (*note Field Separators::.).
- The value is a regular expression that matches the separations
- between fields in an input record.
-
- The default value is `" "', a string consisting of a single
- space. As a special exception, this value actually means that
- any sequence of spaces and tabs is a single separator. It also
- causes spaces and tabs at the beginning or end of a line to be
- ignored.
-
- You can set the value of `FS' on the command line using the `-F'
- option:
-
- awk -F, 'PROGRAM' INPUT-FILES
-
-`OFMT'
- This string is used by `awk' to control conversion of numbers to
- strings (*note Conversion::.). It works by being passed, in
- effect, as the first argument to the `sprintf' function. Its
- default value is `"%.6g"'.
-
-`OFS'
- This is the output field separator (*note Output Separators::.).
- It is output between the fields output by a `print' statement.
- Its default value is `" "', a string consisting of a single space.
-
-`ORS'
- This is the output record separator (*note Output
- Separators::.). It is output at the end of every `print'
- statement. Its default value is the newline character, often
- represented in `awk' programs as `\n'.
-
-`RS'
- This is `awk''s record separator (*note Records::.). Its
- default value is a string containing a single newline character,
- which means that an input record consists of a single line of
- text.
-
-`SUBSEP'
- `SUBSEP' is a subscript separator (*note Multi-dimensional::.).
- It has the default value of `"\034"', and is used to separate
- the parts of the name of a multi--dimensional array. Thus, if
- you access `foo[12,3]', it really accesses `foo["12\0343"]'.
-
-
-
-File: gawk-info, Node: Auto-set, Prev: User-modified, Up: Special
-
-Special Variables That Convey Information to You
-================================================
-
-This is a list of the variables that are set automatically by `awk'
-on certain occasions so as to provide information for your program.
-
-`ARGC'
-`ARGV'
- The command--line arguments available to `awk' are stored in an
- array called `ARGV'. `ARGC' is the number of command--line
- arguments present. `ARGV' is indexed from zero to `ARGC' - 1.
- For example:
-
- awk '{ print ARGV[$1] }' inventory-shipped BBS-list
-
- In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains
- `"inventory-shipped"', and `ARGV[2]' contains `"BBS-list"'.
- `ARGC' is 3, one more than the index of the last element in
- `ARGV' since the elements are numbered from zero.
-
- Notice that the `awk' program is not treated as an argument.
- The `-f' `FILENAME' option, and the `-F' option, are also not
- treated as arguments for this purpose.
-
- Variable assignments on the command line *are* treated as
- arguments, and do show up in the `ARGV' array.
-
- Your program can alter `ARGC' and the elements of `ARGV'. Each time
- `awk' reaches the end of an input file, it uses the next element
- of `ARGV' as the name of the next input file. By storing a
- different string there, your program can change which files are
- read. You can use `-' to represent the standard input. By
- storing additional elements and incrementing `ARGC' you can
- cause additional files to be read.
-
- If you decrease the value of `ARGC', that eliminates input files
- from the end of the list. By recording the old value of `ARGC'
- elsewhere, your program can treat the eliminated arguments as
- something other than file names.
-
- To eliminate a file from the middle of the list, store the null
- string (`""') into `ARGV' in place of the file's name. As a
- special feature, `awk' ignores file names that have been
- replaced with the null string.
-
-`ENVIRON'
- This is an array that contains the values of the environment.
- The array indices are the environment variable names; the values
- are the values of the particular environment variables. For
- example, `ENVIRON["HOME"]' might be `/u/close'. Changing this
- array does not affect the environment passed on to any programs
- that `awk' may spawn via redirection or the `system' function.
- (This may not work under operating systems other than MS-DOS,
- Unix, or GNU.)
-
-`FILENAME'
- This is the name of the file that `awk' is currently reading.
- If `awk' is reading from the standard input (in other words,
- there are no files listed on the command line), `FILENAME' is
- set to `"-"'. `FILENAME' is changed each time a new file is
- read (*note Reading Files::.).
-
-`FNR'
- `FNR' is the current record number in the current file. `FNR'
- is incremented each time a new record is read (*note Getline::.).
- It is reinitialized to 0 each time a new input file is started.
-
-`NF'
- `NF' is the number of fields in the current input record. `NF'
- is set each time a new record is read, when a new field is
- created, or when $0 changes (*note Fields::.).
-
-`NR'
- This is the number of input records `awk' has processed since
- the beginning of the program's execution (*note Records::.).
- `NR' is set each time a new record is read.
-
-`RLENGTH'
- `RLENGTH' is the length of the string matched by the `match'
- function (*note String Functions::.). `RLENGTH' is set by
- invoking the `match' function. Its value is the length of the
- matched string, or -1 if no match was found.
-
-`RSTART'
- `RSTART' is the start of the string matched by the `match'
- function (*note String Functions::.). `RSTART' is set by
- invoking the `match' function. Its value is the position in the
- string where the matched substring starts, or 0 if no match was
- found.
-
-
-
-File: gawk-info, Node: Sample Program, Next: Notes, Prev: Special, Up: Top
-
-Sample Program
-**************
-
-The following example is a complete `awk' program, which prints the
-number of occurrences of each word in its input. It illustrates the
-associative nature of `awk' arrays by using strings as subscripts.
-It also demonstrates the `for X in ARRAY' construction. Finally, it
-shows how `awk' can be used in conjunction with other utility
-programs to do a useful task of some complexity with a minimum of
-effort. Some explanations follow the program listing.
-
- awk '
- # Print list of word frequencies
- {
- for (i = 1; i <= NF; i++)
- freq[$i]++
- }
-
- END {
- for (word in freq)
- printf "%s\t%d\n", word, freq[word]
- }'
-
-The first thing to notice about this program is that it has two
-rules. The first rule, because it has an empty pattern, is executed
-on every line of the input. It uses `awk''s field--accessing
-mechanism (*note Fields::.) to pick out the individual words from the
-line, and the special variable `NF' (*note Special::.) to know how
-many fields are available.
-
-For each input word, an element of the array `freq' is incremented to
-reflect that the word has been seen an additional time.
-
-The second rule, because it has the pattern `END', is not executed
-until the input has been exhausted. It prints out the contents of
-the `freq' table that has been built up inside the first action.
-
-Note that this program has several problems that would prevent it
-from being useful by itself on real text files:
-
- * Words are detected using the `awk' convention that fields are
- separated by whitespace and that other characters in the input
- (except newlines) don't have any special meaning to `awk'. This
- means that punctuation characters count as part of words.
-
- * The `awk' language considers upper and lower case characters to
- be distinct. Therefore, `foo' and `Foo' will not be treated by
- this program as the same word. This is undesirable since in
- normal text, words are capitalized if they begin sentences, and
- a frequency analyzer should not be sensitive to that.
-
- * The output does not come out in any useful order. You're more
- likely to be interested in which words occur most frequently, or
- having an alphabetized table of how frequently each word occurs.
-
-The way to solve these problems is to use other operating system
-utilities to process the input and output of the `awk' script.
-Suppose the script shown above is saved in the file `frequency.awk'.
-Then the shell command:
-
- tr A-Z a-z < file1 | tr -cd 'a-z\012' \
- | awk -f frequency.awk \
- | sort +1 -nr
-
-produces a table of the words appearing in `file1' in order of
-decreasing frequency.
-
-The first `tr' command in this pipeline translates all the upper case
-characters in `file1' to lower case. The second `tr' command deletes
-all the characters in the input except lower case characters and
-newlines. The second argument to the second `tr' is quoted to
-protect the backslash in it from being interpreted by the shell. The
-`awk' program reads this suitably massaged data and produces a word
-frequency table, which is not ordered.
-
-The `awk' script's output is now sorted by the `sort' command and
-printed on the terminal. The options given to `sort' in this example
-specify to sort by the second field of each input line (skipping one
-field), that the sort keys should be treated as numeric quantities
-(otherwise `15' would come before `5'), and that the sorting should
-be done in descending (reverse) order.
-
-See the general operating system documentation for more information
-on how to use the `tr' and `sort' commands.
-
-
-
-File: gawk-info, Node: Notes, Next: Glossary, Prev: Sample Program, Up: Top
-
-Implementation Notes
-********************
-
-This appendix contains information mainly of interest to implementors
-and maintainers of `gawk'. Everything in it applies specifically to
-`gawk', and not to other implementations.
-
-* Menu:
-
-* Extensions:: Things `gawk' does that Unix `awk' does not.
-
-* Future Extensions:: Things likely to appear in a future release.
-
-* Improvements:: Suggestions for future improvements.
-
-* Manual Improvements:: Suggestions for improvements to this manual.
-
-
-
-File: gawk-info, Node: Extensions, Next: Future Extensions, Up: Notes
-
-GNU Extensions to the AWK Language
-==================================
-
-Several new features are in a state of flux. They are described here
-merely to document them somewhat, but they will probably change. We
-hope they will be incorporated into other versions of `awk', too.
-
-All of these features can be turned off either by compiling `gawk'
-with `-DSTRICT', or by invoking `gawk' as `awk'.
-
-The `AWKPATH' environment variable
- When opening a file supplied via the `-f' option, if the
- filename does not contain a `/', `gawk' will perform a "path
- search" for the file, similar to that performed by the shell.
- `gawk' gets its search path from the `AWKPATH' environment
- variable. If that variable does not exist, it uses the default
- path `".:/usr/lib/awk:/usr/local/lib/awk"'.
-
-Case Independent Matching
- Two new operators have been introduced, `~~', and `!~~'. These
- perform regular expression match and no-match operations that
- are case independent. In other words, `A' and `a' would both
- match `/a/'.
-
-The `-i' option
- This option causes the `~' and `!~' operators to behave like the
- `~~' and `!~~' operators described above.
-
-The `-v' option
- This option prints version information for this particular copy
- of `gawk'. This is so you can determine if your copy of `gawk'
- is up to date with respect to whatever the Free Software
- Foundation is currently distributing. It may disappear in a
- future version of `gawk'.
-
-
-
-File: gawk-info, Node: Future Extensions, Next: Improvements, Prev: Extensions, Up: Notes
-
-Extensions Likely To Appear In A Future Release
-===============================================
-
-Here are some more extensions that indicate the directions we are
-currently considering for `gawk'. Like the previous section, this
-section is also subject to change. None of these are implemented yet.
-
-The `IGNORECASE' special variable
- If `IGNORECASE' is non--zero, then *all* regular expression
- matching will be done in a case--independent fashion. The `-i'
- option and the `~~' and `!~~' operators will go away, as this
- mechanism generalizes those facilities.
-
-More Escape Sequences
- The ANSI C `\a', and `\x' escape sequences will be recognized.
- Unix `awk' does not recognize `\v', although `gawk' does.
-
-`RS' as a regexp
- The meaning of `RS' will be generalized along the lines of `FS'.
-
-Transliteration Functions
- We are planning on adding `toupper' and `tolower' functions
- which will take string arguments, and return strings where the
- case of each letter has been transformed to upper-- or
- lower--case respectively.
-
-Access To System File Descriptors
- `gawk' will recognize the special file names `/dev/stdin',
- `/dev/stdout', `/dev/stderr', and `/dev/fd/N' internally. These
- will allow access to inherited file descriptors from within an
- `awk' program.
-
-
-
-File: gawk-info, Node: Improvements, Next: Manual Improvements, Prev: Future Extensions, Up: Notes
-
-Suggestions for Future Improvements
-===================================
-
-Here are some projects that would--be `gawk' hackers might like to
-take on. They vary in size from a few days to a few weeks of
-programming, depending on which one you choose and how fast a
-programmer you are. Please send any improvements you write to the
-maintainers at the GNU project.
-
- 1. State machine regexp matcher: At present, `gawk' uses the
- backtracking regular expression matcher from the GNU subroutine
- library. If a regexp is really going to be used a lot of times,
- it is faster to convert it once to a description of a finite
- state machine, then run a routine simulating that machine every
- time you want to match the regexp. You could use the matching
- routines used by GNU `egrep'.
-
- 2. Compilation of `awk' programs: `gawk' uses a `Bison'
- (YACC--like) parser to convert the script given it into a syntax
- tree; the syntax tree is then executed by a simple recursive
- evaluator. Both of these steps incur a lot of overhead, since
- parsing can be slow (especially if you also do the previous
- project and convert regular expressions to finite state machines
- at compile time) and the recursive evaluator performs many
- procedure calls to do even the simplest things.
-
- It should be possible for `gawk' to convert the script's parse
- tree into a C program which the user would then compile, using
- the normal C compiler and a special `gawk' library to provide
- all the needed functions (regexps, fields, associative arrays,
- type coercion, and so on).
-
- An easier possibility might be for an intermediate phase of
- `awk' to convert the parse tree into a linear byte code form
- like the one used in GNU Emacs Lisp. The recursive evaluator
- would then be replaced by a straight line byte code interpreter
- that would be intermediate in speed between running a compiled
- program and doing what `gawk' does now.
-
-
-
-File: gawk-info, Node: Manual Improvements, Prev: Improvements, Up: Notes
-
-Suggestions For Future Improvements of This Manual
-==================================================
-
- 1. An error message section has not been included in this version
- of the manual. Perhaps some nice beta testers will document
- some of the messages for the future.
-
- 2. A summary page has not been included, as the ``man'', or help,
- page that comes with the `gawk' code should suffice.
-
- GNU only supports Info, so this manual itself should contain
- whatever forms of information it would be useful to have on an
- Info summary page.
-
- 3. A function and variable index has not been included as we are
- not sure what to put in it.
-
- 4. A section summarizing the differences between V7 `awk' and
- System V Release 4 `awk' would be useful for long--time `awk'
- hackers.
-
-
-
-File: gawk-info, Node: Glossary, Next: Index, Prev: Notes, Up: Top
-
-Glossary
-********
-
-Action
- A series of `awk' statements attached to a rule. If the rule's
- pattern matches an input record, the `awk' language executes the
- rule's action. Actions are always enclosed in curly braces.
-
-Amazing `awk' assembler
- Henry Spencer at the University of Toronto wrote a retargetable
- assembler completely as `awk' scripts. It is thousands of lines
- long, including machine descriptions for several 8--bit
- microcomputers. It is distributed with `gawk' and is a good
- example of a program that would have been better written in
- another language.
-
-Assignment
- An `awk' expression that changes the value of some `awk'
- variable or data object. An object that you can assign to is
- called an "lvalue".
-
-Built-in function
- The `awk' language provides built--in functions that perform
- various numerical and string computations. Examples are `sqrt'
- (for the square root of a number) and `substr' (for a substring
- of a string).
-
-C
- The system programming language that most of GNU is written in.
- The `awk' programming language has C--like syntax, and this
- manual points out similarities between `awk' and C when
- appropriate.
-
-Compound statement
- A series of `awk' statements, enclosed in curly braces.
- Compound statements may be nested.
-
-Concatenation
- Concatenating two strings means sticking them together, one
- after another, giving a new string. For example, the string
- `foo' concatenated with the string `bar' gives the string
- `foobar'.
-
-Conditional expression
- A relation that is either true or false, such as `(a < b)'.
- Conditional expressions are used in `if' and `while' statements,
- and in patterns to select which input records to process.
-
-Curly braces
- The characters `{' and `}'. Curly braces are used in `awk' for
- delimiting actions, compound statements, and function bodies.
-
-Data objects
- These are numbers and strings of characters. Numbers are
- converted into strings and vice versa, as needed.
-
-Escape Sequences
- A special sequence of characters used for describing
- non--printable characters, such as `\n' for newline, or `\033'
- for the ASCII ESC (escape) character.
-
-Field
- When `awk' reads an input record, it splits the record into
- pieces separated by whitespace (or by a separator regexp which
- you can change by setting the special variable `FS'). Such
- pieces are called fields.
-
-Format
- Format strings are used to control the appearance of output in
- the `printf' statement. Also, data conversions from numbers to
- strings are controlled by the format string contained in the
- special variable `OFMT'.
-
-Function
- A specialized group of statements often used to encapsulate
- general or program--specific tasks. `awk' has a number of
- built--in functions, and also allows you to define your own.
-
-`gawk'
- The GNU implementation of `awk'.
-
-`awk' language
- The language in which `awk' programs are written.
-
-`awk' program
- An `awk' program consists of a series of "patterns" and
- "actions", collectively known as "rules". For each input record
- given to the program, the program's rules are all processed in
- turn. `awk' programs may also contain function definitions.
-
-`awk' script
- Another name for an `awk' program.
-
-Input record
- A single chunk of data read in by `awk'. Usually, an `awk'
- input record consists of one line of text.
-
-Keyword
- In the `awk' language, a keyword is a word that has special
- meaning. Keywords are reserved and may not be used as variable
- names.
-
- The keywords are: `if', `else', `while', `do...while', `for',
- `for...in', `break', `continue', `delete', `next', `function',
- `func', and `exit'.
-
-Lvalue
- An expression that can appear on the left side of an assignment
- operator. In most languages, lvalues can be variables or array
- elements. In `awk', a field designator can also be used as an
- lvalue.
-
-Number
- A numeric valued data object. The `gawk' implementation uses
- double precision floating point to represent numbers.
-
-Pattern
- Patterns tell `awk' which input records are interesting to which
- rules.
-
- A pattern is an arbitrary conditional expression against which
- input is tested. If the condition is satisfied, the pattern is
- said to "match" the input record. A typical pattern might
- compare the input record against a regular expression.
-
-Range (of input lines)
- A sequence of consecutive lines from the input file. A pattern
- can specify ranges of input lines for `awk' to process, or it
- can specify single lines.
-
-Recursion
- When a function calls itself, either directly or indirectly. If
- this isn't clear, refer to the entry for ``recursion''.
-
-Redirection
- Redirection means performing input from other than the standard
- input stream, or output to other than the standard output stream.
-
- You can redirect the output of the `print' and `printf'
- statements to a file or a system command, using the `>', `>>',
- and `|' operators. You can redirect input to the `getline'
- statement using the `<' and `|' operators.
-
-Regular Expression
- See ``regexp''.
-
-Regexp
- Short for "regular expression". A regexp is a pattern that
- denotes a set of strings, possibly an infinite set. For
- example, the regexp `R.*xp' matches any string starting with the
- letter `R' and ending with the letters `xp'. In `awk', regexps
- are used in patterns and in conditional expressions.
-
-Rule
- A segment of an `awk' program that specifies how to process
- single input records. A rule consists of a "pattern" and an
- "action". `awk' reads an input record; then, for each rule, if
- the input record satisfies the rule's pattern, `awk' executes
- the rule's action. Otherwise, the rule does nothing for that
- input record.
-
-Special Variable
- The variables `ARGC', `ARGV', `ENVIRON', `FILENAME', `FNR',
- `FS', `NF', `NR', `OFMT', `OFS', `ORS', `RLENGTH', `RSTART',
- `RS', and `SUBSEP' have special meaning to `awk'. Changing some of
- them affects `awk''s running environment.
-
-Stream Editor
- A program that reads records from an input stream and processes
- them one or more at a time. This is in contrast with batch
- programs, which may expect to read their input files in entirety
- before starting to do anything, and with interactive programs,
- which require input from the user.
-
-String
- A datum consisting of a sequence of characters, such as `I am a
- string'. Constant strings are written with double--quotes in
- the `awk' language, and may contain "escape sequences".
-
-Whitespace
- A sequence of blank or tab characters occurring inside an input
- record or a string.
-
-
-
-File: gawk-info, Node: Index, Prev: Glossary, Up: Top
-
-Index
-*****
-
-* Menu:
-
-* #!: Executable Scripts.
-* -f option: Long.
-* `$NF', last field in record: Fields.
-* `$' (field operator): Fields.
-* `>>': Redirection.
-* `>': Redirection.
-* `BEGIN', special pattern: BEGIN/END.
-* `END', special pattern: BEGIN/END.
-* `awk' language: This Manual.
-* `awk' program: This Manual.
-* `break' statement: Break.
-* `close' statement for input: Close Input.
-* `close' statement for output: Close Output.
-* `continue' statement: Continue.
-* `delete' statement: Delete.
-* `exit' statement: Exit.
-* `for (x in ...)': Scanning an Array.
-* `for' statement: For.
-* `if' statement: If.
-* `next' statement: Next.
-* `print $0': Very Simple.
-* `printf' statement, format of: Basic Printf.
-* `printf', format-control characters: Format-Control.
-* `printf', modifiers: Modifiers.
-* `print' statement: Print.
-* `return' statement: Return Statement.
-* `while' statement: While.
-* `|': Redirection.
-* `BBS-list' file: The Files.
-* `inventory-shipped' file: The Files.
-* Accessing fields: Fields.
-* Acronym: History.
-* Action, curly braces: Actions.
-* Action, curly braces: Getting Started.
-* Action, default: Very Simple.
-* Action, definition of: Getting Started.
-* Action, general: Actions.
-* Action, separating statements: Actions.
-* Applications of `awk': When.
-* Arguments in function call: Function Calls.
-* Arguments, Command Line: Command Line.
-* Arithmetic operators: Arithmetic Ops.
-* Array assignment: Assigning Elements.
-* Array reference: Reference to Elements.
-* Arrays: Array Intro.
-* Arrays, definition of: Array Intro.
-* Arrays, deleting an element: Delete.
-* Arrays, determining presence of elements: Reference to Elements.
-* Arrays, multi-dimensional subscripts: Multi-dimensional.
-* Arrays, special `for' statement: Scanning an Array.
-* Assignment operators: Assignment Ops.
-* Associative arrays: Array Intro.
-* Backslash Continuation: Statements/Lines.
-* Basic function of `gawk': Getting Started.
-* Body of a loop: While.
-* Boolean expressions: Boolean Ops.
-* Boolean operators: Boolean Ops.
-* Boolean patterns: Boolean.
-* Built-in functions, list of: Built-in.
-* Built-in variables: Variables.
-* Calling a function: Function Calls.
-* Case sensitivity and gawk: Read Terminal.
-* Changing contents of a field: Changing Fields.
-* Changing the record separator: Records.
-* Closing files and pipes: Close Output.
-* Command Line: Command Line.
-* Command line formats: Running gawk.
-* Command line, setting `FS' on: Field Separators.
-* Comments: Comments.
-* Comparison expressions: Comparison Ops.
-* Comparison expressions as patterns: Comparison Patterns.
-* Compound statements: Actions.
-* Computed Regular Expressions: Regexp Usage.
-* Concatenation: Concatenation.
-* Conditional Patterns: Conditional Patterns.
-* Conditional expression: Conditional Exp.
-* Constants, types of: Constants.
-* Continuing statements on the next line: Statements/Lines.
-* Conversion of strings and numbers: Conversion.
-* Curly braces: Actions.
-* Curly braces: Getting Started.
-* Default action: Very Simple.
-* Default pattern: Very Simple.
-* Deleting elements of arrays: Delete.
-* Differences between `gawk' and `awk': Arithmetic Ops.
-* Differences between `gawk' and `awk': Constants.
-* Documenting `awk' programs: Comments.
-* Dynamic Regular Expressions: Regexp Usage.
-* Element assignment: Assigning Elements.
-* Element of array: Reference to Elements.
-* Emacs Lisp: When.
-* Empty pattern: Empty.
-* Escape sequence notation: Constants.
-* Examining fields: Fields.
-* Executable Scripts: Executable Scripts.
-* Expression, conditional: Conditional Exp.
-* Expressions: Actions.
-* Expressions, boolean: Boolean Ops.
-* Expressions, comparison: Comparison Ops.
-* Field separator, `FS': Field Separators.
-* Field separator, choice of: Field Separators.
-* Field separator, setting on command line: Field Separators.
-* Field, changing contents of: Changing Fields.
-* Fields: Fields.
-* Fields, negative-numbered: Non-Constant Fields.
-* Fields, semantics of: Field Separators.
-* Fields, separating: Field Separators.
-* Format specifier: Format-Control.
-* Format string: Basic Printf.
-* Formatted output: Printf.
-* Function call: Function Calls.
-* Function definitions: Actions.
-* Functions, user-defined: User-defined.
-* General input: Reading Files.
-* History of `awk': History.
-* How gawk works: Two Rules.
-* Increment operators: Increment Ops.
-* Input file, sample: The Files.
-* Input, `getline' function: Getline.
-* Input, general: Reading Files.
-* Input, multiple line records: Multiple.
-* Input, standard: Read Terminal.
-* Input, standard: Reading Files.
-* Interaction of `awk' with other programs: I/O Functions.
-* Invocation of `gawk': Command Line.
-* Language, `awk': This Manual.
-* Loop: While.
-* Loops, breaking out of: Break.
-* Lvalue: Assignment Ops.
-* Manual, using this: This Manual.
-* Metacharacters: Regexp Operators.
-* Mod function, semantics of: Arithmetic Ops.
-* Modifiers (in format specifiers): Modifiers.
-* Multiple line records: Multiple.
-* Multiple passes over data: Command Line.
-* Multiple statements on one line: Statements/Lines.
-* Negative-numbered fields: Non-Constant Fields.
-* Number of fields, `NF': Fields.
-* Number of records, `FNR': Records.
-* Number of records, `NR': Records.
-* Numerical constant: Constants.
-* Numerical value: Constants.
-* One-liners: One-liners.
-* Operator, Ternary: Conditional Patterns.
-* Operators, `$': Fields.
-* Operators, arithmetic: Arithmetic Ops.
-* Operators, assignment: Assignment Ops.
-* Operators, boolean: Boolean Ops.
-* Operators, increment: Increment Ops.
-* Operators, regular expression matching: Regexp Usage.
-* Operators, relational: Comparison Ops.
-* Operators, relational: Comparison Patterns.
-* Operators, string: Concatenation.
-* Operators, string-matching: Regexp Usage.
-* Options, Command Line: Command Line.
-* Output: Printing.
-* Output field separator, `OFS': Output Separators.
-* Output record separator, `ORS': Output Separators.
-* Output redirection: Redirection.
-* Output, formatted: Printf.
-* Output, piping: Redirection.
-* Passes, Multiple: Command Line.
-* Pattern, case sensitive: Read Terminal.
-* Pattern, comparison expressions: Comparison Patterns.
-* Pattern, default: Very Simple.
-* Pattern, definition of: Getting Started.
-* Pattern, empty: Empty.
-* Pattern, regular expressions: Regexp.
-* Patterns, `BEGIN': BEGIN/END.
-* Patterns, `END': BEGIN/END.
-* Patterns, Conditional: Conditional Patterns.
-* Patterns, boolean: Boolean.
-* Patterns, definition of: Patterns.
-* Patterns, types of: Patterns.
-* Pipes for output: Redirection.
-* Printing, general: Printing.
-* Program, `awk': This Manual.
-* Program, Self contained: Executable Scripts.
-* Program, definition of: Getting Started.
-* Programs, documenting: Comments.
-* Range pattern: Ranges.
-* Reading files, `getline' function: Getline.
-* Reading files, general: Reading Files.
-* Reading files, multiple line records: Multiple.
-* Record separator, `RS': Records.
-* Records, multiple line: Multiple.
-* Redirection of output: Redirection.
-* Reference to array: Reference to Elements.
-* Regexp: Regexp.
-* Regular Expressions, Computed: Regexp Usage.
-* Regular Expressions, Dynamic: Regexp Usage.
-* Regular expression matching operators: Regexp Usage.
-* Regular expression, metacharacters: Regexp Operators.
-* Regular expressions as patterns: Regexp.
-* Regular expressions, field separators and: Field Separators.
-* Relational operators: Comparison Patterns.
-* Relational operators: Comparison Ops.
-* Removing elements of arrays: Delete.
-* Rule, definition of: Getting Started.
-* Running gawk programs: Running gawk.
-* Sample input file: The Files.
-* Scanning an array: Scanning an Array.
-* Script, definition of: Getting Started.
-* Scripts, Executable: Executable Scripts.
-* Scripts, Shell: Executable Scripts.
-* Self contained Programs: Executable Scripts.
-* Separator character, choice of: Field Separators.
-* Shell Scripts: Executable Scripts.
-* Single quotes, why they are needed: One-shot.
-* Special variables, user modifiable: User-modified.
-* Standard input: Read Terminal.
-* Standard input: Reading Files.
-* Statements: Statements.
-* Statements: Actions.
-* String constants: Constants.
-* String operators: Concatenation.
-* String value: Constants.
-* String-matching operators: Regexp Usage.
-* Subscripts, multi-dimensional in arrays: Multi-dimensional.
-* Ternary Operator: Conditional Patterns.
-* Use of comments: Comments.
-* User-defined functions: User-defined.
-* User-defined variables: Variables.
-* Uses of `awk': Preface.
-* Using this manual: This Manual.
-* Variables, built-in: Variables.
-* Variables, user-defined: Variables.
-* What is `awk': Preface.
-* When to use `awk': When.
-* file, `awk' program: Long.
-* patterns, range: Ranges.
-* program file: Long.
-* regexp search operators: Regexp Usage.
-* running long programs: Long.
-
-
- 
-Tag Table:
-Node: Top918
-Node: Preface2804
-Node: History4267
-Node: License5644
-Node: This Manual18989
-Node: The Files20330
-Node: Getting Started22914
-Node: Very Simple24249
-Node: Two Rules26030
-Node: More Complex28066
-Node: Running gawk30908
-Node: One-shot31827
-Node: Read Terminal32945
-Node: Long33862
-Node: Executable Scripts34991
-Node: Command Line36534
-Node: Comments40168
-Node: Statements/Lines41067
-Node: When43498
-Node: Reading Files45420
-Node: Records47119
-Node: Fields49902
-Node: Non-Constant Fields52789
-Node: Changing Fields54591
-Node: Field Separators57302
-Node: Multiple62004
-Node: Assignment Options64393
-Node: Getline65608
-Node: Close Input74958
-Node: Printing76023
-Node: Print76748
-Node: Print Examples78712
-Node: Output Separators80751
-Node: Redirection82417
-Node: Close Output85886
-Node: Printf88132
-Node: Basic Printf88908
-Node: Format-Control90261
-Node: Modifiers91806
-Node: Printf Examples93108
-Node: One-liners95707
-Node: Patterns97642
-Node: Empty100130
-Node: Regexp100402
-Node: Regexp Usage101173
-Node: Regexp Operators102947
-Node: Comparison Patterns107890
-Node: Ranges109336
-Node: BEGIN/END110722
-Node: Boolean113151
-Node: Conditional Patterns115605
-Node: Actions116105
-Node: Expressions117435
-Node: Constants119124
-Node: Variables121097
-Node: Arithmetic Ops122454
-Node: Concatenation123840
-Node: Comparison Ops124569
-Node: Boolean Ops125973
-Node: Assignment Ops128266
-Node: Increment Ops131817
-Node: Conversion134112
-Node: Conditional Exp136066
-Node: Function Calls137384
-Node: Statements139939
-Node: If141253
-Node: While142627
-Node: Do144232
-Node: For145265
-Node: Break148306
-Node: Continue149848
-Node: Next151476
-Node: Exit152985
-Node: Arrays154514
-Node: Array Intro155624
-Node: Reference to Elements159227
-Node: Assigning Elements161115
-Node: Array Example161615
-Node: Scanning an Array163336
-Node: Delete165642
-Node: Multi-dimensional166529
-Node: Multi-scanning169746
-Node: Built-in171303
-Node: Numeric Functions172806
-Node: String Functions176601
-Node: I/O Functions183717
-Node: User-defined185189
-Node: Definition Syntax185834
-Node: Function Example187928
-Node: Function Caveats189034
-Node: Return Statement191386
-Node: Special193612
-Node: User-modified194478
-Node: Auto-set196511
-Node: Sample Program200558
-Node: Notes204316
-Node: Extensions204909
-Node: Future Extensions206490
-Node: Improvements207922
-Node: Manual Improvements210034
-Node: Glossary210928
-Node: Index217934
diff --git a/gawk.1 b/gawk.1
deleted file mode 100644
index 3d2068b8..00000000
--- a/gawk.1
+++ /dev/null
@@ -1,1344 +0,0 @@
-.TH GAWK 1 "Free Software Foundation"
-.SH NAME
-gawk \- pattern scanning and processing language
-.SH SYNOPSIS
-.B gawk
-.ig
-[
-.B \-d
-] [
-.B \-D
-] [
-.B \-v
-] [
-.B \-V
-]
-..
-[
-.BI \-F\^ fs
-]
-.B \-f
-.I program-file
-[
-.B \-f
-.I program-file
-\&.\^.\^. ] [
-.B \-\^\-
-] file .\^.\^.
-.br
-.B gawk
-.ig
-[
-.B \-d
-] [
-.B \-D
-] [
-.B \-v
-] [
-.B \-V
-]
-..
-[
-.BI \-F\^ fs
-] [
-.B \-\^\-
-]
-.I program-text
-file .\^.\^.
-.SH DESCRIPTION
-.I Gawk
-is the GNU Project's implementation of the AWK programming language.
-It conforms to the definition and description of the language in
-.IR "The AWK Programming Language" ,
-by Aho, Kernighan, and Weinberger,
-with the additional features defined in the System V Release 4 version
-of \s-1UNIX\s+1
-.IR awk .
-.PP
-The command line consists of options to
-.I gawk
-itself, the AWK program text (if not supplied via the
-.B \-f
-option), and values to be made
-available in the
-.B ARGC
-and
-.B ARGV
-pre-defined AWK variables.
-.PP
-The options that
-.I gawk
-accepts are:
-.TP
-.BI \-F fs
-Use
-.I fs
-for the input field separator (the value of the
-.B FS
-predefined
-variable).
-.TP
-.BI \-f " program-file"
-Read the AWK program source from the file
-.IR program-file ,
-instead of from the first command line argument.
-.TP
-.B \-\^\-
-Signal the end of options. This is useful to allow further arguments to the
-AWK program itself to start with a ``\-''.
-This is mainly for consistency with the argument parsing convention used
-by most other System V programs.
-.PP
-Any other options are flagged as illegal, but are otherwise ignored.
-(However, see the
-.B "GNU EXTENSIONS"
-section, below.)
-.PP
-An AWK program consists of a sequence of pattern-action statements
-and optional function definitions.
-.RS
-.PP
-\fIpattern\fB { \fIaction statements\fB }\fR
-.br
-\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR
-.RE
-.PP
-.I Gawk
-first reads the program source from the
-.IR program-file (s)
-if specified, or from the first non-option argument on the command line.
-The
-.B \-f
-option may be used multiple times on the command line.
-.I Gawk
-will read the program text as if all the
-.IR program-file s
-had been concatenated together. This is useful for building libraries
-of AWK functions, without having to include them in each new AWK
-program that uses them. To use a library function in a file from a
-program typed in on the command line, specify
-.B /dev/tty
-as one of the
-.IR program-file s,
-type your program, and end it with a
-.B ^D
-(control-d).
-.PP
-The environment variable
-.B AWKPATH
-specifies a search path to use when finding source files named with
-the
-.B \-f
-option. If this variable does not exist, the default path is
-\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR.
-If a file name given to the
-.B \-f
-option contains a ``/'' character, no path search is performed.
-.PP
-.I Gawk
-compiles the program into an internal form,
-and then proceeds to read
-each file named in the
-.B ARGV
-array.
-If there are no files named on the command line,
-.I gawk
-reads the standard input.
-.PP
-If a ``file'' named on the command line has the form
-.IB var = val
-it is treated as a variable assignment. The variable
-.I var
-will be assigned the value
-.IR val .
-This is most useful for dynamically assigning values to the variables
-AWK uses to control how input is broken into fields and records. It
-is also useful for controlling state if multiple passes are needed over
-a single data file.
-.PP
-For each line in the input,
-.I gawk
-tests to see if it matches any
-.I pattern
-in the AWK program.
-For each pattern that the line matches, the associated
-.I action
-is executed.
-.SH VARIABLES AND FIELDS
-AWK variables are dynamic; they come into existence when they are
-first used. Their values are either floating-point numbers or strings,
-depending upon how they are used. AWK also has single dimension
-arrays; multiply dimensioned arrays may be simulated.
-There are several pre-defined variables that AWK sets as a program
-runs; these will be described as needed and summarized below.
-.PP
-As each input line is read,
-.I gawk
-splits the line into
-.IR fields ,
-using the value of the
-.B FS
-variable as the field separator.
-If
-.B FS
-is a single character, fields are separated by that character.
-Otherwise,
-.B FS
-is expected to be a full regular expression.
-In the special case that
-.B FS
-is a single blank, fields are separated
-by runs of blanks and/or tabs.
-Note that the value of
-.B IGNORECASE
-(see below) will also affect how fields are split when
-.B FS
-is a regular expression.
-.PP
-Each field in the input line may be referenced by its position,
-.BR $1 ,
-.BR $2 ,
-and so on.
-.B $0
-is the whole line. The value of a field may be assigned to as well.
-Fields need not be referenced by constants:
-.RS
-.PP
-.ft B
-n = 5
-.br
-print $n
-.ft R
-.RE
-.PP
-prints the fifth field in the input line.
-The variable
-.B NF
-is set to the total number of fields in the input line.
-.PP
-References to non-existent fields (i.e., fields after
-.BR $NF )
-produce the null string. However, assigning to a non-existent field
-(e.g.,
-.BR "$(NF+2) = 5" )
-will increase the value of
-.BR NF ,
-create any intervening fields with the null string as their value, and
-cause the value of
-.B $0
-to be recomputed, with the fields being separated by the value of
-.BR OFS .
-.SS Built-in Variables
-.PP
-AWK's built-in variables are:
-.PP
-.RS
-.TP \l'\fBIGNORECASE\fR'
-.B ARGC
-the number of command line arguments (does not include options to
-.IR gawk ,
-or the program source).
-.TP \l'\fBIGNORECASE\fR'
-.B ARGV
-array of command line arguments. The array is indexed from
-0 to
-.B ARGC
-\- 1.
-Dynamically changing the contents of
-.B ARGV
-can control the files used for data.
-.TP \l'\fBIGNORECASE\fR'
-.B ENVIRON
-An array containing the values of the current environment.
-The array is indexed by the environment variables, each element being
-the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
-.BR /u/arnold ).
-Changing this array does not affect the environment seen by programs which
-.I gawk
-spawns via redirection or the
-.B system
-function.
-.TP \l'\fBIGNORECASE\fR'
-.B FILENAME
-the name of the current input file.
-If no files are specified on the command line, the value of
-.B FILENAME
-is ``\-''.
-.TP \l'\fBIGNORECASE\fR'
-.B FNR
-the input record number in the current input file.
-.TP \l'\fBIGNORECASE\fR'
-.B FS
-the input field separator, a blank by default.
-.TP \l'\fBIGNORECASE\fR'
-.B IGNORECASE
-Controls the case-sensitivity of all regular expression operations. If
-.B IGNORECASE
-has a non-zero value, then pattern matching in rules,
-field splitting with
-.BR FS ,
-regular expression
-matching with
-.B ~
-and
-.BR !~ ,
-and the
-.BR gsub() ,
-.BR match() ,
-.BR split() ,
-and
-.B sub()
-pre-defined functions will all ignore case when doing regular expression
-operations. Thus, if
-.B IGNORECASE
-is not equal to zero,
-.B /aB/
-matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP,
-and \fB"AB"\fP.
-As with all AWK variables, the initial value of
-.B IGNORECASE
-is zero, so all regular expression operations are normally case-sensitive.
-.TP \l'\fBIGNORECASE\fR'
-.B NF
-the number of fields in the current input record.
-.TP \l'\fBIGNORECASE\fR'
-.B NR
-the total number of input records seen so far.
-.TP \l'\fBIGNORECASE\fR'
-.B OFMT
-the output format for numbers,
-.B %.6g
-by default.
-.TP \l'\fBIGNORECASE\fR'
-.B OFS
-the output field separator, a blank by default.
-.TP \l'\fBIGNORECASE\fR'
-.B ORS
-the output record separator, by default a newline.
-.TP \l'\fBIGNORECASE\fR'
-.B RS
-the input record separator, by default a newline.
-.B RS
-is exceptional in that only the first character of its string
-value is used for separating records. If
-.B RS
-is set to the null string, then records are separated by
-blank lines.
-When
-.B RS
-is set to the null string, then the newline character always acts as
-a field separator, in addition to whatever value
-.B FS
-may have.
-.TP \l'\fBIGNORECASE\fR'
-.B RSTART
-the index of the first character matched by
-.BR match() ;
-0 if no match.
-.TP \l'\fBIGNORECASE\fR'
-.B RLENGTH
-the length of the string matched by
-.BR match() ;
-\-1 if no match.
-.TP \l'\fBIGNORECASE\fR'
-.B SUBSEP
-the character used to separate multiple subscripts in array
-elements, by default \fB"\e034"\fR.
-.RE
-.SS Arrays
-.PP
-Arrays are subscripted with an expression between square brackets
-.RB ( [ " and " ] ).
-If the expression is an expression list
-.RI ( expr ", " expr " ...)"
-then the array subscript is a string consisting of the
-concatenation of the (string) value of each expression,
-separated by the value of the
-.B SUBSEP
-variable.
-This facility is used to simulate multiply dimensioned
-arrays. For example:
-.PP
-.RS
-.ft B
-i = "A" ;\^ j = "B" ;\^ k = "C"
-.br
-x[i,j,k] = "hello, world\en"
-.ft R
-.RE
-.PP
-assigns the string \fB"hello, world\en"\fR to the element of the array
-.B x
-which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK
-are associative, i.e. indexed by string values.
-.PP
-The special operator
-.B in
-may be used in an
-.B if
-or
-.B while
-statement to see if an array has an index consisting of a particular
-value.
-.PP
-.RS
-.ft B
-.nf
-if (val in array)
- print array[val]
-.fi
-.ft
-.RE
-.PP
-If the array has multiple subscripts, use
-.BR "(i, j) in array" .
-.PP
-The
-.B in
-construct may also be used in a
-.B for
-loop to iterate over all the elements of an array.
-.PP
-An element may be deleted from an array using the
-.B delete
-statement.
-.SS Variable Typing
-.PP
-Variables and fields
-may be (floating point) numbers, or strings, or both. How the
-value of a variable is interpreted depends upon its context. If used in
-a numeric expression, it will be treated as a number; if used as a string
-it will be treated as a string.
-.PP
-To force a variable to be treated as a number, add 0 to it; to force it
-to be treated as a string, concatenate it with the null string.
-.PP
-The AWK language defines comparisons as being done numerically if
-possible, otherwise one or both operands are converted to strings and
-a string comparison is performed.
-.PP
-Uninitialized variables have the numeric value 0 and the string value ""
-(the null, or empty, string).
-.SH PATTERNS AND ACTIONS
-AWK is a line oriented language. The pattern comes first, and then the
-action. Action statements are enclosed in
-.B {
-and
-.BR } .
-Either the pattern may be missing, or the action may be missing, but,
-of course, not both. If the pattern is missing, the action will be
-executed for every single line of input.
-A missing action is equivalent to
-.RS
-.PP
-.B "{ print }"
-.RE
-.PP
-which prints the entire line.
-.PP
-Comments begin with the ``#'' character, and continue until the
-end of the line.
-Blank lines may be used to separate statements.
-Normally, a statement ends with a newline; however, this is not the
-case for lines ending in
-a ``,'', ``{'', ``?'', ``:'', ``&&'', or ``||''.
-Lines ending in
-.B do
-or
-.B else
-also have their statements automatically continued on the following line.
-In other cases, a line can be continued by ending it with a ``\e'',
-in which case the newline will be ignored.
-.PP
-Multiple statements may
-be put on one line by separating them with a ``;''.
-This applies to both the statements within the action part of a
-pattern-action pair (the usual case),
-and to the pattern-action statements themselves.
-.SS Patterns
-AWK patterns may be one of the following:
-.PP
-.RS
-.nf
-.B BEGIN
-.B END
-.BI / "regular expression" /
-.I "relational expression"
-.IB pattern " && " pattern
-.IB pattern " || " pattern
-.IB pattern " ? " pattern " : " pattern
-.BI ( pattern )
-.BI ! " pattern"
-.IB pattern1 ", " pattern2
-.fi
-.RE
-.PP
-.B BEGIN
-and
-.B END
-are two special kinds of patterns which are not tested against
-the input.
-The action parts of all
-.B BEGIN
-patterns are merged as if all the statements had
-been written in a single
-.B BEGIN
-block. They are executed before any
-of the input is read. Similarly, all the
-.B END
-blocks are merged,
-and executed when all the input is exhausted (or when an
-.B exit
-statement is executed).
-.B BEGIN
-and
-.B END
-patterns cannot be combined with other patterns in pattern expressions.
-.B BEGIN
-and
-.B END
-patterns cannot have missing action parts.
-.PP
-For
-.BI / "regular expression" /
-patterns, the associated statement is executed for each input line that matches
-the regular expression.
-Regular expressions are the same as those in
-.IR egrep (1),
-and are summarized below.
-.PP
-A
-.I "relational expression"
-may use any of the operators defined below in the section on actions.
-These generally test whether certain fields match certain regular expressions.
-.PP
-The
-.BR && ,
-.BR || ,
-and
-.B !
-operators are logical AND, logical OR, and logical NOT, respectively, as in C.
-They do short-circuit evaluation, also as in C, and are used for combining
-more primitive pattern expressions. As in most languages, parentheses
-may be used to change the order of evaluation.
-.PP
-The
-.B ?\^:
-operator is like the same operator in C. If the first pattern is true
-then the pattern used for testing is the second pattern, otherwise it is
-the third. Only one of the second and third patterns is evaluated.
-.PP
-The
-.IB pattern1 ", " pattern2
-form of an expression is called a range pattern.
-It matches all input lines starting with a line that matches
-.IR pattern1 ,
-and continuing until a line that matches
-.IR pattern2 ,
-inclusive. It does not combine with any other sort of pattern expression.
-.SS Regular Expressions
-Regular expressions are the extended kind found in
-.IR egrep .
-They are composed of characters as follows:
-.RS
-.TP \l'[^abc...]'
-.I c
-matches the non-metacharacter
-.IR c .
-.TP \l'[^abc...]'
-.I \ec
-matches the literal character
-.IR c .
-.TP \l'[^abc...]'
-.B .
-matches any character except newline.
-.TP \l'[^abc...]'
-.B ^
-matches the beginning of a line or a string.
-.TP \l'[^abc...]'
-.B $
-matches the end of a line or a string.
-.TP \l'[^abc...]'
-.BI [ abc... ]
-character class, matches any of the characters
-.IR abc... .
-.TP \l'[^abc...]'
-.BI [^ abc... ]
-negated character class, matches any character except
-.I abc...
-and newline.
-.TP \l'[^abc...]'
-.IB r1 | r2
-alternation: matches either
-.I r1
-or
-.IR r2 .
-.TP \l'[^abc...]'
-.I r1r2
-concatenation: matches
-.IR r1 ,
-and then
-.IR r2 .
-.TP \l'[^abc...]'
-.IB r +
-matches one or more
-.IR r 's.
-.TP \l'[^abc...]'
-.IB r *
-matches zero or more
-.IR r 's.
-.TP \l'[^abc...]'
-.IB r ?
-matches zero or one
-.IR r 's.
-.TP \l'[^abc...]'
-.BI ( r )
-grouping: matches
-.IR r .
-.RE
-.SS Actions
-Action statements are enclosed in braces,
-.B {
-and
-.BR } .
-Action statements consist of the usual assignment, conditional, and looping
-statements found in most languages. The operators, control statements,
-and input/output statements
-available are patterned after those in C.
-.PP
-The operators in AWK, in order of increasing precedence, are
-.PP
-.RS
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "= += \-= *= /= %= ^="
-Assignment. Both absolute assignment
-.BI ( var " = " value )
-and operator-assignment (the other forms) are supported.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B ?:
-The C conditional expression. This has the form
-.IB expr1 " ? " expr2 " : " expr3\c
-\&. If
-.I expr1
-is true, the value of the expression is
-.IR expr2 ,
-otherwise it is
-.IR expr3 .
-Only one of
-.I expr2
-and
-.I expr3
-is evaluated.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B ||
-logical OR.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B &&
-logical AND.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "~ !~"
-regular expression match, negated match.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "< <= > >= != =="
-the regular relational operators.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.I blank
-string concatenation.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "+ \-"
-addition and subtraction.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "* / %"
-multiplication, division, and modulus.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "+ \- !"
-unary plus, unary minus, and logical negation.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B ^
-exponentiation (\fB**\fR may also be used, and \fB**=\fR for
-the assignment operator).
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "++ \-\^\-"
-increment and decrement, both prefix and postfix.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B $
-field reference.
-.RE
-.PP
-The control statements are
-as follows:
-.PP
-.RS
-.nf
-\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR]
-\fBwhile (\fIcondition\fB) \fIstatement \fR
-\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR
-\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR
-\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR
-\fBbreak\fR
-\fBcontinue\fR
-\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
-\fBexit\fR [ \fIexpression\fR ]
-\fB{ \fIstatements \fB}\fR
-.fi
-.RE
-.PP
-The input/output statements are as follows:
-.PP
-.RS
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI close( filename )
-close file (or pipe, see below).
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.B getline
-set
-.B $0
-from next input record; set
-.BR NF ,
-.BR NR ,
-.BR FNR .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI "getline <" file
-set
-.B $0
-from next record of
-.IR file ;
-set
-.BR NF .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI getline " var"
-set
-.I var
-from next input record; set
-.BR NR ,
-.BR FNR .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI getline " var" " <" file
-set
-.I var
-from next record of
-.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.B next
-Stop processing the current input record. The next input record
-is read and processing starts over with the first pattern in the
-AWK program. If the end of the input data is reached, the
-.B END
-block(s), if any, are executed.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.B print
-prints the current record.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI print " expr-list"
-prints expressions.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI print " expr-list" " >" file
-prints expressions on
-.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI printf " fmt, expr-list"
-format and print.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI printf " fmt, expr-list" " >" file
-format and print on
-.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
-.BI system( cmd-line )
-execute the command
-.IR cmd-line ,
-and return the exit status.
-(This may not be available on
-systems besides \s-1UNIX\s+1 and \s-1GNU\s+1.)
-.RE
-.PP
-Other input/output redirections are also allowed. For
-.B print
-and
-.BR printf ,
-.BI >> file
-appends output to the
-.IR file ,
-while
-.BI | " command"
-writes on a pipe.
-In a similar fashion,
-.IB command " | getline"
-pipes into
-.BR getline .
-.BR Getline
-will return 0 on end of file, and \-1 on an error.
-.PP
-The AWK versions of the
-.B printf
-and
-.B sprintf
-(see below)
-functions accept the following conversion specification formats:
-.RS
-.TP
-.B %c
-An ASCII character.
-If the argument used for
-.B %c
-is numeric, it is treated as a character and printed.
-Otherwise, the argument is assumed to be a string, and only the first
-character of that string is printed.
-.TP
-.B %d
-A decimal number (the integer part).
-.TP
-.B %e
-A floating point number of the form
-.BR [\-]d.ddddddE[+\^\-]dd .
-.TP
-.B %f
-A floating point number of the form
-.BR [\-]ddd.dddddd .
-.TP
-.B %g
-Use
-.B e
-or
-.B f
-conversion, whichever is shorter, with nonsignificant zeros suppressed.
-.TP
-.B %o
-An unsigned octal number (again, an integer).
-.TP
-.B %s
-A character string.
-.TP
-.B %x
-An unsigned hexadecimal number (an integer).
-.TP
-.B %%
-A single
-.B %
-character; no argument is converted.
-.RE
-.PP
-There are optional, additional parameters that may lie between the
-.B %
-and the control letter:
-.RS
-.TP
-.B \-
-The expression should be left-justified within its field.
-.TP
-.I width
-The field should be padded to this width. If the number has a leading
-zero, then the field will be padded with zeros.
-Otherwise it is padded with blanks.
-.TP
-.BI . prec
-A number indicating the maximum width of strings or digits to the right
-of the decimal point.
-.RE
-.PP
-The dynamic
-.I width
-and
-.I prec
-capabilities of the C library
-.B printf
-routines are not supported.
-However, they may be simulated by using
-the AWK concatenation operation to build up
-a format specification dynamically.
-.PP
-When doing I/O redirection from either
-.B print
-or
-.B printf
-into a file,
-or via
-.B getline
-from a file,
-.I gawk
-recognizes certain special filenames internally. These filenames
-allow access to open file descriptors inherited from
-.IR gawk 's
-parent process (usually the shell). The filenames are:
-.RS
-.TP
-.B /dev/stdin
-The standard input.
-.TP
-.B /dev/stdout
-The standard output.
-.TP
-.B /dev/stderr
-The standard error output.
-.TP
-.BI /dev/fd/\^ n
-The file denoted by the open file descriptor
-.IR n .
-.RE
-.PP
-These are particularly useful for error messages. For example:
-.PP
-.RS
-.ft B
-print "You blew it!" > "/dev/stderr"
-.ft R
-.RE
-.PP
-whereas you would otherwise have to use
-.PP
-.RS
-.ft B
-print "You blew it!" | "cat 1>&2"
-.ft R
-.RE
-.PP
-These file names may also be used on the command line to name data files.
-.PP
-AWK has the following pre-defined arithmetic functions:
-.PP
-.RS
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI atan2( y , " x" )
-returns the arctangent of
-.I y/x
-in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI cos( expr )
-returns the cosine of
-.IR expr ,
-which is in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI exp( expr )
-the exponential function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI int( expr )
-truncates to integer.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI log( expr )
-the natural logarithm function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.B rand()
-returns a random number between 0 and 1.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI sin( expr )
-returns the sine of
-.IR expr ,
-which is in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI sqrt( expr )
-the square root function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
-.BI srand( expr )
-use
-.I expr
-as a new seed for the random number generator. If no
-.I expr
-is provided, the time of day will be used.
-The return value is the previous seed for the random
-number generator.
-.RE
-.PP
-AWK has the following pre-defined string functions:
-.PP
-.RS
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
-for each substring matching the regular expression
-.I r
-in the string
-.IR t ,
-substitute the string
-.IR s ,
-and return the number of substitutions.
-If
-.I t
-is not supplied, use
-.BR $0 .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI index( s , " t" )
-returns the index of the string
-.I t
-in the string
-.IR s ,
-or 0 if
-.I t
-is not present.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI length( s )
-returns the length of the string
-.IR s .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI match( s , " r" )
-returns the position in
-.I s
-where the regular expression
-.I r
-occurs, or 0 if
-.I r
-is not present, and sets the values of
-.B RSTART
-and
-.BR RLENGTH .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR
-splits the string
-.I s
-into the array
-.I a
-on the regular expression
-.IR r ,
-and returns the number of fields. If
-.I r
-is omitted,
-.B FS
-is used instead.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI sprintf( fmt , " expr-list" )
-prints
-.I expr-list
-according to
-.IR fmt ,
-and returns the resulting string.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
-this is just like
-.BR gsub ,
-but only the first matching substring is replaced.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR
-returns the
-.IR n -character
-substring of
-.I s
-starting at
-.IR i .
-If
-.I n
-is omitted, the rest of
-.I s
-is used.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI tolower( str )
-returns a copy of the string
-.IR str ,
-with all the upper-case characters in
-.I str
-translated to their corresponding lower-case counterparts.
-Non-alphabetic characters are left unchanged.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
-.BI toupper( str )
-returns a copy of the string
-.IR str ,
-with all the lower-case characters in
-.I str
-translated to their corresponding upper-case counterparts.
-Non-alphabetic characters are left unchanged.
-.RE
-.PP
-String constants in AWK are sequences of characters enclosed
-between double quotes (\fB"\fR). Within strings, certain
-.I "escape sequences"
-are recognized, as in C. These are:
-.PP
-.RS
-.TP \l'\fB\e\fIddd\fR'
-.B \e\e
-A literal backslash.
-.TP \l'\fB\e\fIddd\fR'
-.B \ea
-The ``alert'' character; usually the ASCII BEL character.
-.TP \l'\fB\e\fIddd\fR'
-.B \eb
-backspace.
-.TP \l'\fB\e\fIddd\fR'
-.B \ef
-form-feed.
-.TP \l'\fB\e\fIddd\fR'
-.B \en
-new line.
-.TP \l'\fB\e\fIddd\fR'
-.B \er
-carriage return.
-.TP \l'\fB\e\fIddd\fR'
-.B \et
-horizontal tab.
-.TP \l'\fB\e\fIddd\fR'
-.B \ev
-vertical tab.
-.TP \l'\fB\e\fIddd\fR'
-.BI \ex "\^hex digits"
-The character represented by the string of hexadecimal digits following
-the
-.BR \ex .
-As in ANSI C, all following hexadecimal digits are considered part of
-the escape sequence.
-(This feature should tell us something about language design by committee.)
-E.g., "\ex1B" is the ASCII ESC (escape) character.
-.TP \l'\fB\e\fIddd\fR'
-.BI \e ddd
-The character represented by the 1-, 2-, or 3-digit sequence of octal
-digits. E.g. "\e033" is the ASCII ESC (escape) character.
-.TP \l'\fB\e\fIddd\fR'
-.BI \e c
-The literal character
-.IR c\^ .
-.RE
-.PP
-The escape sequences may also be used inside constant regular expressions
-(e.g.,
-.B "/[\ \et\ef\en\er\ev]/"
-matches whitespace characters).
-.SH FUNCTIONS
-Functions in AWK are defined as follows:
-.PP
-.RS
-\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR
-.RE
-.PP
-Functions are executed when called from within the action parts of regular
-pattern-action statements. Actual parameters supplied in the function
-call are used to instantiate the formal parameters declared in the function.
-Arrays are passed by reference, other variables are passed by value.
-.PP
-Since functions were not originally part of the AWK language, the provision
-for local variables is rather clumsy: they are declared as extra parameters
-in the parameter list. The convention is to separate local variables from
-real parameters by extra spaces in the parameter list. For example:
-.PP
-.RS
-.ft B
-.nf
-function f(p, q, a, b) { # a & b are local
- ..... }
-
-/abc/ { ... ; f(1, 2) ; ... }
-.fi
-.ft R
-.RE
-.PP
-The left parenthesis in a function call is required
-to immediately follow the function name,
-without any intervening white space.
-This is to avoid a syntactic ambiguity with the concatenation operator.
-This restriction does not apply to the built-in functions listed above.
-.PP
-Functions may call each other and may be recursive.
-Function parameters used as local variables are initialized
-to the null string and the number zero upon function invocation.
-.PP
-The word
-.B func
-may be used in place of
-.BR function .
-.SH EXAMPLES
-.nf
-Print and sort the login names of all users:
-
-.ft B
- BEGIN { FS = ":" }
- { print $1 | "sort" }
-
-.ft R
-Count lines in a file:
-
-.ft B
- { nlines++ }
- END { print nlines }
-
-.ft R
-Precede each line by its number in the file:
-
-.ft B
- { print FNR, $0 }
-
-.ft R
-Concatenate and line number (a variation on a theme):
-
-.ft B
- { print NR, $0 }
-.ft R
-.SH SEE ALSO
-.IR "The AWK Programming Language" ,
-Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
-Addison-Wesley, 1988. ISBN 0-201-07981-X.
-.SH SYSTEM V RELEASE 4 COMPATIBILITY
-A primary goal for
-.I gawk
-is compatibility with the latest version of \s-1UNIX\s+1
-.IR awk .
-To this end,
-.I gawk
-incorporates the following user visible
-features which are not described in the AWK book,
-but are part of
-.I awk
-in System V Release 4.
-.PP
-When processing arguments,
-.I gawk
-uses the special option ``\fB\-\^\-\fP'' to signal the end of
-arguments, and warns about, but otherwise ignores, undefined options.
-.PP
-The AWK book does not define the return value of
-.BR srand() .
-The System V Release 4 version of \s-1UNIX\s+1
-.I awk
-has it return the seed it was using, to allow keeping track
-of random number sequences. Therefore
-.B srand()
-in
-.I gawk
-also returns its current seed.
-.PP
-The use of multiple
-.B \-f
-options is a new feature, as is the
-.B ENVIRON
-array.
-.SH GNU EXTENSIONS
-.I Gawk
-has some extensions to System V
-.IR awk .
-They are described in this section. All the extensions described here
-can be disabled by compiling
-.I gawk
-with
-.BR \-DSTRICT ,
-or by invoking
-.I gawk
-with the name
-.IR awk .
-If the underlying operating system supports the
-.B /dev/fd
-directory and corresponding files, then
-.I gawk
-can be compiled with
-.B \-DNO_DEV_FD
-to disable the special filename processing.
-.PP
-The following features of
-.I gawk
-are not available in
-System V
-.IR awk .
-.RS
-.TP \l'\(bu'
-\(bu
-The
-.BR \ea ,
-.BR \ev ,
-or
-.B \ex
-escape sequences are not recognized.
-.TP \l'\(bu'
-\(bu
-The special file names available for I/O redirection are not recognized.
-.TP \l'\(bu'
-\(bu
-The
-.B tolower
-and
-.B toupper
-built-in string functions are not available.
-.TP \l'\(bu'
-\(bu
-The
-.B IGNORECASE
-variable and its side-effects are not available.
-.TP \l'\(bu'
-\(bu
-No path search is performed for files named via the
-.B \-f
-option. Therefore the
-.B AWKPATH
-environment variable is not special.
-.RE
-.PP
-The AWK book does not define the return value of the
-.B close
-function.
-.IR Gawk\^ 's
-.B close
-returns the value from
-.IR fclose (3),
-or
-.IR pclose (3),
-when closing a file or pipe, respectively.
-.PP
-When
-.I gawk
-is invoked as
-.IR awk ,
-if the
-.I fs
-argument to the
-.B \-F
-option is ``t'', then
-.B FS
-will be set to the tab character.
-Since this is a rather ugly special case, it is not the default behavior.
-.PP
-The rest of the features described in this section may change at some time in
-the future, or may go away entirely.
-You should not write programs that depend upon them.
-.PP
-.I Gawk
-accepts the following additional options:
-.ig
-.TP
-.B \-D
-Turn on general debugging and turn on
-.IR yacc (1)
-or
-.IR bison (1)
-debugging output during program parsing.
-This option should only be of interest to the
-.I gawk
-maintainers, and may not even be compiled into
-.IR gawk .
-.TP
-.B \-d
-Turn on general debugging and print the
-.I gawk
-internal tree as the program is executed.
-This option should only be of interest to the
-.I gawk
-maintainers, and may not even be compiled into
-.IR gawk .
-..
-.TP
-.B \-v
-Print version information for this particular copy of
-.I gawk
-on the error output.
-This is useful mainly for knowing if the current copy of
-.I gawk
-on your system
-is up to date with respect to whatever the Free Software Foundation
-is distributing.
-.TP
-.B \-V
-Print the GNU copyright information message on the error output.
-.SH BUGS
-The
-.B \-F
-option is not necessary given the command line variable assignment feature;
-it remains only for backwards compatibility.
-.SH AUTHORS
-The original version of \s-1UNIX\s+1
-.I awk
-was designed and implemented by Alfred Aho,
-Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan
-continues to maintain and enhance it.
-.PP
-Paul Rubin and Jay Fenlason,
-of the Free Software Foundation, wrote
-.IR gawk ,
-to be compatible with the original version of
-.I awk
-distributed in Seventh Edition \s-1UNIX\s+1.
-John Woods contributed a number of bug fixes.
-David Trueman of Dalhousie University, with contributions
-from Arnold Robbins at Emory University, made
-.I gawk
-compatible with the new version of \s-1UNIX\s+1
-.IR awk .
-.SH ACKNOWLEDGEMENTS
-Brian Kernighan of Bell Labs
-provided valuable assistance during testing and debugging.
-We thank him.
diff --git a/gawk.aux b/gawk.aux
deleted file mode 100644
index 9c137c3b..00000000
--- a/gawk.aux
+++ /dev/null
@@ -1,202 +0,0 @@
-'xrdef {Preface-pg}{1}
-'xrdef {Preface-snt}{}
-'xrdef {History-pg}{1}
-'xrdef {History-snt}{}
-'xrdef {License-pg}{3}
-'xrdef {License-snt}{}
-'xrdef {This Manual-pg}{9}
-'xrdef {This Manual-snt}{chapter'tie1}
-'xrdef {The Files-pg}{9}
-'xrdef {The Files-snt}{section'tie1.1}
-'xrdef {Getting Started-pg}{11}
-'xrdef {Getting Started-snt}{chapter'tie2}
-'xrdef {Very Simple-pg}{11}
-'xrdef {Very Simple-snt}{section'tie2.1}
-'xrdef {Two Rules-pg}{12}
-'xrdef {Two Rules-snt}{section'tie2.2}
-'xrdef {More Complex-pg}{13}
-'xrdef {More Complex-snt}{section'tie2.3}
-'xrdef {Running gawk-pg}{14}
-'xrdef {Running gawk-snt}{section'tie2.4}
-'xrdef {One-shot-pg}{15}
-'xrdef {One-shot-snt}{section'tie2.4.1}
-'xrdef {Read Terminal-pg}{15}
-'xrdef {Read Terminal-snt}{section'tie2.4.2}
-'xrdef {Long-pg}{16}
-'xrdef {Long-snt}{section'tie2.4.3}
-'xrdef {Executable Scripts-pg}{17}
-'xrdef {Executable Scripts-snt}{section'tie2.4.4}
-'xrdef {Command Line-pg}{18}
-'xrdef {Command Line-snt}{section'tie2.4.5}
-'xrdef {Comments-pg}{19}
-'xrdef {Comments-snt}{section'tie2.5}
-'xrdef {Statements/Lines-pg}{20}
-'xrdef {Statements/Lines-snt}{section'tie2.6}
-'xrdef {When-pg}{21}
-'xrdef {When-snt}{section'tie2.7}
-'xrdef {Reading Files-pg}{23}
-'xrdef {Reading Files-snt}{chapter'tie3}
-'xrdef {Records-pg}{23}
-'xrdef {Records-snt}{section'tie3.1}
-'xrdef {Fields-pg}{24}
-'xrdef {Fields-snt}{section'tie3.2}
-'xrdef {Non-Constant Fields-pg}{26}
-'xrdef {Non-Constant Fields-snt}{section'tie3.3}
-'xrdef {Changing Fields-pg}{27}
-'xrdef {Changing Fields-snt}{section'tie3.4}
-'xrdef {Field Separators-pg}{28}
-'xrdef {Field Separators-snt}{section'tie3.5}
-'xrdef {Multiple-pg}{31}
-'xrdef {Multiple-snt}{section'tie3.6}
-'xrdef {Assignment Options-pg}{32}
-'xrdef {Assignment Options-snt}{section'tie3.7}
-'xrdef {Getline-pg}{32}
-'xrdef {Getline-snt}{section'tie3.8}
-'xrdef {Close Input-pg}{36}
-'xrdef {Close Input-snt}{section'tie3.8.1}
-'xrdef {Printing-pg}{39}
-'xrdef {Printing-snt}{chapter'tie4}
-'xrdef {Print-pg}{39}
-'xrdef {Print-snt}{section'tie4.1}
-'xrdef {Print Examples-pg}{40}
-'xrdef {Print Examples-snt}{section'tie4.2}
-'xrdef {Output Separators-pg}{41}
-'xrdef {Output Separators-snt}{section'tie4.3}
-'xrdef {Redirection-pg}{42}
-'xrdef {Redirection-snt}{section'tie4.4}
-'xrdef {Close Output-pg}{43}
-'xrdef {Close Output-snt}{section'tie4.4.1}
-'xrdef {Printf-pg}{44}
-'xrdef {Printf-snt}{section'tie4.5}
-'xrdef {Basic Printf-pg}{45}
-'xrdef {Basic Printf-snt}{section'tie4.5.1}
-'xrdef {Format-Control-pg}{45}
-'xrdef {Format-Control-snt}{section'tie4.5.2}
-'xrdef {Modifiers-pg}{46}
-'xrdef {Modifiers-snt}{section'tie4.5.3}
-'xrdef {Printf Examples-pg}{46}
-'xrdef {Printf Examples-snt}{section'tie4.5.4}
-'xrdef {One-liners-pg}{49}
-'xrdef {One-liners-snt}{chapter'tie5}
-'xrdef {Patterns-pg}{51}
-'xrdef {Patterns-snt}{chapter'tie6}
-'xrdef {Empty-pg}{51}
-'xrdef {Empty-snt}{section'tie6.1}
-'xrdef {Regexp-pg}{52}
-'xrdef {Regexp-snt}{section'tie6.2}
-'xrdef {Regexp Usage-pg}{52}
-'xrdef {Regexp Usage-snt}{section'tie6.2.1}
-'xrdef {Regexp Operators-pg}{53}
-'xrdef {Regexp Operators-snt}{section'tie6.2.2}
-'xrdef {Comparison Patterns-pg}{55}
-'xrdef {Comparison Patterns-snt}{section'tie6.3}
-'xrdef {Ranges-pg}{56}
-'xrdef {Ranges-snt}{section'tie6.4}
-'xrdef {BEGIN/END-pg}{57}
-'xrdef {BEGIN/END-snt}{section'tie6.5}
-'xrdef {Boolean-pg}{58}
-'xrdef {Boolean-snt}{section'tie6.6}
-'xrdef {Conditional Patterns-pg}{59}
-'xrdef {Conditional Patterns-snt}{section'tie6.7}
-'xrdef {Actions-pg}{61}
-'xrdef {Actions-snt}{chapter'tie7}
-'xrdef {Expressions-pg}{63}
-'xrdef {Expressions-snt}{chapter'tie8}
-'xrdef {Constants-pg}{63}
-'xrdef {Constants-snt}{section'tie8.1}
-'xrdef {Variables-pg}{64}
-'xrdef {Variables-snt}{section'tie8.2}
-'xrdef {Arithmetic Ops-pg}{65}
-'xrdef {Arithmetic Ops-snt}{section'tie8.3}
-'xrdef {Concatenation-pg}{65}
-'xrdef {Concatenation-snt}{section'tie8.4}
-'xrdef {Comparison Ops-pg}{66}
-'xrdef {Comparison Ops-snt}{section'tie8.5}
-'xrdef {Boolean Ops-pg}{67}
-'xrdef {Boolean Ops-snt}{section'tie8.6}
-'xrdef {Assignment Ops-pg}{68}
-'xrdef {Assignment Ops-snt}{section'tie8.7}
-'xrdef {Increment Ops-pg}{70}
-'xrdef {Increment Ops-snt}{section'tie8.8}
-'xrdef {Conversion-pg}{71}
-'xrdef {Conversion-snt}{section'tie8.9}
-'xrdef {Conditional Exp-pg}{72}
-'xrdef {Conditional Exp-snt}{section'tie8.10}
-'xrdef {Function Calls-pg}{73}
-'xrdef {Function Calls-snt}{section'tie8.11}
-'xrdef {Statements-pg}{75}
-'xrdef {Statements-snt}{chapter'tie9}
-'xrdef {If-pg}{75}
-'xrdef {If-snt}{section'tie9.1}
-'xrdef {While-pg}{76}
-'xrdef {While-snt}{section'tie9.2}
-'xrdef {Do-pg}{77}
-'xrdef {Do-snt}{section'tie9.3}
-'xrdef {For-pg}{77}
-'xrdef {For-snt}{section'tie9.4}
-'xrdef {Break-pg}{79}
-'xrdef {Break-snt}{section'tie9.5}
-'xrdef {Continue-pg}{80}
-'xrdef {Continue-snt}{section'tie9.6}
-'xrdef {Next-pg}{81}
-'xrdef {Next-snt}{section'tie9.7}
-'xrdef {Exit-pg}{82}
-'xrdef {Exit-snt}{section'tie9.8}
-'xrdef {Arrays-pg}{83}
-'xrdef {Arrays-snt}{chapter'tie10}
-'xrdef {Array Intro-pg}{83}
-'xrdef {Array Intro-snt}{section'tie10.1}
-'xrdef {Reference to Elements-pg}{85}
-'xrdef {Reference to Elements-snt}{section'tie10.2}
-'xrdef {Assigning Elements-pg}{86}
-'xrdef {Assigning Elements-snt}{section'tie10.3}
-'xrdef {Array Example-pg}{86}
-'xrdef {Array Example-snt}{section'tie10.4}
-'xrdef {Scanning an Array-pg}{87}
-'xrdef {Scanning an Array-snt}{section'tie10.5}
-'xrdef {Delete-pg}{88}
-'xrdef {Delete-snt}{section'tie10.6}
-'xrdef {Multi-dimensional-pg}{89}
-'xrdef {Multi-dimensional-snt}{section'tie10.7}
-'xrdef {Multi-scanning-pg}{91}
-'xrdef {Multi-scanning-snt}{section'tie10.8}
-'xrdef {Built-in-pg}{93}
-'xrdef {Built-in-snt}{chapter'tie11}
-'xrdef {Numeric Functions-pg}{93}
-'xrdef {Numeric Functions-snt}{section'tie11.1}
-'xrdef {String Functions-pg}{95}
-'xrdef {String Functions-snt}{section'tie11.2}
-'xrdef {I/O Functions-pg}{98}
-'xrdef {I/O Functions-snt}{section'tie11.3}
-'xrdef {User-defined-pg}{99}
-'xrdef {User-defined-snt}{chapter'tie12}
-'xrdef {Definition Syntax-pg}{99}
-'xrdef {Definition Syntax-snt}{section'tie12.1}
-'xrdef {Function Example-pg}{100}
-'xrdef {Function Example-snt}{section'tie12.2}
-'xrdef {Function Caveats-pg}{101}
-'xrdef {Function Caveats-snt}{section'tie12.3}
-'xrdef {Return Statement-pg}{102}
-'xrdef {Return Statement-snt}{section'tie12.4}
-'xrdef {Special-pg}{105}
-'xrdef {Special-snt}{chapter'tie13}
-'xrdef {User-modified-pg}{105}
-'xrdef {User-modified-snt}{section'tie13.1}
-'xrdef {Auto-set-pg}{106}
-'xrdef {Auto-set-snt}{section'tie13.2}
-'xrdef {Sample Program-pg}{109}
-'xrdef {Sample Program-snt}{}
-'xrdef {Notes-pg}{111}
-'xrdef {Notes-snt}{}
-'xrdef {Extensions-pg}{111}
-'xrdef {Extensions-snt}{}
-'xrdef {Future Extensions-pg}{111}
-'xrdef {Future Extensions-snt}{}
-'xrdef {Improvements-pg}{112}
-'xrdef {Improvements-snt}{}
-'xrdef {Manual Improvements-pg}{113}
-'xrdef {Manual Improvements-snt}{}
-'xrdef {Glossary-pg}{115}
-'xrdef {Glossary-snt}{}
-'xrdef {Index-pg}{119}
-'xrdef {Index-snt}{}
diff --git a/gawk.cp b/gawk.cp
deleted file mode 100644
index 7ff13135..00000000
--- a/gawk.cp
+++ /dev/null
@@ -1,234 +0,0 @@
-\entry {What is awk}{1}{What is {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Uses of awk}{1}{Uses of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Acronym}{1}{Acronym}
-\entry {History of awk}{1}{History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Manual, using this}{9}{Manual, using this}
-\entry {Using this manual}{9}{Using this manual}
-\entry {Language, awk}{9}{Language, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Program, awk}{9}{Program, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {awk language}{9}{{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} language}
-\entry {awk program}{9}{{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}
-\entry {Input file, sample}{9}{Input file, sample}
-\entry {Sample input file}{9}{Sample input file}
-\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file}{9}{`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file}
-\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file}{10}{`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file}
-\entry {Script, definition of}{11}{Script, definition of}
-\entry {Rule, definition of}{11}{Rule, definition of}
-\entry {Pattern, definition of}{11}{Pattern, definition of}
-\entry {Action, definition of}{11}{Action, definition of}
-\entry {Program, definition of}{11}{Program, definition of}
-\entry {Basic function of gawk}{11}{Basic function of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}
-\entry {Action, curly braces}{11}{Action, curly braces}
-\entry {Curly braces}{11}{Curly braces}
-\entry {print $0}{11}{{\fam \ttfam \tentt \rawbackslash \frenchspacing print $0}\hbox {}}
-\entry {Action, default}{11}{Action, default}
-\entry {Pattern, default}{11}{Pattern, default}
-\entry {Default action}{11}{Default action}
-\entry {Default pattern}{11}{Default pattern}
-\entry {How gawk works}{12}{How gawk works}
-\entry {Command line formats}{14}{Command line formats}
-\entry {Running gawk programs}{14}{Running gawk programs}
-\entry {Single quotes, why they are needed}{15}{Single quotes, why they are needed}
-\entry {Standard input}{15}{Standard input}
-\entry {Input, standard}{15}{Input, standard}
-\entry {Case sensitivity and gawk}{16}{Case sensitivity and gawk}
-\entry {Pattern, case sensitive}{16}{Pattern, case sensitive}
-\entry {running long programs}{16}{running long programs}
-\entry {-f option}{16}{-f option}
-\entry {program file}{16}{program file}
-\entry {file, awk program}{16}{file, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}
-\entry {Executable Scripts}{17}{Executable Scripts}
-\entry {Scripts, Executable}{17}{Scripts, Executable}
-\entry {Self contained Programs}{17}{Self contained Programs}
-\entry {Program, Self contained}{17}{Program, Self contained}
-\entry {#!}{17}{#!}
-\entry {Shell Scripts}{17}{Shell Scripts}
-\entry {Scripts, Shell}{17}{Scripts, Shell}
-\entry {Command Line}{18}{Command Line}
-\entry {Invocation of gawk}{18}{Invocation of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}
-\entry {Arguments, Command Line}{18}{Arguments, Command Line}
-\entry {Options, Command Line}{18}{Options, Command Line}
-\entry {Multiple passes over data}{19}{Multiple passes over data}
-\entry {Passes, Multiple}{19}{Passes, Multiple}
-\entry {Comments}{19}{Comments}
-\entry {Use of comments}{19}{Use of comments}
-\entry {Documenting awk programs}{19}{Documenting {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} programs}
-\entry {Programs, documenting}{19}{Programs, documenting}
-\entry {Backslash Continuation}{20}{Backslash Continuation}
-\entry {Continuing statements on the next line}{20}{Continuing statements on the next line}
-\entry {Multiple statements on one line}{21}{Multiple statements on one line}
-\entry {When to use awk}{21}{When to use {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Applications of awk}{21}{Applications of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Emacs Lisp}{21}{Emacs Lisp}
-\entry {Reading files, general}{23}{Reading files, general}
-\entry {Input, general}{23}{Input, general}
-\entry {Standard input}{23}{Standard input}
-\entry {Input, standard}{23}{Input, standard}
-\entry {General input}{23}{General input}
-\entry {Record separator, RS}{23}{Record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}
-\entry {Changing the record separator}{23}{Changing the record separator}
-\entry {Number of records, NR}{24}{Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}
-\entry {Number of records, FNR}{24}{Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}}
-\entry {Examining fields}{24}{Examining fields}
-\entry {Fields}{24}{Fields}
-\entry {Accessing fields}{24}{Accessing fields}
-\entry {$ (field operator)}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {} (field operator)}
-\entry {Operators, $}{24}{Operators, {\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {}}
-\entry {$NF, last field in record}{25}{{\fam \ttfam \tentt \rawbackslash \frenchspacing $NF}\hbox {}, last field in record}
-\entry {Number of fields, NF}{25}{Number of fields, {\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}
-\entry {Fields, negative-numbered}{26}{Fields, negative-numbered}
-\entry {Negative-numbered fields}{26}{Negative-numbered fields}
-\entry {Field, changing contents of}{27}{Field, changing contents of}
-\entry {Changing contents of a field}{27}{Changing contents of a field}
-\entry {Fields, semantics of}{28}{Fields, semantics of}
-\entry {Fields, separating}{28}{Fields, separating}
-\entry {Field separator, FS}{28}{Field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}
-\entry {Separator character, choice of}{29}{Separator character, choice of}
-\entry {Field separator, choice of}{29}{Field separator, choice of}
-\entry {Regular expressions, field separators and}{29}{Regular expressions, field separators and}
-\entry {Field separator, setting on command line}{29}{Field separator, setting on command line}
-\entry {Command line, setting FS on}{29}{Command line, setting {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {} on}
-\entry {Multiple line records}{31}{Multiple line records}
-\entry {Input, multiple line records}{31}{Input, multiple line records}
-\entry {Reading files, multiple line records}{31}{Reading files, multiple line records}
-\entry {Records, multiple line}{31}{Records, multiple line}
-\entry {Input, getline function}{32}{Input, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}
-\entry {Reading files, getline function}{32}{Reading files, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}
-\entry {close statement for input}{36}{{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for input}
-\entry {Printing, general}{39}{Printing, general}
-\entry {Output}{39}{Output}
-\entry {print statement}{39}{{\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} statement}
-\entry {Output field separator, OFS}{41}{Output field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}
-\entry {Output record separator, ORS}{41}{Output record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}
-\entry {Output redirection}{42}{Output redirection}
-\entry {Redirection of output}{42}{Redirection of output}
-\entry {{\fam \ttfam \tentt \gtr }}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }}\hbox {}}
-\entry {{\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}\hbox {}}
-\entry {{\fam \ttfam \tentt \char '174}}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \char '174}}\hbox {}}
-\entry {Pipes for output}{42}{Pipes for output}
-\entry {Output, piping}{42}{Output, piping}
-\entry {close statement for output}{43}{{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for output}
-\entry {Closing files and pipes}{43}{Closing files and pipes}
-\entry {Formatted output}{44}{Formatted output}
-\entry {Output, formatted}{44}{Output, formatted}
-\entry {printf statement, format of}{45}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} statement, format of}
-\entry {Format string}{45}{Format string}
-\entry {printf, format-control characters}{45}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, format-control characters}
-\entry {Format specifier}{45}{Format specifier}
-\entry {printf, modifiers}{46}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, modifiers}
-\entry {Modifiers (in format specifiers)}{46}{Modifiers (in format specifiers)}
-\entry {One-liners}{49}{One-liners}
-\entry {Patterns, definition of}{51}{Patterns, definition of}
-\entry {Patterns, types of}{51}{Patterns, types of}
-\entry {Empty pattern}{51}{Empty pattern}
-\entry {Pattern, empty}{51}{Pattern, empty}
-\entry {Pattern, regular expressions}{52}{Pattern, regular expressions}
-\entry {Regexp}{52}{Regexp}
-\entry {Regular expressions as patterns}{52}{Regular expressions as patterns}
-\entry {Regular expression matching operators}{52}{Regular expression matching operators}
-\entry {String-matching operators}{52}{String-matching operators}
-\entry {Operators, string-matching}{52}{Operators, string-matching}
-\entry {Operators, regular expression matching}{52}{Operators, regular expression matching}
-\entry {regexp search operators}{52}{regexp search operators}
-\entry {Computed Regular Expressions}{52}{Computed Regular Expressions}
-\entry {Regular Expressions, Computed}{52}{Regular Expressions, Computed}
-\entry {Dynamic Regular Expressions}{52}{Dynamic Regular Expressions}
-\entry {Regular Expressions, Dynamic}{52}{Regular Expressions, Dynamic}
-\entry {Metacharacters}{53}{Metacharacters}
-\entry {Regular expression, metacharacters}{53}{Regular expression, metacharacters}
-\entry {Comparison expressions as patterns}{55}{Comparison expressions as patterns}
-\entry {Pattern, comparison expressions}{55}{Pattern, comparison expressions}
-\entry {Relational operators}{55}{Relational operators}
-\entry {Operators, relational}{55}{Operators, relational}
-\entry {Range pattern}{56}{Range pattern}
-\entry {patterns, range}{56}{patterns, range}
-\entry {BEGIN, special pattern}{57}{{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}, special pattern}
-\entry {Patterns, BEGIN}{57}{Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}}
-\entry {END, special pattern}{57}{{\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}, special pattern}
-\entry {Patterns, END}{57}{Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}}
-\entry {Patterns, boolean}{58}{Patterns, boolean}
-\entry {Boolean patterns}{58}{Boolean patterns}
-\entry {Conditional Patterns}{59}{Conditional Patterns}
-\entry {Patterns, Conditional}{59}{Patterns, Conditional}
-\entry {Ternary Operator}{59}{Ternary Operator}
-\entry {Operator, Ternary}{59}{Operator, Ternary}
-\entry {Action, general}{61}{Action, general}
-\entry {Curly braces}{61}{Curly braces}
-\entry {Action, curly braces}{61}{Action, curly braces}
-\entry {Action, separating statements}{61}{Action, separating statements}
-\entry {Expressions}{61}{Expressions}
-\entry {Statements}{61}{Statements}
-\entry {Compound statements}{61}{Compound statements}
-\entry {Function definitions}{61}{Function definitions}
-\entry {Constants, types of}{63}{Constants, types of}
-\entry {String constants}{63}{String constants}
-\entry {String value}{63}{String value}
-\entry {Numerical constant}{63}{Numerical constant}
-\entry {Numerical value}{63}{Numerical value}
-\entry {Differences between gawk and awk}{63}{Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {Escape sequence notation}{63}{Escape sequence notation}
-\entry {Variables, user-defined}{64}{Variables, user-defined}
-\entry {User-defined variables}{64}{User-defined variables}
-\entry {Built-in variables}{64}{Built-in variables}
-\entry {Variables, built-in}{64}{Variables, built-in}
-\entry {Arithmetic operators}{65}{Arithmetic operators}
-\entry {Operators, arithmetic}{65}{Operators, arithmetic}
-\entry {Mod function, semantics of}{65}{Mod function, semantics of}
-\entry {Differences between gawk and awk}{65}{Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}
-\entry {String operators}{65}{String operators}
-\entry {Operators, string}{65}{Operators, string}
-\entry {Concatenation}{65}{Concatenation}
-\entry {Comparison expressions}{66}{Comparison expressions}
-\entry {Expressions, comparison}{66}{Expressions, comparison}
-\entry {Relational operators}{66}{Relational operators}
-\entry {Operators, relational}{66}{Operators, relational}
-\entry {Expressions, boolean}{67}{Expressions, boolean}
-\entry {Boolean expressions}{67}{Boolean expressions}
-\entry {Operators, boolean}{67}{Operators, boolean}
-\entry {Boolean operators}{67}{Boolean operators}
-\entry {Assignment operators}{68}{Assignment operators}
-\entry {Operators, assignment}{68}{Operators, assignment}
-\entry {Lvalue}{68}{Lvalue}
-\entry {Increment operators}{70}{Increment operators}
-\entry {Operators, increment}{70}{Operators, increment}
-\entry {Conversion of strings and numbers}{71}{Conversion of strings and numbers}
-\entry {Conditional expression}{72}{Conditional expression}
-\entry {Expression, conditional}{72}{Expression, conditional}
-\entry {Function call}{73}{Function call}
-\entry {Calling a function}{73}{Calling a function}
-\entry {Arguments in function call}{73}{Arguments in function call}
-\entry {Statements}{75}{Statements}
-\entry {if statement}{75}{{\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} statement}
-\entry {while statement}{76}{{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} statement}
-\entry {Loop}{76}{Loop}
-\entry {Body of a loop}{76}{Body of a loop}
-\entry {for statement}{77}{{\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}
-\entry {break statement}{79}{{\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} statement}
-\entry {Loops, breaking out of}{79}{Loops, breaking out of}
-\entry {continue statement}{80}{{\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} statement}
-\entry {next statement}{81}{{\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} statement}
-\entry {exit statement}{82}{{\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} statement}
-\entry {Arrays}{83}{Arrays}
-\entry {Arrays, definition of}{83}{Arrays, definition of}
-\entry {Associative arrays}{83}{Associative arrays}
-\entry {Array reference}{85}{Array reference}
-\entry {Element of array}{85}{Element of array}
-\entry {Reference to array}{85}{Reference to array}
-\entry {Arrays, determining presence of elements}{85}{Arrays, determining presence of elements}
-\entry {Array assignment}{86}{Array assignment}
-\entry {Element assignment}{86}{Element assignment}
-\entry {for (x in \dots {})}{87}{{\fam \ttfam \tentt \rawbackslash \frenchspacing for (x in \dots {})}\hbox {}}
-\entry {Arrays, special for statement}{87}{Arrays, special {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}
-\entry {Scanning an array}{87}{Scanning an array}
-\entry {delete statement}{88}{{\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} statement}
-\entry {Deleting elements of arrays}{88}{Deleting elements of arrays}
-\entry {Removing elements of arrays}{88}{Removing elements of arrays}
-\entry {Arrays, deleting an element}{88}{Arrays, deleting an element}
-\entry {Subscripts, multi-dimensional in arrays}{89}{Subscripts, multi-dimensional in arrays}
-\entry {Arrays, multi-dimensional subscripts}{89}{Arrays, multi-dimensional subscripts}
-\entry {Built-in functions, list of}{93}{Built-in functions, list of}
-\entry {Interaction of awk with other programs}{98}{Interaction of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} with other programs}
-\entry {User-defined functions}{99}{User-defined functions}
-\entry {Functions, user-defined}{99}{Functions, user-defined}
-\entry {return statement}{102}{{\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement}
-\entry {Special variables, user modifiable}{105}{Special variables, user modifiable}
diff --git a/gawk.cps b/gawk.cps
deleted file mode 100644
index 9c5c7c44..00000000
--- a/gawk.cps
+++ /dev/null
@@ -1,253 +0,0 @@
-\initial {#}
-\entry {#!}{17}
-\initial {$}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {} (field operator)}{25}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing $NF}\hbox {}, last field in record}{25}
-\initial {-}
-\entry {-f option}{16}
-\initial {`}
-\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file}{9}
-\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file}{10}
-\initial {{\fam \ttfam \tentt \char '174}}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \char '174}}\hbox {}}{42}
-\initial {{\fam \ttfam \tentt \gtr }}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }}\hbox {}}{42}
-\initial {{\fam \ttfam \tentt \gtr }}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}\hbox {}}{42}
-\initial {A}
-\entry {Accessing fields}{24}
-\entry {Acronym}{1}
-\entry {Action, curly braces}{11, 61}
-\entry {Action, default}{12}
-\entry {Action, definition of}{11}
-\entry {Action, general}{61}
-\entry {Action, separating statements}{61}
-\entry {Applications of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{21}
-\entry {Arguments in function call}{73}
-\entry {Arguments, Command Line}{18}
-\entry {Arithmetic operators}{65}
-\entry {Array assignment}{86}
-\entry {Array reference}{85}
-\entry {Arrays}{83}
-\entry {Arrays, definition of}{83}
-\entry {Arrays, deleting an element}{88}
-\entry {Arrays, determining presence of elements}{85}
-\entry {Arrays, multi-dimensional subscripts}{89}
-\entry {Arrays, special {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}{87}
-\entry {Assignment operators}{68}
-\entry {Associative arrays}{83}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} language}{9}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}{9}
-\initial {B}
-\entry {Backslash Continuation}{20}
-\entry {Basic function of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{11}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}, special pattern}{57}
-\entry {Body of a loop}{76}
-\entry {Boolean expressions}{67}
-\entry {Boolean operators}{67}
-\entry {Boolean patterns}{58}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} statement}{79}
-\entry {Built-in functions, list of}{93}
-\entry {Built-in variables}{64}
-\initial {C}
-\entry {Calling a function}{73}
-\entry {Case sensitivity and gawk}{16}
-\entry {Changing contents of a field}{27}
-\entry {Changing the record separator}{23}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for input}{36}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for output}{43}
-\entry {Closing files and pipes}{43}
-\entry {Command Line}{18}
-\entry {Command line formats}{14}
-\entry {Command line, setting {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {} on}{29}
-\entry {Comments}{19}
-\entry {Comparison expressions}{66}
-\entry {Comparison expressions as patterns}{55}
-\entry {Compound statements}{61}
-\entry {Computed Regular Expressions}{52}
-\entry {Concatenation}{65}
-\entry {Conditional expression}{72}
-\entry {Conditional Patterns}{59}
-\entry {Constants, types of}{63}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} statement}{80}
-\entry {Continuing statements on the next line}{20}
-\entry {Conversion of strings and numbers}{71}
-\entry {Curly braces}{11, 61}
-\initial {D}
-\entry {Default action}{12}
-\entry {Default pattern}{12}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} statement}{88}
-\entry {Deleting elements of arrays}{88}
-\entry {Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{63, 65}
-\entry {Documenting {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} programs}{19}
-\entry {Dynamic Regular Expressions}{52}
-\initial {E}
-\entry {Element assignment}{86}
-\entry {Element of array}{85}
-\entry {Emacs Lisp}{21}
-\entry {Empty pattern}{51}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}, special pattern}{57}
-\entry {Escape sequence notation}{63}
-\entry {Examining fields}{24}
-\entry {Executable Scripts}{17}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} statement}{82}
-\entry {Expression, conditional}{72}
-\entry {Expressions}{61}
-\entry {Expressions, boolean}{67}
-\entry {Expressions, comparison}{66}
-\initial {F}
-\entry {Field separator, choice of}{29}
-\entry {Field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}{28}
-\entry {Field separator, setting on command line}{29}
-\entry {Field, changing contents of}{27}
-\entry {Fields}{24}
-\entry {Fields, negative-numbered}{26}
-\entry {Fields, semantics of}{28}
-\entry {Fields, separating}{28}
-\entry {file, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}{16}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing for (x in \dots {})}\hbox {}}{87}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}{77}
-\entry {Format specifier}{45}
-\entry {Format string}{45}
-\entry {Formatted output}{44}
-\entry {Function call}{73}
-\entry {Function definitions}{61}
-\entry {Functions, user-defined}{99}
-\initial {G}
-\entry {General input}{23}
-\initial {H}
-\entry {History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1}
-\entry {How gawk works}{12}
-\initial {I}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} statement}{75}
-\entry {Increment operators}{70}
-\entry {Input file, sample}{9}
-\entry {Input, general}{23}
-\entry {Input, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}{32}
-\entry {Input, multiple line records}{31}
-\entry {Input, standard}{15, 23}
-\entry {Interaction of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} with other programs}{98}
-\entry {Invocation of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{18}
-\initial {L}
-\entry {Language, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{9}
-\entry {Loop}{76}
-\entry {Loops, breaking out of}{79}
-\entry {Lvalue}{68}
-\initial {M}
-\entry {Manual, using this}{9}
-\entry {Metacharacters}{53}
-\entry {Mod function, semantics of}{65}
-\entry {Modifiers (in format specifiers)}{46}
-\entry {Multiple line records}{31}
-\entry {Multiple passes over data}{19}
-\entry {Multiple statements on one line}{21}
-\initial {N}
-\entry {Negative-numbered fields}{26}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} statement}{81}
-\entry {Number of fields, {\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}{25}
-\entry {Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}}{24}
-\entry {Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}{24}
-\entry {Numerical constant}{63}
-\entry {Numerical value}{63}
-\initial {O}
-\entry {One-liners}{49}
-\entry {Operator, Ternary}{59}
-\entry {Operators, {\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {}}{25}
-\entry {Operators, arithmetic}{65}
-\entry {Operators, assignment}{68}
-\entry {Operators, boolean}{67}
-\entry {Operators, increment}{70}
-\entry {Operators, regular expression matching}{52}
-\entry {Operators, relational}{55, 66}
-\entry {Operators, string}{65}
-\entry {Operators, string-matching}{52}
-\entry {Options, Command Line}{18}
-\entry {Output}{39}
-\entry {Output field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}{41}
-\entry {Output record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}{41}
-\entry {Output redirection}{42}
-\entry {Output, formatted}{44}
-\entry {Output, piping}{42}
-\initial {P}
-\entry {Passes, Multiple}{19}
-\entry {Pattern, case sensitive}{16}
-\entry {Pattern, comparison expressions}{55}
-\entry {Pattern, default}{12}
-\entry {Pattern, definition of}{11}
-\entry {Pattern, empty}{51}
-\entry {Pattern, regular expressions}{52}
-\entry {Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}}{57}
-\entry {Patterns, boolean}{58}
-\entry {Patterns, Conditional}{59}
-\entry {Patterns, definition of}{51}
-\entry {Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}}{57}
-\entry {patterns, range}{56}
-\entry {Patterns, types of}{51}
-\entry {Pipes for output}{42}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing print $0}\hbox {}}{11}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} statement}{39}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} statement, format of}{45}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, format-control characters}{45}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, modifiers}{46}
-\entry {Printing, general}{39}
-\entry {program file}{16}
-\entry {Program, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{9}
-\entry {Program, definition of}{11}
-\entry {Program, Self contained}{17}
-\entry {Programs, documenting}{19}
-\initial {R}
-\entry {Range pattern}{56}
-\entry {Reading files, general}{23}
-\entry {Reading files, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}{32}
-\entry {Reading files, multiple line records}{31}
-\entry {Record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}{23}
-\entry {Records, multiple line}{31}
-\entry {Redirection of output}{42}
-\entry {Reference to array}{85}
-\entry {Regexp}{52}
-\entry {regexp search operators}{52}
-\entry {Regular expression matching operators}{52}
-\entry {Regular expression, metacharacters}{53}
-\entry {Regular expressions as patterns}{52}
-\entry {Regular Expressions, Computed}{52}
-\entry {Regular Expressions, Dynamic}{52}
-\entry {Regular expressions, field separators and}{29}
-\entry {Relational operators}{55, 66}
-\entry {Removing elements of arrays}{88}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement}{102}
-\entry {Rule, definition of}{11}
-\entry {Running gawk programs}{14}
-\entry {running long programs}{16}
-\initial {S}
-\entry {Sample input file}{9}
-\entry {Scanning an array}{87}
-\entry {Script, definition of}{11}
-\entry {Scripts, Executable}{17}
-\entry {Scripts, Shell}{17}
-\entry {Self contained Programs}{17}
-\entry {Separator character, choice of}{29}
-\entry {Shell Scripts}{17}
-\entry {Single quotes, why they are needed}{15}
-\entry {Special variables, user modifiable}{105}
-\entry {Standard input}{15, 23}
-\entry {Statements}{61, 75}
-\entry {String constants}{63}
-\entry {String operators}{65}
-\entry {String value}{63}
-\entry {String-matching operators}{52}
-\entry {Subscripts, multi-dimensional in arrays}{89}
-\initial {T}
-\entry {Ternary Operator}{59}
-\initial {U}
-\entry {Use of comments}{19}
-\entry {User-defined functions}{99}
-\entry {User-defined variables}{64}
-\entry {Uses of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1}
-\entry {Using this manual}{9}
-\initial {V}
-\entry {Variables, built-in}{64}
-\entry {Variables, user-defined}{64}
-\initial {W}
-\entry {What is {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1}
-\entry {When to use {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{21}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} statement}{76}
diff --git a/gawk.dvi b/gawk.dvi
deleted file mode 100644
index 8c863aa4..00000000
--- a/gawk.dvi
+++ /dev/null
Binary files differ
diff --git a/gawk.fn b/gawk.fn
deleted file mode 100644
index d45d54e7..00000000
--- a/gawk.fn
+++ /dev/null
@@ -1,10 +0,0 @@
-\entry {getline}{32}{{\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {}}
-\entry {match}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}}
-\entry {length}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing length}\hbox {}}
-\entry {match}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}}
-\entry {split}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing split}\hbox {}}
-\entry {sprintf}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing sprintf}\hbox {}}
-\entry {sub}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing sub}\hbox {}}
-\entry {gsub}{97}{{\fam \ttfam \tentt \rawbackslash \frenchspacing gsub}\hbox {}}
-\entry {substr}{97}{{\fam \ttfam \tentt \rawbackslash \frenchspacing substr}\hbox {}}
-\entry {system}{98}{{\fam \ttfam \tentt \rawbackslash \frenchspacing system}\hbox {}}
diff --git a/gawk.fns b/gawk.fns
deleted file mode 100644
index bfd931c1..00000000
--- a/gawk.fns
+++ /dev/null
@@ -1,13 +0,0 @@
-\initial {G}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {}}{32}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing gsub}\hbox {}}{97}
-\initial {L}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing length}\hbox {}}{95}
-\initial {M}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}}{95}
-\initial {S}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing split}\hbox {}}{96}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing sprintf}\hbox {}}{96}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing sub}\hbox {}}{96}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing substr}\hbox {}}{97}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing system}\hbox {}}{98}
diff --git a/gawk.ky b/gawk.ky
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.ky
+++ /dev/null
diff --git a/gawk.kys b/gawk.kys
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.kys
+++ /dev/null
diff --git a/gawk.pg b/gawk.pg
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.pg
+++ /dev/null
diff --git a/gawk.pgs b/gawk.pgs
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.pgs
+++ /dev/null
diff --git a/gawk.texinfo b/gawk.texinfo
deleted file mode 100644
index 4c22e8ad..00000000
--- a/gawk.texinfo
+++ /dev/null
@@ -1,6587 +0,0 @@
-\input texinfo @c -*-texinfo-*-
-@c %**start of header (This is for running Texinfo on a region.)
-@setfilename gawk-info
-@settitle The GAWK Manual
-@c %**end of header (This is for running Texinfo on a region.)
-
-@iftex
-@finalout
-@end iftex
-
-@ifinfo
-This file documents @code{awk}, a program that you can use to select
-particular records in a file and perform operations upon them.
-
-Copyright (C) 1989 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-@ignore
-Permission is granted to process this file through TeX and print the
-results, provided the printed document carries copying permission
-notice identical to this one except for the removal of this paragraph
-(this paragraph not being relevant to the printed manual).
-
-@end ignore
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
-@end ifinfo
-
-@setchapternewpage odd
-@titlepage
-@sp 11
-@center @titlefont{The GAWK Manual}
-@sp 4
-@center by Diane Barlow Close and Richard Stallman
-@center with Paul H. Rubin
-@center and Arnold D. Robbins
-@sp 2
-@center Edition 0.1 Beta
-@sp 2
-@center March 1989
-
-@c Include the Distribution inside the titlepage environment so
-@c that headings are turned off. Headings on and off do not work.
-
-@page
-@vskip 0pt plus 1filll
-Copyright @copyright{} 1989 Free Software Foundation, Inc.
-@sp 2
-
-This is Edition 0.1 Beta of @cite{The GAWK Manual}, @*
-for the 2.02 Beta, 23 December 1988, version @*
-of the GNU implementation of AWK.
-
-@sp 2
-Published by the Free Software Foundation @*
-675 Massachusetts Avenue, @*
-Cambridge, MA 02139 USA @*
-Printed copies are available for $10 each.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
-@end titlepage
-
-@node Top, Preface, (dir), (dir)
-@comment node-name, next, previous, up
-@c Preface or Licensing nodes should come right after the Top
-@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
-
-@ifinfo
-This file documents @code{awk}, a program that you can use to select
-particular records in a file and perform operations upon them; it
-contains the following chapters:
-@end ifinfo
-
-@menu
-* Preface:: What you can do with @code{awk}; brief history
- and acknowledgements.
-
-* License:: Your right to copy and distribute @code{gawk}.
-
-* This Manual:: Using this manual.
-@ifinfo
- Includes sample input files that you can use.
-@end ifinfo
-* Getting Started:: A basic introduction to using @code{awk}.
- How to run an @code{awk} program. Command line syntax.
-
-* Reading Files:: How to read files and manipulate fields.
-
-* Printing:: How to print using @code{awk}. Describes the
- @code{print} and @code{printf} statements.
- Also describes redirection of output.
-
-* One-liners:: Short, sample @code{awk} programs.
-
-* Patterns:: The various types of patterns explained in detail.
-
-* Actions:: The various types of actions are introduced here.
- Describes expressions and the various operators in
- detail. Also describes comparison expressions.
-
-* Statements:: The various control statements are described in
- detail.
-
-* Arrays:: The description and use of arrays. Also includes
- array--oriented control statements.
-
-* User-defined:: User--defined functions are described in detail.
-
-* Built-in:: The built--in functions are summarized here.
-
-* Special:: The special variables are summarized here.
-
-* Sample Program:: A sample @code{awk} program with a complete explanation.
-
-* Notes:: Something about the implementation of @code{gawk}.
-
-* Glossary:: An explanation of some unfamiliar terms.
-
-* Index::
-@end menu
-
-
-@node Preface, License, Top , Top
-@comment node-name, next, previous, up
-@unnumbered Preface
-
-@cindex What is @code{awk}
-If you are like many computer users, you frequently would like to make
-changes in various text files wherever certain patterns appear, or
-extract data from parts of certain lines while discarding the rest. To
-write a program to do this in a language such as C or Pascal is a
-time--consuming inconvenience that may take many lines of code. The job
-may be easier with @code{awk}.
-
-The @code{awk} utility interprets a special--purpose programming language
-that makes it possible to handle simple data--reformatting jobs easily
-with just a few lines of code.
-
-The GNU implementation of @code{awk} is called @code{gawk}; it is fully
-upward compatible with the System V Release 3.1 and later
-versions of @code{awk}. All properly written
-@code{awk} programs should work with @code{gawk}. So we usually don't
-distinguish between @code{gawk} and other @code{awk} implementations in
-this manual.@refill
-
-@cindex Uses of @code{awk}
-This manual teaches you what @code{awk} does and how you can use
-@code{awk} effectively. You should already be familiar with basic,
-general--purpose operating system commands such as @code{ls}. Using
-@code{awk} you can: @refill
-
-@itemize @bullet
-@item
-manage small, personal databases,
-
-@item
-generate reports,
-
-@item
-validate data,
-@item
-produce indexes, and perform other document preparation tasks,
-
-@item
-even experiment with algorithms that can be adapted later to other computer
-languages!
-@end itemize
-
-@menu
-* History:: The history of gawk and awk. Acknowledgements.
-@end menu
-
-@node History, , , Preface
-@comment node-name, next, previous, up
-@unnumberedsec History of @code{awk} and @code{gawk}
-
-@cindex Acronym
-@cindex History of @code{awk}
-The name @code{awk} comes from the initials of its designers: Alfred V.
-Aho, Peter J. Weinberger, and Brian W. Kernighan. The original version of
-@code{awk} was written in 1977. In 1985 a new version made the programming
-language more powerful, introducing user--defined functions, multiple input
-streams, and computed regular expressions.
-@comment We don't refer people to non-free information
-@comment In 1988, the original authors
-@comment published @cite{The AWK Programming Language} (Addison-Wesley, ISBN
-@comment 0-201-07981-X), as a definitive description of the @code{awk} language.
-
-The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
-and Jay Fenlason, with advice from Richard Stallman. John Woods
-contributed parts of the code as well. In 1988, David Trueman, with
-help from Arnold Robbins, reworked @code{gawk} for compatibility with
-the newer @code{awk}.
-
-Many people need to be thanked for their assistance in producing this
-manual. Jay Fenlason contributed many ideas and sample programs. Richard
-Mlynarik and Robert Chassell gave helpful comments on drafts of this
-manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.
-Pierce of the Chemistry Department at UC San Diego pinpointed several
-issues relevant both to the @code{awk} implementation and to this manual that
-would otherwise have escaped us.
-
-Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable
-assistance during the testing and debugging of @code{gawk}, and for
-help in clarifying several points about the language.@refill
-
-@node License, This Manual, Preface, Top
-@unnumbered GNU GENERAL PUBLIC LICENSE
-@center Version 1, February 1989
-
-@display
-Copyright @copyright{} 1989 Free Software Foundation, Inc.
-675 Mass Ave, Cambridge, MA 02139, USA
-
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-@end display
-
-@unnumberedsec Preamble
-
- The license agreements of most software companies try to keep users
-at the mercy of those companies. By contrast, our General Public
-License is intended to guarantee your freedom to share and change free
-software---to make sure the software is free for all its users. The
-General Public License applies to the Free Software Foundation's
-software and to any other program whose authors commit to using it.
-You can use it for your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Specifically, the General Public License is designed to make
-sure that you have the freedom to give away or sell copies of free
-software, that you receive source code or can get it if you want it,
-that you can change the software or use pieces of it in new free
-programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of a such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must tell them their rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
-@iftex
-@unnumberedsec TERMS AND CONDITIONS
-@end iftex
-@ifinfo
-@center TERMS AND CONDITIONS
-@end ifinfo
-
-@enumerate
-@item
-This License Agreement applies to any program or other work which
-contains a notice placed by the copyright holder saying it may be
-distributed under the terms of this General Public License. The
-``Program'', below, refers to any such program or work, and a ``work based
-on the Program'' means either the Program or any work containing the
-Program or a portion of it, either verbatim or with modifications. Each
-licensee is addressed as ``you''.
-
-@item
-You may copy and distribute verbatim copies of the Program's source
-code as you receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice and
-disclaimer of warranty; keep intact all the notices that refer to this
-General Public License and to the absence of any warranty; and give any
-other recipients of the Program a copy of this General Public License
-along with the Program. You may charge a fee for the physical act of
-transferring a copy.
-
-@item
-You may modify your copy or copies of the Program or any portion of
-it, and copy and distribute such modifications under the terms of Paragraph
-1 above, provided that you also do the following:
-
-@itemize @bullet
-@item
-cause the modified files to carry prominent notices stating that
-you changed the files and the date of any change; and
-
-@item
-cause the whole of any work that you distribute or publish, that
-in whole or in part contains the Program or any part thereof, either
-with or without modifications, to be licensed at no charge to all
-third parties under the terms of this General Public License (except
-that you may choose to grant warranty protection to some or all
-third parties, at your option).
-
-@item
-If the modified program normally reads commands interactively when
-run, you must cause it, when started running for such interactive use
-in the simplest and most usual way, to print or display an
-announcement including an appropriate copyright notice and a notice
-that there is no warranty (or else, saying that you provide a
-warranty) and that users may redistribute the program under these
-conditions, and telling the user how to view a copy of this General
-Public License.
-
-@item
-You may charge a fee for the physical act of transferring a
-copy, and you may at your option offer warranty protection in
-exchange for a fee.
-@end itemize
-
-Mere aggregation of another independent work with the Program (or its
-derivative) on a volume of a storage or distribution medium does not bring
-the other work under the scope of these terms.
-
-@item
-You may copy and distribute the Program (or a portion or derivative of
-it, under Paragraph 2) in object code or executable form under the terms of
-Paragraphs 1 and 2 above provided that you also do one of the following:
-
-@itemize @bullet
-@item
-accompany it with the complete corresponding machine-readable
-source code, which must be distributed under the terms of
-Paragraphs 1 and 2 above; or,
-
-@item
-accompany it with a written offer, valid for at least three
-years, to give any third party free (except for a nominal charge
-for the cost of distribution) a complete machine-readable copy of the
-corresponding source code, to be distributed under the terms of
-Paragraphs 1 and 2 above; or,
-
-@item
-accompany it with the information you received as to where the
-corresponding source code may be obtained. (This alternative is
-allowed only for noncommercial distribution and only if you
-received the program in object code or executable form alone.)
-@end itemize
-
-Source code for a work means the preferred form of the work for making
-modifications to it. For an executable file, complete source code means
-all the source code for all modules it contains; but, as a special
-exception, it need not include source code for modules which are standard
-libraries that accompany the operating system on which the executable
-file runs, or for standard header files or definitions files that
-accompany that operating system.
-
-@item
-You may not copy, modify, sublicense, distribute or transfer the
-Program except as expressly provided under this General Public License.
-Any attempt otherwise to copy, modify, sublicense, distribute or transfer
-the Program is void, and will automatically terminate your rights to use
-the Program under this License. However, parties who have received
-copies, or rights to use copies, from you under this General Public
-License will not have their licenses terminated so long as such parties
-remain in full compliance.
-
-@item
-By copying, distributing or modifying the Program (or any work based
-on the Program) you indicate your acceptance of this license to do so,
-and all its terms and conditions.
-
-@item
-Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the original
-licensor to copy, distribute or modify the Program subject to these
-terms and conditions. You may not impose any further restrictions on the
-recipients' exercise of the rights granted herein.
-
-@item
-The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of the license which applies to it and ``any
-later version'', you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-the license, you may choose any version ever published by the Free Software
-Foundation.
-
-@item
-If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-@iftex
-@heading NO WARRANTY
-@end iftex
-@ifinfo
-@center NO WARRANTY
-@end ifinfo
-
-@item
-BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-@item
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
-ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
-ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT
-LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
-SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
-WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-@end enumerate
-
-@iftex
-@heading END OF TERMS AND CONDITIONS
-@end iftex
-@ifinfo
-@center END OF TERMS AND CONDITIONS
-@end ifinfo
-
-@page
-@unnumberedsec Appendix: How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to humanity, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these
-terms.
-
- To do so, attach the following notices to the program. It is safest to
-attach them to the start of each source file to most effectively convey
-the exclusion of warranty; and each file should have at least the
-``copyright'' line and a pointer to where the full notice is found.
-
-@smallexample
-@var{one line to give the program's name and a brief idea of what it does.}
-Copyright (C) 19@var{yy} @var{name of author}
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 1, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-@end smallexample
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-@smallexample
-Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
-Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-This is free software, and you are welcome to redistribute it
-under certain conditions; type `show c' for details.
-@end smallexample
-
-The hypothetical commands `show w' and `show c' should show the
-appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than `show w' and `show
-c'; they could even be mouse-clicks or menu items---whatever suits your
-program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a ``copyright disclaimer'' for the program, if
-necessary. Here a sample; alter the names:
-
-@example
-Yoyodyne, Inc., hereby disclaims all copyright interest in the
-program `Gnomovision' (a program to direct compilers to make passes
-at assemblers) written by James Hacker.
-
-@var{signature of Ty Coon}, 1 April 1989
-Ty Coon, President of Vice
-@end example
-
-That's all there is to it!
-
-@node This Manual, Getting Started, License , Top
-@chapter Using This Manual
-@cindex Manual, using this
-@cindex Using this manual
-@cindex Language, @code{awk}
-@cindex Program, @code{awk}
-@cindex @code{awk} language
-@cindex @code{awk} program
-
-The term @code{gawk} refers to a program (a version of @code{awk})
-developed by the Free Software Foundation, and to the language you
-use to tell it what to do. When we need to be careful, we call the program
-``the @code{awk} utility'' and the language ``the @code{awk} language''.
-The purpose of this manual is to explain the @code{awk} language and how to
-run the @code{awk} utility.
-
-The term @dfn{@code{awk} program} refers to a program written by you in
-the @code{awk} programming language.@refill
-
-@xref{Getting Started}, for the bare essentials you need to know to
-start using @code{awk}.
-
-Useful ``one--liners'' are included to give you a feel for the
-@code{awk} language (@pxref{One-liners}).
-
-@ignore
-@strong{I deleted four paragraphs here because they would confuse the
-beginner more than help him. They mention terms such as ``field'',
-``pattern'', ``action'', ``built--in function'' which the beginner
-doesn't know.}
-
-@strong{If you can find a way to introduce several of these concepts here,
-enough to give the reader a map of what is to follow, that might
-be useful. I'm not sure that can be done without taking up more
-space than ought to be used here. There may be no way to win.}
-
-@strong{ADR: I'd like to tackle this in phase 2 of my editing.}
-@end ignore
-
-A sizable sample @code{awk} program has been provided for you (@pxref{Sample
-Program}).@refill
-
-If you find terms that you aren't familiar with, try looking them
-up in the glossary (@pxref{Glossary}).@refill
-
-Most of the time complete @code{awk} programs are used as examples, but in
-some of the more advanced sections, only the part of the @code{awk} program
-that illustrates the concept being described is shown.@refill
-
-@menu
-This chapter contains the following sections:
-
-* The Files:: Sample data files for use in the @code{awk} programs
- illustrated in this manual.
-@end menu
-
-@node The Files, , , This Manual
-@section Input Files for the Examples
-
-@cindex Input file, sample
-@cindex Sample input file
-@cindex @file{BBS-list} file
-This manual contains many sample programs. The data for many of those
-programs comes from two files. The first file, called @file{BBS-list},
-represents a list of computer bulletin board systems and information about
-those systems.
-
-Each line of this file is one @dfn{record}. Each record contains the name
-of a computer bulletin board, its phone number, the board's baud rate, and a
-code for the number of hours it is operational. An @samp{A} in the last
-column means the board operates 24 hours a day, all week. A @samp{B} in the last
-column means the board operates only during evening and weekend hours. A @samp{C}
-means the board operates only on weekends.
-
-@group
-@example
-aardvark 555-5553 1200/300 B
-alpo-net 555-3412 2400/1200/300 A
-barfly 555-7685 1200/300 A
-bites 555-1675 2400/1200/300 A
-camelot 555-0542 300 C
-core 555-2912 1200/300 C
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sdace 555-3430 2400/1200/300 A
-sabafoo 555-2127 1200/300 C
-@end example
-@end group
-The second data file, called @file{inventory-shipped}, represents
-information about shipments during the year. Each line of this file is also
-one record. Each record contains the month of the year, the number of green
-crates shipped, the number of red boxes shipped, the number of orange bags
-shipped, and the number of blue packages shipped, respectively.
-@cindex @file{inventory-shipped} file
-
-@group
-@example
-Jan 13 25 15 115
-Feb 15 32 24 226
-Mar 15 24 34 228
-Apr 31 52 63 420
-May 16 34 29 208
-Jun 31 42 75 492
-Jul 24 34 67 436
-Aug 15 34 47 316
-Sep 13 55 37 277
-Oct 29 54 68 525
-Nov 20 87 82 577
-Dec 17 35 61 401
-
-Jan 21 36 64 620
-Feb 26 58 80 652
-Mar 24 75 70 495
-Apr 21 70 74 514
-@end example
-@end group
-
-@ifinfo
-If you are reading this in GNU Emacs using Info, you can copy the regions
-of text showing these sample files into your own test files. This way you
-can try out the examples shown in the remainder of this document. You do
-this by using the command @kbd{M-x write-region} to copy text from the Info
-file into a file for use with @code{awk} (see your @cite{GNU Emacs Manual}
-for more information). Using this information, create your own
-@file{BBS-list} and @file{inventory-shipped} files, and practice what you
-learn in this manual.
-@end ifinfo
-
-@node Getting Started, Reading Files, This Manual, Top
-@chapter Getting Started With @code{awk}
-
-@cindex Script, definition of
-@cindex Rule, definition of
-@cindex Pattern, definition of
-@cindex Action, definition of
-@cindex Program, definition of
-@cindex Basic function of @code{gawk}
-The basic function of @code{awk} is to search files for lines (or other
-units of text) that contain certain patterns. When a line matching any
-of those patterns is found, @code{awk} performs specified actions on
-that line. Then @code{awk} keeps processing input lines until the end
-of the file is reached.@refill
-
-An @code{awk} @dfn{program} or @dfn{script} consists of a series of
-@dfn{rules}. (They may also contain @dfn{function definitions}, but
-that is an advanced feature, so let's ignore it for now.
-@xref{User-defined}.)
-
-A rule contains a @dfn{pattern}, an @dfn{action}, or both. Actions are
-enclosed in curly braces to distinguish them from patterns. Therefore,
-an @code{awk} program is a sequence of rules in the form:@refill
-@cindex Action, curly braces
-@cindex Curly braces
-
-@example
-@var{pattern} @{ @var{action} @}
-@var{pattern} @{ @var{action} @}
-@dots{}
-@end example
-
-@menu
-* Very Simple:: A very simple example.
-* Two Rules:: A less simple one--line example with two rules.
-* More Complex:: A more complex example.
-* Running gawk:: How to run gawk programs; includes command line syntax.
-* Comments:: Adding documentation to gawk programs.
-* Statements/Lines:: Subdividing or combining statements into lines.
-
-* When:: When to use gawk and when to use other things.
-@end menu
-
-@node Very Simple, Two Rules, , Getting Started
-@section A Very Simple Example
-
-@cindex @code{print $0}
-The following command runs a simple @code{awk} program that searches the
-input file @file{BBS-list} for the string of characters: @samp{foo}. (A
-string of characters is usually called, quite simply, a @dfn{string}.)
-
-@example
-awk '/foo/ @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-When lines containing @samp{foo} are found, they are printed, because
-@w{@code{print $0}} means print the current line. (Just @code{print} by
-itself also means the same thing, so we could have written that
-instead.)
-
-You will notice that slashes, @samp{/}, surround the string @samp{foo}
-in the actual @code{awk} program. The slashes indicate that @samp{foo}
-is a pattern to search for. This type of pattern is called a
-@dfn{regular expression}, and is covered in more detail later
-(@pxref{Regexp}). There are single quotes around the @code{awk} program
-so that the shell won't interpret any of it as special shell
-characters.@refill
-
-Here is what this program prints:
-
-@example
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sabafoo 555-2127 1200/300 C
-@end example
-
-@cindex Action, default
-@cindex Pattern, default
-@cindex Default action
-@cindex Default pattern
-In an @code{awk} rule, either the pattern or the action can be omitted,
-but not both.
-
-If the pattern is omitted, then the action is performed for @emph{every}
-input line.@refill
-
-If the action is omitted, the default action is to print all lines that
-match the pattern. We could leave out the action (the print statement
-and the curly braces) in the above example, and the result would be the
-same: all lines matching the pattern @samp{foo} would be printed. (By
-comparison, omitting the print statement but retaining the curly braces
-makes an empty action that does nothing; then no lines would be
-printed.)
-
-@node Two Rules, More Complex, Very Simple, Getting Started
-@section An Example with Two Rules
-@cindex How gawk works
-
-The @code{awk} utility reads the input files one line at a
-time. For each line, @code{awk} tries the patterns of all the rules.
-If several patterns match then several actions are run, in the order in
-which they appear in the @code{awk} program. If no patterns match, then
-no actions are run.
-
-After processing all the rules (perhaps none) that match the line,
-@code{awk} reads the next line (however, @pxref{Next}).
-This continues until the end of the file is reached.@refill
-
-For example, the @code{awk} program:
-
-@example
-/12/ @{ print $0 @}
-/21/ @{ print $0 @}
-@end example
-
-@noindent
-contains two rules. The first rule has the string @samp{12} as the
-pattern and @samp{print $0} as the action. The second rule has the
-string @samp{21} as the pattern and also has @samp{print $0} as the
-action. Each rule's action is enclosed in its own pair of braces.
-
-This @code{awk} program prints every line that contains the string
-@samp{12} @emph{or} the string @samp{21}. If a line contains both
-strings, it is printed twice, once by each rule.
-
-If we run this program on our two sample data files, @file{BBS-list} and
-@file{inventory-shipped}, as shown here:
-
-@example
-awk '/12/ @{ print $0 @}
- /21/ @{ print $0 @}' BBS-list inventory-shipped
-@end example
-
-@noindent
-we get the following output:
-
-@example
-aardvark 555-5553 1200/300 B
-alpo-net 555-3412 2400/1200/300 A
-barfly 555-7685 1200/300 A
-bites 555-1675 2400/1200/300 A
-core 555-2912 1200/300 C
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sdace 555-3430 2400/1200/300 A
-sabafoo 555-2127 1200/300 C
-sabafoo 555-2127 1200/300 C
-Jan 21 36 64 620
-Apr 21 70 74 514
-@end example
-
-@noindent
-Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
-was printed twice, once for each rule.
-
-@node More Complex, Running gawk, Two Rules, Getting Started
-@comment node-name, next, previous, up
-@section A More Complex Example
-
-Here is an example to give you an idea of what typical @code{awk}
-programs do. This example shows how @code{awk} can be used to
-summarize, select, and rearrange the output of another utility. It uses
-features that haven't been covered yet, so don't worry if you don't
-understand all the details.
-
-@example
-ls -l | awk '$5 == "Nov" @{ sum += $4 @}
- END @{ print sum @}'
-@end example
-
-This command prints the total number of bytes in all the files in the
-current directory that were last modified in November (of any year).
-(In the C shell you would need to type a semicolon and then a backslash
-at the end of the first line; in the Bourne shell you can type the example
-as shown.)
-
-The @w{@code{ls -l}} part of this example is a command that gives you a full
-listing of all the files in a directory, including file size and date.
-Its output looks like this:
-
-@example
--rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
--rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
--rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
--rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
--rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
--rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
--rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
--rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
-@end example
-
-@noindent
-The first field contains read--write permissions, the second field contains
-the number of links to the file, and the third field identifies the owner of
-the file. The fourth field contains the size of the file in bytes. The
-fifth, sixth, and seventh fields contain the month, day, and time,
-respectively, that the file was last modified. Finally, the eighth field
-contains the name of the file.
-
-The @samp{$5 == "Nov"} in our @code{awk} program is an expression that
-tests whether the fifth field of the output from @w{@code{ls -l}}
-matches the string @samp{Nov}. Each time a line has the string
-@samp{Nov} in its fifth field, the action @samp{@{ sum += $4 @}} is
-performed. This adds the fourth field (the file size) to the variable
-@code{sum}. As a result, when @code{awk} has finished reading all the
-input lines, @code{sum} will be the sum of the sizes of files whose
-lines matched the pattern.@refill
-
-After the last line of output from @code{ls} has been processed, the
-action of the @code{END} rule is executed, and the value of @code{sum} is
-printed. In this example, the value of @code{sum} would be 80600.@refill
-
-These more advanced @code{awk} techniques are covered in later sections
-(@pxref{Actions}). Before you can move on to more advanced @code{awk}
-programming, you have to know how @code{awk} interprets your input and
-displays your output. By manipulating @dfn{fields} and using special
-@code{print} statements, you can produce some very useful and spectacular
-looking reports.@refill
-
-
-@node Running gawk, Comments, More Complex, Getting Started
-@section How to Run @code{awk} Programs
-
-@cindex Command line formats
-@cindex Running gawk programs
-There are several ways to run an @code{awk} program. If the program is
-short, it is easiest to include it in the command that runs @code{awk},
-like this:
-
-@example
-awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@noindent
-where @var{program} consists of a series of @var{patterns} and
-@var{actions}, as described earlier.
-
-When the program is long, you would probably prefer to put it in a file
-and run it with a command like this:
-
-@example
-awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@menu
-* One-shot:: Running a short throw--away @code{awk} program.
-* Read Terminal:: Using no input files (input from terminal instead).
-* Long:: Putting permanent @code{awk} programs in files.
-* Executable Scripts:: Making self--contained @code{awk} programs.
-* Command Line:: How the @code{awk} command line is laid out.
-@end menu
-
-@node One-shot, Read Terminal, , Running gawk
-@subsection One--shot Throw--away @code{awk} Programs
-
-Once you are familiar with @code{awk}, you will often type simple
-programs at the moment you want to use them. Then you can write the
-program as the first argument of the @code{awk} command, like this:
-
-@example
-awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@noindent
-where @var{program} consists of a series of @var{patterns} and
-@var{actions}, as described earlier.
-
-@cindex Single quotes, why they are needed
-This command format tells the shell to start @code{awk} and use the
-@var{program} to process records in the input file(s). There are single
-quotes around the @var{program} so that the shell doesn't interpret any
-@code{awk} characters as special shell characters. They cause the
-shell to treat all of @var{program} as a single argument for
-@code{awk}. They also allow @var{program} to be more than one line
-long.@refill
-
-This format is also useful for running short or medium--sized @code{awk}
-programs from shell scripts, because it avoids the need for a separate
-file for the @code{awk} program. A self--contained shell script is more
-reliable since there are no other files to misplace.
-
-@node Read Terminal, Long, One-shot, Running gawk
-@subsection Running @code{awk} without Input Files
-
-@cindex Standard input
-@cindex Input, standard
-You can also use @code{awk} without any input files. If you type the
-command line:@refill
-
-@example
-awk '@var{program}'
-@end example
-
-@noindent
-then @code{awk} applies the @var{program} to the @dfn{standard input},
-which usually means whatever you type on the terminal. This continues
-until you indicate end--of--file by typing @kbd{Control-d}.
-
-For example, if you type:
-
-@example
-awk '/th/'
-@end example
-
-@noindent
-whatever you type next will be taken as data for that @code{awk}
-program. If you go on to type the following data,
-
-@example
-Kathy
-Ben
-Tom
-Beth
-Seth
-Karen
-Thomas
-@kbd{Control-d}
-@end example
-
-@noindent
-then @code{awk} will print
-
-@example
-Kathy
-Beth
-Seth
-@end example
-
-@noindent
-@cindex Case sensitivity and gawk
-@cindex Pattern, case sensitive
-as matching the pattern @samp{th}. Notice that it did not recognize
-@samp{Thomas} as matching the pattern. The @code{awk} language is
-@dfn{case sensitive}, and matches patterns @emph{exactly}.@refill
-
-@node Long, Executable Scripts, Read Terminal, Running gawk
-@subsection Running Long Programs
-
-@cindex running long programs
-@cindex -f option
-@cindex program file
-@cindex file, @code{awk} program
-Sometimes your @code{awk} programs can be very long. In this case it is
-more convenient to put the program into a separate file. To tell
-@code{awk} to use that file for its program, you type:@refill
-
-@example
-awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-The @samp{-f} tells the @code{awk} utility to get the @code{awk} program
-from the file @var{source-file}. Any file name can be used for
-@var{source-file}. For example, you could put the program:@refill
-
-@example
-/th/
-@end example
-
-@noindent
-into the file @file{th-prog}. Then the command:
-
-@example
-awk -f th-prog
-@end example
-
-@noindent
-does the same thing as this one:
-
-@example
-awk '/th/'
-@end example
-
-@noindent
-which was explained earlier (@pxref{Read Terminal}). Note that you
-don't usually need single quotes around the file name that you specify
-with @samp{-f}, because most file names don't contain any of the shell's
-special characters.
-
-If you want to identify your @code{awk} program files clearly as such,
-you can add the extension @file{.awk} to the file name. This doesn't
-affect the execution of the @code{awk} program, but it does make
-``housekeeping'' easier.
-
-@node Executable Scripts, Command Line, Long, Running gawk
-@c node-name, next, previous, up
-@subsection Executable @code{awk} Programs
-@cindex Executable Scripts
-@cindex Scripts, Executable
-@cindex Self contained Programs
-@cindex Program, Self contained
-@cindex #!
-
-(The following section assumes that you are already somewhat familiar
-with @code{awk}.)
-
-Once you have learned @code{awk}, you may want to write self--contained
-@code{awk} scripts, using the @samp{#!} script mechanism. You can do
-this on BSD Unix systems and GNU.
-
-For example, you could create a text file named @file{hello}, containing
-the following (where @samp{BEGIN} is a feature we have not yet
-discussed):
-
-@example
-#! /bin/awk -f
-
-# a sample awk program
-
-BEGIN @{ print "hello, world" @}
-@end example
-
-@noindent
-After making this file executable (with the @code{chmod} command), you
-can simply type:
-
-@example
-hello
-@end example
-
-@noindent
-at the shell, and the system will arrange to run @code{awk} as if you
-had typed:
-
-@example
-awk -f hello
-@end example
-
-@noindent
-Self--contained @code{awk} scripts are particularly useful for putting
-@code{awk} programs into production on your system, without your users
-having to know that they are actually using an @code{awk} program.
-
-@cindex Shell Scripts
-@cindex Scripts, Shell
-If your system does not support the @samp{#!} mechanism, you can get a
-similar effect using a regular shell script. It would look something
-like this:
-
-@example
-: a sample awk program
-
-awk '@var{program}' "$@@"
-@end example
-
-Using this technique, it is @emph{vital} to enclose the @var{program} in
-single quotes to protect it from interpretation by the shell. If you
-omit the quotes, only a shell wizard can predict the result.
-
-The @samp{"$@@"} causes the shell to forward all the command line
-arguments to the @code{awk} program, without interpretation.
-@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
-
-@c We don't refer to hoarded information.
-@c (See
-@c @cite{The UNIX Programming Environment} by Brian Kernighan and Rob Pike,
-@c Prentice-Hall, 1984, for more information on writing shell programs that
-@c use the Unix utilities. The most powerful version of the shell is the
-@c Korn shell. A detailed description of the Korn shell can be found in
-@c @cite{The KornShell Command and Programming Language} by Morris Bolsky
-@c and David Korn, Prentice-Hall, 1989.)
-
-@node Command Line, , Executable Scripts, Running gawk
-@c node-name, next, previous, up
-@subsection Details of the @code{awk} Command Line
-@cindex Command Line
-@cindex Invocation of @code{gawk}
-@cindex Arguments, Command Line
-@cindex Options, Command Line
-
-(The following section assumes that you are already familiar with
-@code{awk}.)
-
-There are two ways to run @code{awk}. Here are templates for both of
-them; items enclosed in @samp{[} and @samp{]} in these templates are
-optional.
-
-@example
-awk [ -F@var{fs} ] [ -- ] '@var{program}' @var{file} @dots{}
-awk [ -F@var{fs} ] -f @var{source-file} [ -f @var{source-file} @dots{} ] [ -- ] @var{file} @dots{}
-@end example
-
-Options begin with a minus sign, and consist of a single character.
-The options and their meanings are as follows:
-
-@table @code
-@item -F@var{fs}
-This sets the @code{FS} variable to @var{fs} (@pxref{Special}).
-As a special case, if @var{fs} is @samp{t}, then @code{FS} will be set
-to the tab character (@code{"\t"}).
-
-@item -f @var{source-file}
-Indicates that the @code{awk} program is to be found in @var{source-file}
-instead of in the first non--option argument.
-
-@item --
-This signals the end of the command line options. If you wish to
-specify an input file named @file{-f}, you can precede it with the
-@samp{--} argument to prevent the @file{-f} from being interpreted as an
-option. This handling of @samp{--} follows the POSIX argument parsing
-conventions.
-@end table
-
-Any other options will be flagged as invalid with a warning message, but
-are otherwise ignored.
-
-If the @samp{-f} option is @emph{not} used, then the first non--option
-command line argument is expected to be the program text.
-
-The @samp{-f} option may be used more than once on the command line.
-@code{awk} will read its program source from all of the named files, as
-if they had been concatenated together into one big file. This is useful
-for creating libraries of @code{awk} functions. Useful functions can be
-written once, and then retrieved from a standard place, instead of having
-to be included into each individual program. You can still type in a program
-at the terminal and use library functions, by specifying @file{/dev/tty}
-as one of the arguments to a @samp{-f}. Type your program, and end it
-with the keyboard end--of--file character @kbd{Control-d}.
-
-Any additional arguments on the command line are made available to your
-@code{awk} program in the @code{ARGV} array (@pxref{Special}). These
-arguments are normally treated as input files to be processed in the
-order specified. However, an argument that has the form
-@var{var}@code{=}@var{value}, means to assign the value @var{value} to
-the variable @var{var}---it does not specify a file at all.
-
-@vindex ARGV
-Command line options and the program text (if present) are omitted from
-the @code{ARGV} array. All other arguments, including variable assignments,
-are included (@pxref{Special}).
-
-The distinction between file name arguments and variable--assignment
-arguments is made when @code{awk} is about to open the next input file.
-At that point in execution, it checks the ``file name'' to see whether
-it is really a variable assignment; if so, instead of trying to read a
-file it will, @emph{at that point in the execution}, assign the
-variable.
-
-Therefore, the variables actually receive the specified values after all
-previously specified files have been read. In particular, the values of
-variables assigned in this fashion are @emph{not} available inside a
-@code{BEGIN} rule (@pxref{BEGIN/END}), since such rules are run before
-@code{awk} begins scanning the argument list.@refill
-
-@vindex OFS
-@vindex ORS
-@vindex RS
-The variable assignment feature is most useful for assigning to variables
-such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
-output formats, before listing the data files. It is also useful for
-controlling state if multiple passes are needed over a data file. For
-example:@refill
-
-@cindex Multiple passes over data
-@cindex Passes, Multiple
-@example
-awk 'pass == 1 @{ @var{pass 1 stuff} @}
- pass == 2 @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile
-@end example
-
-@node Comments, Statements/Lines, Running gawk, Getting Started
-@section Comments in @code{awk} Programs
-@cindex Comments
-@cindex Use of comments
-@cindex Documenting @code{awk} programs
-@cindex Programs, documenting
-
-When you write a complicated @code{awk} program, you can put @dfn{comments}
-in the program file to help you remember what the program does, and how it
-works.
-
-A comment starts with the sharp sign character, @kbd{#}, and continues
-to the end of the line. The @code{awk} language ignores the rest of a line
-following a sharp sign. For example, we could have put the following into
-@file{th-prog}:@refill
-
-@example
-# This program finds records containing the pattern @samp{th}. This is how
-# you continue comments on additional lines.
-/th/
-@end example
-
-You can put comment lines into keyboard--composed throw--away @code{awk}
-programs also, but this usually isn't very useful; the purpose of a
-comment is to help yourself or another person understand the program at
-another time.
-
-@node Statements/Lines, When, Comments, Getting Started
-@section @code{awk} Statements versus Lines
-
-Most often, each line in an @code{awk} program is a separate statement or
-separate rule, like this:
-
-@example
-awk '/12/ @{ print $0 @}
- /21/ @{ print $0 @}' BBS-list inventory-shipped
-@end example
-
-But sometimes statements can be more than one line, and lines can contain
-several statements.
-
-You can split a statement into multiple lines by inserting a newline after
-any of the following:
-
-@example
-, @{ ? : || &&
-@end example
-
-@noindent
-Lines ending in @code{do} or @code{else} automatically have their
-statements continued on the following line(s). A newline at any other
-point ends the statement.@refill
-
-@cindex Backslash Continuation
-@cindex Continuing statements on the next line
-If you would like to split a single statement into two lines at a point
-where a newline would terminate it, you can @dfn{continue} it by ending the
-first line with a backslash character, @samp{\}. This is allowed
-absolutely anywhere in the statement, even in the middle of a string or
-regular expression. For example:
-
-@example
-awk '/This program is too long, so continue it\
- on the next line/ @{ print $1 @}'
-@end example
-
-@noindent
-We have generally not used backslash continuation in the sample programs in
-this manual. Since there is no limit on the length of a line, it is never
-strictly necessary; it just makes programs prettier. We have preferred to
-make them even more pretty by keeping the statements short. Backslash
-continuation is most useful when your @code{awk} program is in a separate
-source file, instead of typed in on the command line.
-
-@strong{Warning: this does not work if you are using the C shell.}
-Continuation with backslash works for @code{awk} programs in files, and
-also for one--shot programs @emph{provided} you are using the Bourne
-shell, the Korn shell, or the Bourne--again shell. But the C shell used
-on Berkeley Unix behaves differently! There, you must use two backslashes
-in a row, followed by a newline.@refill
-
-@cindex Multiple statements on one line
-When @code{awk} statements within one rule are short, you might want to put
-more than one of them on a line. You do this by separating the statements
-with semicolons, @samp{;}.
-This also applies to the rules themselves.
-Thus, the above example program could have been written:@refill
-
-@example
-/12/ @{ print $0 @} ; /21/ @{ print $0 @}
-@end example
-
-@noindent
-@emph{Note:} The requirement that rules on the same line be separated by
-semicolons is a recent change in the @code{awk} language; it was made for
-consistency with the statements in the action part of rules.
-
-@node When, , Statements/Lines, Getting Started
-@section When to Use @code{awk}
-
-@cindex When to use @code{awk}
-@cindex Applications of @code{awk}
-What use is all of this to me, you might ask? Using additional operating
-system utilities, more advanced patterns, field separators, arithmetic
-statements, and other selection criteria, you can produce much more complex
-output. The @code{awk} language is very useful for producing reports from
-large amounts of raw data, like summarizing information from the output of
-standard operating system programs such as @code{ls}. (@xref{More
-Complex, , A More Complex Example}.)
-
-Programs written with @code{awk} are usually much smaller than they would
-be in other languages. This makes @code{awk} programs easy to compose and
-use. Often @code{awk} programs can be quickly composed at your terminal,
-used once, and thrown away. Since @code{awk} programs are interpreted, you
-can avoid the usually lengthy edit--compile--test--debug cycle of software
-development.
-
-@cindex Emacs Lisp
-Complex programs have been written in @code{awk}, including a complete
-retargetable assembler for 8--bit microprocessors (@pxref{Glossary}, for
-more information) and a microcode assembler for a special purpose Prolog
-computer. However, @code{awk}'s capabilities are strained by tasks of
-such complexity.
-
-If you find yourself writing @code{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated string
-or pattern matching capabilities. The shell is also good at string and
-pattern matching; in addition it allows powerful use of the standard
-utilities. More conventional languages like C, C++, or Lisp offer
-better facilities for system programming and for managing the complexity
-of large programs. Programs in these languages may require more lines
-of source code than the equivalent @code{awk} programs, but they will be
-easier to maintain and usually run more efficiently.@refill
-
-@node Reading Files, Printing, Getting Started, Top
-@chapter Reading Files (Input)
-
-@cindex Reading files, general
-@cindex Input, general
-@cindex Standard input
-@cindex Input, standard
-@cindex General input
-@vindex FILENAME
-In the typical @code{awk} program, all input is read either from the
-standard input (usually the keyboard) or from files whose names you
-specify on the @code{awk} command line. If you specify input files,
-@code{awk} reads data from the first one until it reaches the end; then
-it reads the second file until it reaches the end, and so on. The name
-of the current input file can be found in the special variable
-@code{FILENAME} (@pxref{Special}).@refill
-
-The input is split automatically into @dfn{records}, and processed by
-the rules one record at a time. (Records are the units of text
-mentioned in the introduction; by default, a record is a line of text.)
-Each record read is split automatically into @dfn{fields}, to make it
-more convenient for a rule to work on parts of the record under
-consideration.
-
-On rare occasions you will need to use the @code{getline} command,
-which can do explicit input from any number of files.
-
-@menu
-* Records:: Controlling how data is split into records.
-* Fields:: An introduction to fields.
-* Field Separators:: The field separator and how to change it.
-* Multiple:: Reading multi--line records.
-
-* Assignment Options:: Setting variables on the command line and a summary
- of command line syntax. This is an advanced method
- of input.
-
-* Getline:: Reading files under explicit program control
- using the @code{getline} function.
-* Close Input:: Closing an input file (so you can read from
- the beginning once more).
-@end menu
-
-@node Records, Fields, , Reading Files
-@section How Input is Split into Records
-
-@cindex Record separator, @code{RS}
-The @code{awk} language divides its input into records and fields.
-Records are separated from each other by the @dfn{record separator}. By
-default, the record separator is the @dfn{newline} character.
-Therefore, normally, a record is a line of text.@refill
-
-@cindex Changing the record separator
-@vindex RS
-Sometimes you may want to use a different character to separate your
-records. You can use different characters by changing the special
-variable @code{RS}.
-
-The value of @code{RS} is a string that says how to separate records;
-the default value is @code{"\n"}, the string of just a newline
-character. This is why lines of text are the default record. Although
-@code{RS} can have any string as its value, only the first character of
-the string will be used as the record separator. The other characters
-are ignored. @code{RS} is exceptional in this regard; @code{awk} uses
-the full value of all its other special variables.@refill
-
-@ignore
-Someday this should be true!
-
-The value of @code{RS} is not limited to a one--character string. It can
-be any regular expression (@pxref{Regexp}). In general, each record
-ends at the next string that matches the regular expression; the next
-record starts at the end of the matching string. This general rule is
-actually at work in the usual case, where @code{RS} contains just a
-newline: a record ends at the beginning of the next matching string (the
-next newline in the input) and the following record starts just after
-the end of this string (at the first character of the following line).
-The newline, since it matches @code{RS}, is not part of either record.
-@end ignore
-
-The value of @code{RS} is changed by @dfn{assigning} it a new value
-(@pxref{Assignment Ops}).
-One way to do this is at the beginning of your @code{awk} program,
-before any input has been processed, using the special @code{BEGIN}
-pattern (@pxref{BEGIN/END}). This way, @code{RS} is changed to its new
-value before any input is read. The new value of @code{RS} is enclosed
-in quotation marks. For example:@refill
-
-@example
-awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-changes the value of @code{RS} to @samp{/}, the slash character, before
-reading any input. Records are now separated by a slash. The second
-rule in the @code{awk} program (the action with no pattern) will proceed
-to print each record. Since each @code{print} statement adds a newline
-at the end of its output, the effect of this @code{awk} program is to
-copy the input with each slash changed to a newline.
-
-Another way to change the record separator is on the command line,
-using the variable--assignment feature (@pxref{Command Line}).
-
-@example
-awk '@dots{}' RS="/" @var{input-file}
-@end example
-
-@noindent
-@code{RS} will be set to @samp{/} before processing @var{input-file}.
-
-The empty string (a string of no characters) has a special meaning
-as the value of @code{RS}: it means that records are separated only
-by blank lines. @xref{Multiple}, for more details.
-
-@cindex Number of records, @code{NR}
-@cindex Number of records, @code{FNR}
-@vindex NR
-@vindex FNR
-The @code{awk} utility keeps track of the number of records that have
-been read so far from the current input file. This value is stored in a
-special variable called @code{FNR}. It is reset to zero when a new file
-is started. Another variable, @code{NR}, is the total number of input
-records read so far from all files. It starts at zero but is never
-automatically reset to zero.
-
-If you change the value of @code{RS} in the middle of an @code{awk} run,
-the new value is used to delimit subsequent records, but the record
-currently being processed (and records already finished) are not
-affected.
-
-@node Fields, Non-Constant Fields, Records, Reading Files
-@section Examining Fields
-
-@cindex Examining fields
-@cindex Fields
-@cindex Accessing fields
-When @code{awk} reads an input record, the record is
-automatically separated or @dfn{parsed} by the interpreter into pieces
-called @dfn{fields}. By default, fields are separated by whitespace,
-like words in a line.
-Whitespace in @code{awk} means any string of one or more spaces and/or
-tabs; other characters such as newline, formfeed, and so on, that are
-considered whitespace by other languages are @emph{not} considered
-whitespace by @code{awk}.
-
-The purpose of fields is to make it more convenient for you to refer to
-these pieces of the record. You don't have to use them---you can
-operate on the whole record if you wish---but fields are what make
-simple @code{awk} programs so powerful.
-
-@cindex @code{$} (field operator)
-@cindex Operators, @code{$}
-To refer to a field in an @code{awk} program, you use a dollar--sign,
-@samp{$}, followed by the number of the field you want. Thus, @code{$1}
-refers to the first field, @code{$2} to the second, and so on. For
-example, suppose the following is a line of input:@refill
-
-@example
-This seems like a pretty nice example.
-@end example
-
-@noindent
-Here the first field, or @code{$1}, is @samp{This}; the second field, or
-@code{$2}, is @samp{seems}; and so on. Note that the last field,
-@code{$7}, is @samp{example.}. Because there is no space between the
-@samp{e} and the @samp{.}, the period is considered part of the seventh
-field.@refill
-
-@cindex @code{$NF}, last field in record
-No matter how many fields there are, the last field in a record can be
-represented by @code{$NF}. So, in the example above, @code{$NF} would
-be the same as @code{$7}, which is @samp{example.}. Why this works is
-explained below (@pxref{Non-Constant Fields}). If you try to refer to a
-field beyond the last one, such as @code{$8} when the record has only 7
-fields, you get the empty string.
-
-@vindex NF
-@cindex Number of fields, @code{NF}
-Plain @code{NF}, with no @samp{$}, is a special variable whose value
-is the number of fields in the current record.
-
-@code{$0}, which looks like an attempt to refer to the zeroth field, is
-a special case: it represents the whole input record. This is what you
-would use when you aren't interested in fields.
-
-Here are some more examples:
-
-@example
-awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-This example contains the @dfn{matching} operator @code{~}
-(@pxref{Comparison Ops}). Using this operator, all records in the file
-@file{BBS-list} whose first field contains the string @samp{foo} are
-printed.@refill
-
-By contrast, the following example:
-
-@example
-awk '/foo/ @{ print $1, $NF @}' BBS-list
-@end example
-
-@noindent
-looks for the string @samp{foo} in @emph{the entire record} and prints
-the first field and the last field for each input record containing the
-pattern.@refill
-
-The following program will search the system password file, and print
-the entries for users who have no password.
-
-@example
-awk -F: '$2 == ""' /etc/passwd
-@end example
-
-@noindent
-This program uses the @samp{-F} option on the command line to set the
-field separator. (Fields in @file{/etc/passwd} are separated by colons.
-The second field represents a user's encrypted password, but if the
-field is empty, that user has no password.)
-
-@node Non-Constant Fields, Changing Fields, Fields, Reading Files
-@section Non-constant Field Numbers
-
-The number of a field does not need to be a constant. Any expression in
-the @code{awk} language can be used after a @samp{$} to refer to a
-field. The @code{awk} utility evaluates the expression and uses the
-@dfn{numeric value} as a field number. Consider this example:@refill
-
-@example
-awk '@{ print $NR @}'
-@end example
-
-@noindent
-Recall that @code{NR} is the number of records read so far: 1 in the
-first record, 2 in the second, etc. So this example will print the
-first field of the first record, the second field of the second record,
-and so on. For the twentieth record, field number 20 will be printed;
-most likely this will make a blank line, because the record will not
-have 20 fields.
-
-Here is another example of using expressions as field numbers:
-
-@example
-awk '@{ print $(2*2) @}' BBS-list
-@end example
-
-The @code{awk} language must evaluate the expression @samp{(2*2)} and use
-its value as the field number to print. The @samp{*} sign represents
-multiplication, so the expression @samp{2*2} evaluates to 4. This example,
-then, prints the hours of operation (the fourth field) for every line of the
-file @file{BBS-list}.@refill
-
-@cindex Fields, negative-numbered
-@cindex Negative-numbered fields
-When you use non--constant field numbers, you may ask for a field
-with a negative number. This always results in an empty string, just
-like a field whose number is too large for the input record. For
-example, @samp{$(1-4)} would try to examine field number -3; it would
-result in an empty string.
-
-If the field number you compute is zero, you get the entire record.
-
-The number of fields in the current record is stored in the special variable
-@code{NF} (@pxref{Special}). The expression @samp{$NF} is not a special
-feature: it is the direct consequence of evaluating @code{NF} and using
-its value as a field number.
-
-@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
-@section Changing the Contents of a Field
-
-@cindex Field, changing contents of
-@cindex Changing contents of a field
-You can change the contents of a field as seen by @code{awk} within an
-@code{awk} program; this changes what @code{awk} perceives as the
-current input record. (The actual input is untouched: @code{awk} never
-modifies the input file.)
-
-Look at this example:
-
-@example
-awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
-@end example
-
-@noindent
-The @samp{-} sign represents subtraction, so this program reassigns
-field three, @code{$3}, to be the value of field two minus ten,
-@samp{@code{$2} - 10}. (@xref{Arithmetic Ops}.) Then field two, and the
-new value for field three, are printed.
-
-In order for this to work, the text in field @code{$2} must make sense
-as a number; the string of characters must be converted to a number in
-order for the computer to do arithmetic on it. The number resulting
-from the subtraction is converted back to a string of characters which
-then becomes field 3. @xref{Conversion}.
-
-When you change the value of a field (as perceived by @code{awk}), the
-text of the input record is recalculated to contain the new field where
-the old one was. @code{$0} will from that time on reflect the altered
-field. Thus,
-
-@example
-awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
-@end example
-
-@noindent
-will print a copy of the input file, with 10 subtracted from the second
-field of each line.
-
-You can also assign contents to fields that are out of range. For
-example:
-
-@example
-awk '@{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 @}' inventory-shipped
-@end example
-
-@noindent
-We've just created @code{$6}, whose value is the average of fields
-@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign represents
-addition, and the @samp{/} sign represents division. For the file
-@file{inventory-shipped} @code{$6} represents the average number of parcels
-shipped for a particular month.
-
-Creating a new field changes what @code{awk} interprets as the current
-input record. The value of @code{$0} will be recomputed. This
-recomputation affects and is affected by features not yet discussed, in
-particular, the @dfn{Output Field Separator}, @code{OFS}, which is used
-to separate the fields (@pxref{Output Separators}), and @code{NF} (the
-number of fields; @pxref{Fields}). For example, the value of @code{NF}
-will be set to the number of the highest out--of--range field you
-create.@refill
-
-Note, however, that merely @emph{referencing} an out--of--range field
-will @emph{not} change the value of either @code{$0} or @code{NF}.
-Referencing an out--of--range field merely produces a null string. For
-example:@refill
-
-@example
-if ($(NF+1) != "")
- print "can't happen"
-else
- print "everything is normal"
-@end example
-
-@noindent
-should print @samp{everything is normal}. (@xref{If}, for more
-information about @code{awk}'s @samp{if-else} statements.)
-
-@node Field Separators, Multiple, Changing Fields, Reading Files
-@section Specifying How Fields Are Separated
-
-@vindex FS
-@cindex Fields, semantics of
-@cindex Fields, separating
-@cindex Field separator, @code{FS}
-You can change the way @code{awk} splits a record into fields by changing the
-value of the @dfn{field separator}. The field separator is represented by
-the special variable @code{FS} in an @code{awk} program, and can be set
-by @samp{-F} on the command line. The @code{awk} language scans each input
-line for the field separator character to determine the positions of fields
-within that line. Shell programmers take note! @code{awk} uses the variable
-@code{FS}, not @code{IFS}.@refill
-
-The default value of the field separator is a string containing a single
-space. This value is actually a special case; as you know, by default, fields
-are separated by whitespace sequences, not by single spaces: two spaces
-in a row do not delimit an empty field. ``Whitespace'' is defined as sequences
-of one or more spaces or tab characters.
-
-You change the value of @code{FS} by @dfn{assigning} it a new value. You
-can do this using the special @code{BEGIN} pattern (@pxref{BEGIN/END}).
-This pattern allows you to change the value of @code{FS} before any input is
-read. The new value of @code{FS} is enclosed in quotation marks. For example,
-set the value of @code{FS} to the string @samp{","}:
-
-@example
-awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
-@end example
-
-@noindent
-and use the input line:@refill
-
-@example
-John Q. Smith, 29 Oak St., Walamazoo, MI 42139
-@end example
-
-@noindent
-This @code{awk} program will extract the string @samp{29 Oak St.}.
-
-@cindex Separator character, choice of
-@cindex Field separator, choice of
-@cindex Regular expressions, field separators and
-Sometimes your input data will contain separator characters that don't
-separate fields the way you thought they would. For instance, the person's
-name in the example we've been using might have a title or suffix attached,
-such as @samp{John Q. Smith, LXIX}. If you assigned @code{FS} to be
-@samp{,} then:
-
-@example
-awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
-@end example
-
-@noindent
-would extract @samp{LXIX}, instead of @samp{29 Oak St.}. If you were
-expecting the program to print the address, you would be surprised. So,
-choose your data layout and separator characters carefully to prevent
-problems like this from happening.@refill
-
-You can assign @code{FS} to be a series of characters. For example, the
-assignment:@refill
-
-@example
-FS = ", \t"
-@end example
-
-@noindent
-makes every area of an input line that consists of a comma followed by a
-space and a tab, into a field separator. (@samp{\t} stands for a
-tab.)@refill
-
-If @code{FS} is any single character other than a blank, then that character
-is used as the field separator, and two successive occurrences of that
-character do delimit an empty field.
-
-If you assign @code{FS} to a string longer than one character, that string
-is evaluated as a @dfn{regular expression} (@pxref{Regexp}). The value of
-the regular expression is used as a field separator.
-
-@cindex Field separator, setting on command line
-@cindex Command line, setting @code{FS} on
-@code{FS} can be set on the command line. You use the @samp{-F} argument to
-do so. For example:
-
-@example
-awk -F, '@var{program}' @var{input-files}
-@end example
-
-@noindent
-sets @code{FS} to be the @samp{,} character. Notice that the argument uses
-a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file
-containing an @code{awk} program. Case is significant in command options:
-the @samp{-F} and @samp{-f} options have nothing to do with each other.
-You can use both options at the same time to set the @code{FS} variable
-@emph{and} get an @code{awk} program from a file.
-
-As a special case, if the argument to @samp{-F} is @samp{t}, then @code{FS}
-is set to the tab character. (This is because if you type @samp{-F\t},
-without the quotes, at the shell, the @samp{\} gets deleted, so @code{awk}
-figures that you really want your fields to be separated with tabs, and
-not @samp{t}s. Use @code{FS="t"} if you really do want to separate your
-fields with @samp{t}s.)
-
-For example, let's use an @code{awk} program file called @file{baud.awk}
-that contains the pattern @samp{/300/}, and the action @samp{print $1}.
-We'll use the operating system utility @code{cat} to ``look'' at our
-program:@refill
-
-@example
-% cat baud.awk
-/300/ @{ print $1 @}
-@end example
-
-Let's also set @code{FS} to be the @samp{-} character. We will apply
-all this information to the file @file{BBS-list}. This @code{awk} program
-will now print a list of the names of the bulletin boards that operate at
-300 baud and the first three digits of their phone numbers.@refill
-
-@example
-awk -F- -f baud.awk BBS-list
-@end example
-
-@noindent
-produces this output:
-
-@example
-aardvark 555
-alpo
-barfly 555
-bites 555
-camelot 555
-core 555
-fooey 555
-foot 555
-macfoo 555
-sdace 555
-sabafoo 555
-@end example
-
-@noindent
-Note the second line of output. If you check the original file, you will
-see that the second line looked like this:
-
-@example
-alpo-net 555-3412 2400/1200/300 A
-@end example
-
-The @samp{-} as part of the system's name was used as the field
-separator, instead of the @samp{-} in the phone number that was
-originally intended. This demonstrates why you have to be careful in
-choosing your field and record separators.
-
-@node Multiple, Assignment Options, Field Separators, Reading Files
-@section Multiple--Line Records
-
-@cindex Multiple line records
-@cindex Input, multiple line records
-@cindex Reading files, multiple line records
-@cindex Records, multiple line
-In some data bases, a single line cannot conveniently hold all the information
-in one entry. Then you will want to use multi--line records.
-
-The first step in doing this is to choose your data format: when records
-are not defined as single lines, how will you want to define them?
-What should separate records?
-
-One technique is to use an unusual character or string to separate
-records. For example, you could use the formfeed character (written
-@samp{\f} in @code{awk}, as in C) to separate them, making each record
-a page of the file. To do this, just set the variable @code{RS} to
-@code{"\f"} (a string containing the formfeed character), or whatever
-string you prefer to use.
-
-@ignore
-Another technique is to have blank lines separate records. The string
-@code{"^\n+"} is a regular expression that matches any sequence of
-newlines starting at the beginning of a line---in other words, it
-matches a sequence of blank lines. If you set @code{RS} to this string,
-a record will always end at the first blank line encountered. In
-addition, a regular expression always matches the longest possible
-sequence when there is a choice. So the next record won't start until
-the first nonblank line that follows---no matter how many blank lines
-appear in a row, they will be considered one record--separator.
-@end ignore
-
-Another technique is to have blank lines separate records.
-By a special dispensation, a null string as the value of @code{RS}
-indicates that records are separated by one or more blank lines.
-If you set @code{RS} to the null string,
-a record will always end at the first blank line encountered.
-And the next record won't start until
-the first nonblank line that follows---no matter how many blank lines
-appear in a row, they will be considered one record--separator.@refill
-
-The second step is to separate the fields in the record. One way to
-do this is to put each field on a separate line: to do this, just set
-the variable @code{FS} to the string @code{"\n"}. (This
-simple regular expression matches a single newline.) Another idea is to
-divide each of the lines into fields in the normal manner; the regular
-expression @w{@code{"[ \t\n]+"}} will do this nicely by treating the newlines
-inside the record just like spaces.@refill
-
-When @code{RS} is set to the null string, the newline character @emph{always}
-acts as a field separator. This is in addition to whatever value @code{FS}
-has. The probable reason for this rule is so that you get rational
-behavior in the default case (i.e. @w{@code{FS == " "}}). This can be
-a problem if you really don't want the newline character to separate
-fields, since there is no way to do that. However, you can work around this
-by using the @code{split} function to manually break up your data
-(@pxref{String Functions}).
-
-@ignore
-Here are two ways to use records separated by blank lines and break each
-line into fields normally:
-
-@example
-awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} @{ print $0 @}' BBS-list
-
-@exdent @r{or}
-
-awk 'BEGIN @{ RS = "^\n+"; FS = "[ \t\n]+" @} @{ print $0 @}' BBS-list
-@end example
-@end ignore
-
-Here is how to use records separated by blank lines and break each
-line into fields normally:
-
-@example
-awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} ; @{ print $0 @}' BBS-list
-@end example
-
-@node Assignment Options, Getline, Multiple, Reading Files
-@section Assigning Variables on the Command Line
-
-You can include variable @dfn{assignments} among the file names on the
-command line used to invoke @code{awk} (@pxref{Command Line}). Such
-assignments have the form:
-
-@example
-@var{variable}=@var{text}
-@end example
-
-@noindent
-and allow you to change variables either at the beginning of the
-@code{awk} run or in between input files. The variable assignment is
-performed at a time determined by its position among the input file
-arguments: after the processing of the preceding input file argument.
-For example:
-
-@example
-awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
-@end example
-
-@noindent
-prints the value of field number @code{n} for all input records. Before
-the first file is read, the command line sets the variable @code{n}
-equal to 4. This causes the fourth field of the file
-@file{inventory-shipped} to be printed. After the first file has
-finished, but before the second file is started, @code{n} is set to 2,
-so that the second field of the file @file{BBS-list} will be printed.
-
-Command line arguments are made available for explicit examination by
-the @code{awk} program in an array named @code{ARGV} (@pxref{Special}).
-
-@node Getline, , Assignment Options, Reading Files
-@section Explicit Input with @code{getline}
-
-@findex getline
-@cindex Input, @code{getline} function
-@cindex Reading files, @code{getline} function
-So far we have been getting our input files from @code{awk}'s main
-input stream---either the standard input (usually your terminal) or the
-files specified on the command line. The @code{awk} language has a
-special built--in function called @code{getline} that
-can be used to read input under your explicit control.
-
-This command is quite complex and should @emph{not} be used by
-beginners. The command (and its variations) is covered here because
-this is the section about input. The examples that follow the
-explanation of the @code{getline} command include material that has not
-been covered yet. Therefore, come back and attempt the @code{getline}
-command @emph{after} you have reviewed the rest of this manual and have
-a good knowledge of how @code{awk} works.
-
-When retrieving input, @code{getline} returns a 1 if it found a record, and
-a 0 if the end of the file was encountered. If there was some error in
-getting a record, such as a file that could not be opened, then @code{getline}
-returns a -1.
-
-In the following examples, @var{command} stands for a string value that
-represents a shell command.
-
-@table @code
-@item getline
-The @code{getline} function can be used by itself, in an @code{awk}
-program, to read input from the current input. All it does in this
-case is read the next input record and split it up into fields. This
-is useful if you've finished processing the current record, but you
-want to do some special processing @emph{right now} on the next
-record. Here's an example:@refill
-
-@example
-awk '@{
- if (t = index($0, "/*")) @{
- if(t > 1)
- tmp = substr($0, 1, t - 1)
- else
- tmp = ""
- u = index(substr($0, t + 2), "*/")
- while (! u) @{
- getline
- t = -1
- u = index($0, "*/")
- @}
- if(u <= length($0) - 2)
- $0 = tmp substr($0, t + u + 3)
- else
- $0 = tmp
- @}
- print $0
-@}'
-@end example
-
-This @code{awk} program deletes all comments, @samp{/* @dots{}
-*/}, from the input. By replacing the @samp{print $0} with other
-statements, you could perform more complicated processing on the
-de--commented input, such as search it for matches for a regular
-expression.
-
-This form of the @code{getline} command sets @code{NF} (the number of
-fields; @pxref{Fields}), @code{NR} (the number of records read so far), the
-@code{FNR} variable (@pxref{Records}), and the value of @code{$0}.
-
-@emph{Note:} The new value of @code{$0} will be used in testing
-the patterns of any subsequent rules. The original value
-of @code{$0} that triggered the rule which executed @code{getline}
-is lost. By contrast, the @code{next} statement reads a new record
-but immediately begins processing it normally, starting with the first
-rule in the program. @xref{Next}.
-
-@item getline @var{var}
-This form of @code{getline} reads a record into the variable @var{var}.
-This is useful when you want your program to read the next record from the
-input file, but you don't want to subject the record to the normal input
-processing.
-
-For example, suppose the next line is a comment, or a special string,
-and you want to read it, but you must make certain that it won't
-accidentally trigger any rules. This version of @code{getline} will
-allow you to read that line and store it in a variable so that the main
-read--a--line--and--check--each--rule loop of @code{awk} never sees it.
-
-The following example swaps every two lines of input. For example, given:
-
-@example
-wan
-tew
-free
-phore
-@end example
-
-@noindent
-it outputs:
-
-@example
-tew
-wan
-phore
-free
-@end example
-
-@noindent
-Here's the program:
-
-@example
-awk '@{
- if ((getline tmp) > 0) @{
- print tmp
- print $0
- @} else
- print $0
-@}'
-@end example
-
-The @code{getline} function used in this way sets only @code{NR} and
-@code{FNR} (and of course, @var{var}). The record is not split into fields,
-so the values of the fields (including @code{$0}) and the value of @code{NF}
-do not change.@refill
-
-@item getline < @var{file}
-This form of the @code{getline} function takes its input from the file
-@var{file}. Here @var{file} is a string--valued expression that
-specifies the file name.
-
-This form is useful if you want to read your input from a particular
-file, instead of from the main input stream. For example, the following
-program reads its input record from the file @file{foo.input} when it
-encounters a first field with a value equal to 10 in the current input
-file.@refill
-
-@example
-awk '@{
-if ($1 == 10) @{
- getline < "foo.input"
- print
-@} else
- print
-@}'
-@end example
-
-Since the main input stream is not used, the values of @code{NR} and
-@code{FNR} are not changed. But the record read is split into fields in
-the normal manner, so the values of @code{$0} and other fields are
-changed. So is the value of @code{NF}.
-
-This does not cause the record to be tested against all the patterns
-in the @code{awk} program, in the way that would happen if the record
-were read normally by the main processing loop of @code{awk}. However
-the new record is tested against any subsequent rules, just as when
-@code{getline} is used without a redirection.
-
-@item getline @var{var} < @var{file}
-This form of the @code{getline} function takes its input from the file
-@var{file} and puts it in the variable @var{var}. As above, @var{file}
-is a string--valued expression that specifies the file to read from.
-
-In this version of @code{getline}, none of the built--in variables are
-changed, and the record is not split into fields. The only variable
-changed is @var{var}.
-
-For example, the following program copies all the input files to the
-output, except for records that say @w{@code{@@include @var{filename}}}.
-Such a record is replaced by the contents of the file
-@var{filename}.@refill
-
-@example
-awk '@{
- if (NF == 2 && $1 == "@@include") @{
- while ((getline line < $2) > 0)
- print line
- close($2)
- @} else
- print
-@}'
-@end example
-
-Note here how the name of the extra input file is not built into
-the program; it is taken from the data, from the second field on
-the @samp{@@include} line.
-
-The @code{close} command is used to ensure that if two identical
-@samp{@@include} lines appear in the input, the entire specified file is
-included twice. @xref{Close Input}.
-
-One deficiency of this program is that it does not process nested
-@samp{@@include} statements the way a true macro preprocessor would.
-
-@item @var{command} | getline
-You can @dfn{pipe} the output of a command into @code{getline}. A pipe is
-simply a way to link the output of one program to the input of another. In
-this case, the string @var{command} is run as a shell command and its output
-is piped into @code{awk} to be used as input. This form of @code{getline}
-reads one record from the pipe.
-
-For example, the following program copies input to output, except for lines
-that begin with @samp{@@execute}, which are replaced by the output produced by
-running the rest of the line as a shell command:
-
-@example
-awk '@{
- if ($1 == "@@execute") @{
- tmp = substr($0, 10)
- while ((tmp | getline) > 0)
- print
- close(tmp)
- @} else
- print
-@}'
-@end example
-
-@noindent
-The @code{close} command is used to ensure that if two identical
-@samp{@@execute} lines appear in the input, the command is run again
-for each one. @xref{Close Input}.
-
-Given the input:
-
-@example
-foo
-bar
-baz
-@@execute who
-bletch
-@end example
-
-@noindent
-the program might produce:
-
-@example
-foo
-bar
-baz
-hack ttyv0 Jul 13 14:22
-hack ttyp0 Jul 13 14:23 (gnu:0)
-hack ttyp1 Jul 13 14:23 (gnu:0)
-hack ttyp2 Jul 13 14:23 (gnu:0)
-hack ttyp3 Jul 13 14:23 (gnu:0)
-bletch
-@end example
-
-@noindent
-Notice that this program ran the command @code{who} and printed the result.
-(If you try this program yourself, you will get different results, showing
-you who is logged in on your system.)
-
-This variation of @code{getline} splits the record into fields, sets the
-value of @code{NF} and recomputes the value of @code{$0}. The values of
-@code{NR} and @code{FNR} are not changed.
-
-@item @var{command} | getline @var{var}
-The output of the command @var{command} is sent through a pipe to
-@code{getline} and into the variable @var{var}. For example, the
-following program reads the current date and time into the variable
-@code{current_time}, using the utility called @code{date}, and then
-prints it.@refill
-
-@group
-@example
-awk 'BEGIN @{
- "date" | getline current_time
- close("date")
- print "Report printed on " current_time
-@}'
-@end example
-@end group
-
-In this version of @code{getline}, none of the built--in variables are
-changed, and the record is not split into fields.
-@end table
-
-@node Close Input, , , Getline
-@subsection Closing Input Files
-@cindex @code{close} statement for input
-
-If the same file name or the same shell command is used with
-@code{getline} more than once during the execution of the @code{awk}
-program, the file is opened (or the command is executed) only the first time.
-At that time, the first record of input is read from that file or command.
-The next time the same file or command is used in @code{getline}, another
-record is read from it, and so on.
-
-What this implies is that if you want to start reading the same file
-again from the beginning, or if you want to rerun a shell command
-(rather than reading more output from the command), you must take
-special steps. What you can do is use the @code{close} statement:
-
-@example
-close (@var{filename})
-@end example
-
-@noindent
-This statement closes a file or pipe, represented here by
-@var{filename}. The string value of @var{filename} must be the same
-value as the string used to open the file or pipe to begin with.
-
-Once this statement is executed, the next @code{getline} from that file
-or command will reopen the file or rerun the command.
-
-@node Printing, One-liners, Reading Files, Top
-@chapter Printing Output
-
-@cindex Printing, general
-@cindex Output
-One of the most common things that actions do is to output or @dfn{print}
-some or all of the input. For simple output, use the @code{print}
-statement. For fancier formatting use the @code{printf} statement.
-Both are described in this chapter.
-
-@menu
-* Print:: The @code{print} statement.
-* Print Examples:: Simple examples of @code{print} statements.
-* Output Separators:: The output separators and how to change them.
-
-* Redirection:: How to redirect output to multiple files and pipes.
-* Close Output:: How to close output files and pipes.
-
-* Printf:: The @code{printf} statement.
-@end menu
-
-@node Print, Print Examples, , Printing
-@section The @code{print} Statement
-@cindex @code{print} statement
-
-The @code{print} statement does output with simple, standardized
-formatting. You specify only the strings or numbers to be printed, in a
-list separated by commas. They are output, separated by single spaces,
-followed by a newline. The statement looks like this:
-
-@example
-print @var{item1}, @var{item2}, @dots{}
-@end example
-
-@noindent
-The entire list of items may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection}). The relational operators are @samp{==},
-@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops}).@refill
-
-The items printed can be constant strings or numbers, fields of the
-current record (such as @code{$1}), variables, or any @code{awk}
-expressions. The @code{print} statement is completely general for
-computing @emph{what} values to print. With one exception
-(@pxref{Output Separators}), what you can't do is specify @emph{how} to
-print them---how many columns to use, whether to use exponential
-notation or not, and so on. For that, you need the @code{printf}
-statement (@pxref{Printf}).
-
-To print a fixed piece of text, write a string constant as one item,
-such as @w{@code{"Hello there"}}. If you forget to use the double--quote
-characters, your text will be taken as an @code{awk} expression, and
-you will probably get an error. Keep in mind that a space will be printed
-between any two items.
-
-The simple statement @samp{print} with no items is equivalent to
-@samp{print $0}: it prints the entire current record. To print a blank
-line, use @samp{print ""}, where @code{""} is the null, or empty,
-string.
-
-Most often, each @code{print} statement makes one line of output. But it
-isn't limited to one line. If an item value is a string that contains a
-newline, the newline is output along with the rest of the string. A
-single @code{print} can make any number of lines this way.
-
-@node Print Examples, Output Separators, Print, Printing
-@section Examples of @code{print} Statements
-
-Here is an example that prints the first two fields of each input record,
-with a space between them:
-
-@example
-awk '@{ print $1, $2 @}' inventory-shipped
-@end example
-
-@noindent
-Its output looks like this:
-
-@example
-Jan 13
-Feb 15
-Mar 15
-@dots{}
-@end example
-
-A common mistake in using the @code{print} statement is to omit the comma
-between two items. This often has the effect of making the items run
-together in the output, with no space. The reason for this is that
-juxtaposing two string expressions in @code{awk} means to concatenate
-them. For example, without the comma:
-
-@example
-awk '@{ print $1 $2 @}' inventory-shipped
-@end example
-
-@noindent
-prints:
-
-@example
-Jan13
-Feb15
-Mar15
-@dots{}
-@end example
-
-Neither example's output makes much sense to someone unfamiliar with the
-file @file{inventory-shipped}. A heading line at the beginning would make
-it clearer. Let's add some headings to our table of months (@code{$1}) and
-green crates shipped (@code{$2}). We do this using the @code{BEGIN} pattern
-(@pxref{BEGIN/END}) to cause the headings to be printed only once:
-
-@c the formatting is strange here because the @{ becomes just a brace.
-@example
-awk 'BEGIN @{ print "Month Crates"
- print "----- ------" @}
- @{ print $1, $2 @}' inventory-shipped
-@end example
-
-@noindent
-Did you already guess what will happen? This program prints the following:
-
-@group
-@example
-Month Crates
------ ------
-Jan 13
-Feb 15
-Mar 15
-@dots{}
-@end example
-@end group
-
-@noindent
-The headings and the table data don't line up! We can fix this by printing
-some spaces between the two fields:
-
-@example
-awk 'BEGIN @{ print "Month Crates"
- print "----- ------" @}
- @{ print $1, " ", $2 @}' inventory-shipped
-@end example
-
-You can imagine that this way of lining up columns can get pretty
-complicated when you have many columns to fix. Counting spaces for two
-or three columns can be simple, but more than this and you can get
-``lost'' quite easily. This is why the @code{printf} statement was
-created (@pxref{Printf}); one of its specialties is lining up columns of
-data.
-
-@node Output Separators, Redirection, Print Examples, Printing
-@section Output Separators
-
-@cindex Output field separator, @code{OFS}
-@vindex OFS
-@vindex ORS
-@cindex Output record separator, @code{ORS}
-As mentioned previously, a @code{print} statement contains a list
-of items, separated by commas. In the output, the items are normally
-separated by single spaces. But they do not have to be spaces; a
-single space is only the default. You can specify any string of
-characters to use as the @dfn{output field separator}, by setting the
-special variable @code{OFS}. The initial value of this variable
-is the string @w{@code{" "}}.
-
-The output from an entire @code{print} statement is called an
-@dfn{output record}. Each @code{print} statement outputs one output
-record and then outputs a string called the @dfn{output record separator}.
-The special variable @code{ORS} specifies this string. The initial
-value of the variable is the string @code{"\n"} containing a newline
-character; thus, normally each @code{print} statement makes a separate line.
-
-You can change how output fields and records are separated by assigning
-new values to the variables @code{OFS} and/or @code{ORS}. The usual
-place to do this is in the @code{BEGIN} rule (@pxref{BEGIN/END}), so
-that it happens before any input is processed. You may also do this
-with assignments on the command line, before the names of your input
-files.
-
-The following example prints the first and second fields of each input
-record separated by a semicolon, with a blank line added after each
-line:@refill
-
-@example
-awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
- @{ print $1, $2 @}' BBS-list
-@end example
-
-If the value of @code{ORS} does not contain a newline, all your output
-will be run together on a single line, unless you output newlines some
-other way.
-
-@node Redirection, Printf, Output Separators, Printing
-@section Redirecting Output of @code{print} and @code{printf}
-
-@cindex Output redirection
-@cindex Redirection of output
-@cindex @code{>}
-@cindex @code{>>}
-@cindex @code{|}
-@ignore
-@strong{ADR: This section and the section on closing files and pipes should
-come @emph{after} the section on @code{printf}. @emph{First} describe
-all the options for output, and @emph{then} describe how to redirect
-the output.}
-@end ignore
-
-So far we have been dealing only with output that prints to the standard
-output, usually your terminal. Both @code{print} and @code{printf} can be
-told to send their output to other places. This is called
-@dfn{redirection}.@refill
-
-A redirection appears after the @code{print} or @code{printf} statement.
-Redirections in @code{awk} are written just like redirections in shell
-commands, except that they are written inside the @code{awk} program.
-
-Here are the three forms of output redirection. They are all shown for
-the @code{print} statement, but they work for @code{printf} also.
-
-@table @code
-@item print @var{items} > @var{output-file}
-This type of redirection prints the items onto the output file
-@var{output-file}. The file name @var{output-file} can be any
-expression. Its value is changed to a string and then used as a
-filename (@pxref{Expressions}).@refill
-
-When this type of redirection is used, the @var{output-file} is erased
-before the first output is written to it. Subsequent writes do not
-erase @var{output-file}, but append to it. If @var{output-file} does
-not exist, then it is created.@refill
-
-For example, here is how one @code{awk} program can write a list of
-BBS names to a file @file{name-list} and a list of phone numbers to a
-file @file{phone-list}. Each output file contains one name or number
-per line.
-
-@example
-awk '@{ print $2 > "phone-list"
- print $1 > "name-list" @}' BBS-list
-@end example
-
-@item print @var{items} >> @var{output-file}
-This type of redirection prints the items onto the output file
-@var{output-file}. The difference between this and the
-single--@samp{>} redirection is that the old contents (if any) of
-@var{output-file} are not erased. Instead, the @code{awk} output is
-appended to the file.
-
-@cindex Pipes for output
-@cindex Output, piping
-@item print @var{items} | @var{command}
-It is also possible to send output through a @dfn{pipe} instead of into a
-file. This type of redirection opens a pipe to @var{command} and writes
-the values of @var{items} through this pipe, to another process created
-to execute @var{command}.@refill
-
-The redirection argument @var{command} is actually an @code{awk}
-expression. Its value is converted to a string, whose contents give the
-shell command to be run.
-
-For example, this produces two files, one unsorted list of BBS names
-and one list sorted in reverse alphabetical order:
-
-@example
-awk '@{ print $1 > "names.unsorted"
- print $1 | "sort -r > names.sorted" @}' BBS-list
-@end example
-
-Here the unsorted list is written with an ordinary redirection while
-the sorted list is written by piping through the @code{sort} utility.
-
-Here is an example that uses redirection to mail a message to a mailing
-list @samp{bug-system}. This might be useful when trouble is encountered
-in an @code{awk} script run periodically for system maintenance.
-
-@example
-print "Awk script failed:", $0 | "mail bug-system"
-print "processing record number", FNR, "of", FILENAME | "mail bug-system"
-close ("mail bug-system")
-@end example
-
-We use a @code{close} statement here because it's a good idea to close
-the pipe as soon as all the intended output has been sent to it.
-@xref{Close Output}, for more information on this.
-@end table
-
-Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
-to open a file or pipe only if the particular @var{output-file} or
-@var{command} you've specified has not already been written to by your
-program.@refill
-
-@node Close Output, , , Redirection
-@subsection Closing Output Files and Pipes
-@cindex @code{close} statement for output
-@cindex Closing files and pipes
-
-When a file or pipe is opened, the filename or command associated with
-it is remembered by @code{awk} and subsequent writes to the same file or
-command are appended to the previous writes. The file or pipe stays
-open until @code{awk} exits. This is usually convenient.
-
-Sometimes there is a reason to close an output file or pipe earlier
-than that. To do this, use the @code{close} command, as follows:
-
-@example
-close (@var{filename})
-@end example
-
-@noindent
-or
-
-@example
-close (@var{command})
-@end example
-
-The argument @var{filename} or @var{command} can be any expression.
-Its value must exactly equal the string used to open the file or pipe
-to begin with---for example, if you open a pipe with this:
-
-@example
-print $1 | "sort -r > names.sorted"
-@end example
-
-@noindent
-then you must close it with this:
-
-@example
-close ("sort -r > names.sorted")
-@end example
-
-Here are some reasons why you might need to close an output file:
-
-@itemize @bullet
-@item
-To write a file and read it back later on in the same @code{awk}
-program. Close the file when you are finished writing it; then
-you can start reading it with @code{getline} (@pxref{Getline}).
-
-@item
-To write numerous files, successively, in the same @code{awk}
-program. If you don't close the files, eventually you will exceed the
-system limit on the number of open files in one process. So close
-each one when you are finished writing it.
-
-@item
-To make a command finish. When you redirect output through a pipe,
-the command reading the pipe normally continues to try to read input
-as long as the pipe is open. Often this means the command cannot
-really do its work until the pipe is closed. For example, if you
-redirect output to the @code{mail} program, the message will not
-actually be sent until the pipe is closed.
-
-@item
-To run the same subprogram a second time, with the same arguments.
-This is not the same thing as giving more input to the first run!
-
-For example, suppose you pipe output to the @code{mail} program. If you
-output several lines redirected to this pipe without closing it, they make
-a single message of several lines. By contrast, if you close the pipe
-after each line of output, then each line makes a separate message.
-@end itemize
-
-@node Printf, , Redirection, Printing
-@section Using @code{printf} Statements For Fancier Printing
-@cindex Formatted output
-@cindex Output, formatted
-
-If you want more precise control over the output format than
-@code{print} gives you, use @code{printf}. With @code{printf} you can
-specify the width to use for each item, and you can specify various
-stylistic choices for numbers (such as what radix to use, whether to
-print an exponent, whether to print a sign, and how many digits to print
-after the decimal point). You do this by specifying a @dfn{format
-string}.
-
-@menu
-* Basic Printf:: Syntax of the @code{printf} statement.
-* Format-Control:: Format-control letters.
-* Modifiers:: Format--specification modifiers.
-* Printf Examples:: Several examples.
-@end menu
-
-@node Basic Printf, Format-Control, , Printf
-@subsection Introduction to the @code{printf} Statement
-
-@cindex @code{printf} statement, format of
-The @code{printf} statement looks like this:@refill
-
-@example
-printf @var{format}, @var{item1}, @var{item2}, @dots{}
-@end example
-
-@noindent
-The entire list of items may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection}). The relational operators are @samp{==},
-@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops}).@refill
-
-@cindex Format string
-The difference between @code{printf} and @code{print} is the argument
-@var{format}. This is an expression whose value is taken as a string; its
-job is to say how to output each of the other arguments. It is called
-the @dfn{format string}.
-
-The format string is essentially the same as in the C library function
-@code{printf}. Most of @var{format} is text to be output verbatim.
-Scattered among this text are @dfn{format specifiers}, one per item.
-Each format specifier says to output the next item at that place in the
-format.@refill
-
-The @code{printf} statement does not automatically append a newline to its
-output. It outputs nothing but what the format specifies. So if you want
-a newline, you must include one in the format. The output separator
-variables @code{OFS} and @code{ORS} have no effect on @code{printf}
-statements.
-
-@node Format-Control, Modifiers, Basic Printf, Printf
-@subsection Format--Control Characters
-@cindex @code{printf}, format-control characters
-
-
-@cindex Format specifier
-A format specifier starts with the character @samp{%} and ends with a
-@dfn{format--control letter}; it tells the @code{printf} statement how
-to output one item. (If you actually want to output a @samp{%}, write
-@samp{%%}.) The format--control letter specifies what kind of value to
-print. The rest of the format specifier is made up of optional
-@dfn{modifiers} which are parameters such as the field width to use.
-
-Here is a list of them:
-
-@table @samp
-@item c
-This prints a number as an ASCII character. Thus, @samp{printf "%c",
-65} outputs the letter @samp{A}. The output for a string value is
-the first character of the string.
-
-@item d
-This prints a decimal integer.
-
-@item e
-This prints a number in scientific (exponential) notation.
-For example,
-
-@example
-printf "%4.3e", 1950
-@end example
-
-@noindent
-prints @samp{1.950e+03}, with 3 digits following the decimal point.
-The @samp{4} is the minimum field width, which this output already
-exceeds, and the @samp{.3} is the precision. The @samp{4.3} are
-@dfn{modifiers}, discussed below.
-
-@item f
-This prints a number in floating point notation.
-
-@item g
-This prints either scientific notation or floating point notation, whichever
-is shorter.
-
-@item o
-This prints an unsigned octal integer.
-
-@item s
-This prints a string.
-
-@item x
-This prints an unsigned hexadecimal integer.
-
-@item %
-This isn't really a format--control letter, but it does have a meaning
-when used after a @samp{%}: the sequence @samp{%%} outputs one
-@samp{%}. It does not consume an argument.
-@end table
-
-@node Modifiers, Printf Examples, Format-Control, Printf
-@subsection Modifiers for @code{printf} Formats
-
-@cindex @code{printf}, modifiers
-@cindex Modifiers (in format specifiers)
-A format specification can also include @dfn{modifiers} that can control
-how much of the item's value is printed and how much space it gets. The
-modifiers come between the @samp{%} and the format--control letter. Here
-are the possible modifiers, in the order in which they may appear:
-
-@table @samp
-@item -
-The minus sign, used before the width modifier, says to left--justify
-the argument within its specified width. Normally the argument
-is printed right--justified in the specified width.
-
-@item @var{width}
-This is a number representing the desired width of a field. Inserting any
-number between the @samp{%} sign and the format control character forces the
-field to be expanded to this width. The default way to do this is to
-pad with spaces on the left.
-
-@item .@var{prec}
-This is a number that specifies the precision to use when printing.
-This specifies the number of digits you want printed to the right of the
-decimal point.
-@end table
-
-The C library @code{printf}'s dynamic @var{width} and @var{prec}
-capability (for example, @code{"%*.*s"}) is not supported. However, it can
-be easily simulated using concatenation to dynamically build the
-format string.
-
-@node Printf Examples, , Modifiers, Printf
-@subsection Examples of Using @code{printf}
-
-Here is how to use @code{printf} to make an aligned table:
-
-@example
-awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end example
-
-@noindent
-prints the names of bulletin boards (@code{$1}) of the file
-@file{BBS-list} as a string of 10 characters, left justified. It also
-prints the phone numbers (@code{$2}) afterward on the line. This will
-produce an aligned two--column table of names and phone numbers, like so:@refill
-
-@example
-aardvark 555-5553
-alpo-net 555-3412
-barfly 555-7685
-bites 555-1675
-camelot 555-0542
-core 555-2912
-fooey 555-1234
-foot 555-6699
-macfoo 555-6480
-sdace 555-3430
-sabafoo 555-2127
-@end example
-
-Did you notice that we did not specify that the phone numbers be
-printed as numbers? They had to be printed as strings because the
-numbers are separated by a dash. This dash would be interpreted as a
-@dfn{minus} sign if we had tried to print the phone numbers as
-numbers. This would have led to some pretty confusing results.
-
-We did not specify a width for the phone numbers because they are the
-last things on their lines. We don't need to put spaces after them.
-
-We could make our table look even nicer by adding headings to the tops of
-the columns. To do this, use the @code{BEGIN} pattern (@pxref{BEGIN/END}) to cause
-the header to be printed only once, at the beginning of the @code{awk}
-program:
-
-@example
-awk 'BEGIN @{ print "Name Number"
- print "---- ------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end example
-
-Did you notice that we mixed @code{print} and @code{printf} statements in
-the above example? We could have used just @code{printf} statements to get
-the same results:
-
-@example
-awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
- printf "%-10s %s\n", "----", "------" @}
- @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end example
-
-@noindent
-By outputting each column heading with the same format specification
-used for the elements of the column, we have made sure that the headings
-will be aligned just like the columns.
-
-The fact that the same format specification is used can be emphasized
-by storing it in a variable, like so:
-
-@example
-awk 'BEGIN @{ format = "%-10s %s\n"
- printf format, "Name", "Number"
- printf format, "----", "------" @}
- @{ printf format, $1, $2 @}' BBS-list
-@end example
-
-See if you can use the @code{printf} statement to line up the headings and
-table data for our @file{inventory-shipped} example covered earlier in the
-section on the @code{print} statement (@pxref{Print}).
-
-@node One-liners, Patterns, Printing, Top
-@chapter Useful ``One-liners''
-
-@cindex One-liners
-Useful @code{awk} programs are often short, just a line or two. Here is a
-collection of useful, short programs to get you started. Some of these
-programs contain constructs that haven't been covered yet. The description
-of the program will give you a good idea of what is going on, but please
-read the rest of the manual to become an @code{awk} expert!
-
-@table @code
-@item awk '@{ num_fields = num_fields + NF @}
-@itemx @code{ END @{ print num_fields @}'}
-This program prints the total number of fields in all input lines.
-
-@item awk 'length($0) > 80'
-This program prints every line longer than 80 characters. The sole
-rule has a relational expression as its pattern, and has no action (so the
-default action, printing the record, is used).
-
-@item awk 'NF > 0'
-This program prints every line that has at least one field. This is an
-easy way to delete blank lines from a file (or rather, to create a new
-file similar to the old file but from which the blank lines have been
-deleted).
-
-
-@item awk '@{ if (NF > 0) print @}'
-This program also prints every line that has at least one field. Here we
-allow the rule to match every line, then decide in the action whether
-to print.
-
-@item awk 'BEGIN @{ for (i = 1; i <= 7; i++)
-@itemx @code{ print int(101 * rand()) @}'}
-This program prints 7 random numbers from 0 to 100, inclusive.
-
-@item ls -l @var{files} | awk '@{ x += $4 @} ; END @{ print "total bytes: " x @}'
-This program prints the total number of bytes used by @var{files}.
-
-@item expand @var{file} | awk '@{ if (x < length()) x = length() @}
-@itemx @code{ END @{ print "maximum line length is " x @}'}
-This program prints the maximum line length of @var{file}. The input
-is piped through the @code{expand} program to change tabs into spaces,
-so the widths compared are actually the right--margin columns.
-@end table
-
-@node Patterns, Actions, One-liners, Top
-@chapter Patterns
-
-@cindex Patterns, definition of
-@cindex Patterns, types of
-Patterns control the execution of rules: a rule is executed when its
-pattern matches the input record. The @code{awk} language provides
-several special patterns that are described in the sections that
-follow. Patterns include:@refill
-
-@ignore
-@strong{I think the ordering here needs to be rearranged. @code{BEGIN}
-and @code{END} first, then @var{null}, /@var{regexp}/, @var{condexp},
-@var{condexp bool condexp}, @var{exp1} ? @var{exp2} : @var{exp3}, and
-finally the range pattern.}
-@end ignore
-
-@table @asis
-@item @var{null}
-The empty pattern, which matches every input record. (@xref{Empty, , The
-Empty Pattern}.)
-
-@item /@var{regular expression}/
-A regular expression as a pattern. It matches when the text of the
-input record fits the regular expression. (@xref{Regexp, , Regular
-Expressions as Patterns}.)
-
-@item @var{condexp}
-A single comparison expression. It matches when it is true.
-(@xref{Comparison Patterns, , Comparison Expressions as Patterns}.)
-
-@item @code{BEGIN}
-@itemx @code{END}
-Special patterns to supply start--up or clean--up information to
-@code{awk}. (@xref{BEGIN/END}.)
-
-@item @var{pat1}, @var{pat2}
-A pair of patterns separated by a comma, specifying a range of records.
-(@xref{Ranges, , Specifying Record Ranges With Patterns}.)
-
-@item @var{condexp1} @var{boolean} @var{condexp2}
-A @dfn{compound} pattern, which combines expressions with the operators
-@samp{and} (@code{&&}) and @samp{or} (@code{||}). (@xref{Boolean, ,
-Boolean Operators and Patterns}.)
-
-@item ! @var{condexp}
-The pattern @var{condexp} is evaluated. Then the @code{!} performs a
-boolean ``not'' or logical negation operation; if the input line matches
-the pattern in @var{condexp} then the associated action is @emph{not}
-executed. If the input line did not match that pattern, then the action
-@emph{is} executed. (@xref{Boolean, , Boolean Operators and Patterns}.)
-
-@item (@var{expr})
-Parentheses may be used to control how operators nest.
-
-@item @var{pat1} ? @var{pat2} : @var{pat3}
-The first pattern is evaluated. If it is true, the input line is tested
-against the second pattern, otherwise it is tested against the third.
-(@xref{Conditional Patterns, , Conditional Patterns}.)
-@end table
-
-@menu
-The following subsections describe these forms in detail:
-
-* Empty:: The empty pattern, which matches every record.
-
-* Regexp:: Regular expressions such as @samp{/foo/}.
-
-* Comparison Patterns:: Comparison expressions such as @samp{$1 > 10}.
-
-* Boolean:: Combining comparison expressions.
-
-* Ranges:: Using pairs of patterns to specify record ranges.
-
-* BEGIN/END:: Specifying initialization and cleanup rules.
-
-* Conditional Patterns:: Patterns such as @samp{pat1 ? pat2 : pat3}.
-@end menu
-
-@node Empty, Regexp, , Patterns
-@section The Empty Pattern
-
-@cindex Empty pattern
-@cindex Pattern, empty
-An empty pattern is considered to match @emph{every} input record. For
-example, the program:@refill
-
-@example
-awk '@{ print $1 @}' BBS-list
-@end example
-
-@noindent
-prints just the first field of every record.
-
-@node Regexp, Comparison Patterns, Empty, Patterns
-@section Regular Expressions as Patterns
-@cindex Pattern, regular expressions
-@cindex Regexp
-@cindex Regular expressions as patterns
-
-A @dfn{regular expression}, or @dfn{regexp}, is a way of describing
-classes of strings. When enclosed in slashes (@code{/}), it makes
-an @code{awk} pattern that matches every input record that contains
-a match for the regexp.
-
-The simplest regular expression is a sequence of letters, numbers, or
-both. Such a regexp matches any string that contains that sequence.
-Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
-(More complicated regexps let you specify classes of similar strings.)
-
-@menu
-* Usage: Regexp Usage. How regexps are used in patterns.
-* Operators: Regexp Operators. How to write a regexp.
-@end menu
-
-@node Regexp Usage, Regexp Operators, , Regexp
-@subsection How to use Regular Expressions
-
-When you enclose @samp{foo} in slashes, you get a pattern that matches
-a record that contains @samp{foo}. For example, this prints the second
-field of each record that contains @samp{foo} anywhere:
-
-@example
-awk '/foo/ @{ print $2 @}' BBS-list
-@end example
-
-@cindex Regular expression matching operators
-@cindex String-matching operators
-@cindex Operators, string-matching
-@cindex Operators, regular expression matching
-@cindex regexp search operators
-Regular expressions can also be used in comparison expressions. Then
-you can specify the string to match against; it need not be the entire
-current input record. These comparison expressions can be used as
-patterns or in @code{if} and @code{while} statements.
-
-@table @code
-@item @var{exp} ~ /@var{regexp}/
-This is true if the expression @var{exp} (taken as a character string) is
-matched by @var{regexp}. The following example matches, or selects, all
-input records with the letter @samp{J} in the first field:@refill
-
-@example
-awk '$1 ~ /J/' inventory-shipped
-@end example
-
-So does this:
-
-@example
-awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
-@end example
-
-@item @var{exp} !~ /@var{regexp}/
-This is true if the expression @var{exp} (taken as a character string) is
-@emph{not} matched by @var{regexp}. The following example matches, or
-selects, all input records whose first field @emph{does not} contain the
-letter @samp{J}:@refill
-
-@example
-awk '$1 !~ /J/' inventory-shipped
-@end example
-@end table
-
-@cindex Computed Regular Expressions
-@cindex Regular Expressions, Computed
-@cindex Dynamic Regular Expressions
-@cindex Regular Expressions, Dynamic
-The right hand side of a @code{~} or @code{!~} operator need not be
-a constant regexp (i.e. a string of characters between @samp{/}s). It can
-also be @dfn{computed}, or @dfn{dynamic}. For example:
-
-@example
-identifier = "[A-Za-z_][A-Za-z_0-9]*"
-$0 ~ identifier
-@end example
-
-@noindent
-sets @code{identifier} to a regexp that describes @code{awk} variable
-names, and tests if the input record matches this regexp.
-
-A dynamic regexp may actually be any expression. The expression is
-evaluated, and the result is treated as a string that describes a
-regular expression.
-
-@node Regexp Operators, , Regexp Usage, Regexp
-@subsection Regular Expression Operators
-@cindex Metacharacters
-@cindex Regular expression, metacharacters
-
-You can combine regular expressions with the following characters,
-called @dfn{regular expression operators}, or @dfn{metacharacters}, to
-increase the power and versatility of regular expressions. This is
-a table of metacharacters:
-
-@table @code
-@item \
-This is used to suppress the special meaning of a character when
-matching. For example:
-
-@example
-\$
-@end example
-
-@noindent
-matches the character @samp{$}.
-
-@item ^
-This matches the beginning of the string or the beginning of a line
-within the string. For example:
-
-@example
-^@@chapter
-@end example
-
-@noindent
-matches the @samp{@@chapter} at the beginning of a string, and can be used
-to identify chapter beginnings in Texinfo source files.
-
-@item $
-This is similar to @code{^}, but it matches only at the end of a string
-or the end of a line within the string. For example:
-
-@example
-/p$/
-@end example
-
-@noindent
-as a pattern matches a record that ends with a @samp{p}.
-
-@item .
-This matches any single character except a newline. For example:
-
-@example
-.P
-@end example
-
-@noindent
-matches any single character followed by a @samp{P} in a string. Using
-concatenation we can make regular expressions like @samp{U.A}, which matches
-any three--character string that begins with @samp{U} and ends with @samp{A}.
-
-@item [@dots{}]
-This is called a @dfn{character set}. It matches any one of a group of
-characters that are enclosed in the square brackets. For example:
-
-@example
-[MVX]
-@end example
-
-@noindent
-matches any of the characters @samp{M}, @samp{V}, or @samp{X} in a
-string.@refill
-
-Ranges of characters are indicated by using a hyphen between the beginning
-and ending characters, and enclosing the whole thing in brackets. For
-example:@refill
-
-@example
-[0-9]
-@end example
-
-@noindent
-matches any string that contains a digit.
-
-Note that special rules have to be followed to match the characters
-@samp{]}, @samp{-}, and @samp{^} when they are enclosed in the square
-brackets. To match a @samp{]}, make it the first character in the set.
-For example:
-
-@example
-[]d]
-@end example
-
-@noindent
-matches either @samp{]}, or @samp{d}.@refill
-
-To match @samp{-}, write it as @samp{---}, which is a range containing only
-@samp{-}. You may also make the @samp{-} be the first or last character
-in the set. To match @samp{^}, make it any character except the first one of
-a set.
-
-@item [^ @dots{}]
-This is the @dfn{complemented character set}. The first character after
-the @samp{[} @emph{must} be a @samp{^}. This matches any single character
-@emph{except} those in the square brackets. For example:
-
-@example
-[^0-9]
-@end example
-
-@noindent
-matches any single character that is not a digit.
-
-@item |
-This is the @dfn{alternation operator} and it is used to specify
-alternatives. For example:
-
-@example
-^P|[0-9]
-@end example
-
-@noindent
-matches any string that matches either @samp{^P} or @samp{[0-9]}. This
-means it matches any string that contains a digit or starts with @samp{P}.
-
-@item (@dots{})
-Parentheses are used for grouping in regular expressions as in
-arithmetic. They can be used to concatenate regular expressions
-containing the alternation operator, @samp{|}.
-
-@item *
-This symbol means that the preceding regular expression is to be
-repeated as many times as possible to find a match. For example:
-
-@example
-ph*
-@end example
-
-@noindent
-applies the @code{*} symbol to the preceding @samp{h} and looks for matches
-to one @samp{p} followed by any number of @samp{h}'s. This will also match
-just @samp{p} if no @samp{h}'s are present.
-
-The @code{*} repeats the @emph{smallest} possible preceding expression;
-use parentheses if you wish to repeat a larger expression. The
-@code{awk} language processes a @code{*} by matching as many
-repetitions as can be found. For example:
-
-@example
-awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
-@end example
-
-@noindent
-matches every record in the input containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.@refill
-
-@item +
-This symbol is similar to @code{*}, but the preceding expression must be
-matched at least once. This means that:
-
-@example
-wh+y
-@end example
-
-@noindent
-would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas @samp{wh*y}
-would match all three of these strings. And this is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
-
-@item ?
-This symbol is similar to @code{*}, but the preceding expression can be
-matched once or not at all. For example:
-
-@example
-fe?d
-@end example
-
-@noindent
-will match @samp{fed} or @samp{fd}, but nothing else.@refill
-@end table
-
-In regular expressions, the @code{*}, @code{+}, and @code{?} operators have
-the highest precedence, followed by concatenation, and finally by @code{|}.
-As in arithmetic, parentheses can change how operators are grouped.@refill
-
-Any other character stands for itself. However, it is important to note
-that case in regular expressions @emph{is} significant, both when matching
-ordinary (i.e. non--metacharacter) characters, and inside character sets.
-Thus a @samp{w} in a regular expression matches only a lowercase @samp{w}
-and not an uppercase @samp{W}. When you want to
-do a case--independent match, you have to use a character set: @samp{[Ww]}.
-
-@node Comparison Patterns, Ranges, Regexp, Patterns
-@section Comparison Expressions as Patterns
-@cindex Comparison expressions as patterns
-@cindex Pattern, comparison expressions
-@cindex Relational operators
-@cindex Operators, relational
-
-@dfn{Comparison patterns} use @dfn{relational operators} to compare
-strings or numbers. The relational operators are the same as in C.
-Here is a table of them:
-
-@table @code
-@item @var{x} < @var{y}
-True if @var{x} is less than @var{y}.
-
-@item @var{x} <= @var{y}
-True if @var{x} is less than or equal to @var{y}.
-
-@item @var{x} > @var{y}
-True if @var{x} is greater than @var{y}.
-
-@item @var{x} >= @var{y}
-True if @var{x} is greater than or equal to @var{y}.
-
-@item @var{x} == @var{y}
-True if @var{x} is equal to @var{y}.
-
-@item @var{x} != @var{y}
-True if @var{x} is not equal to @var{y}.
-@end table
-
-Comparison expressions can be used as patterns to control whether a
-rule is executed. The expression is evaluated for each input record
-read, and the pattern is considered matched if the condition is
-@dfn{true}.
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (@pxref{Conversion}). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, @code{"10"} is less than @code{"9"}.
-
-The following example prints the second field of each input record
-whose first field is precisely @samp{foo}.
-
-@example
-awk '$1 == "foo" @{ print $2 @}' BBS-list
-@end example
-
-@noindent
-Contrast this with the following regular expression match, which would
-accept any record with a first field that contains @samp{foo}:
-
-@example
-awk '$1 ~ "foo" @{ print $2 @}' BBS-list
-@end example
-
-@node Ranges, BEGIN/END, Comparison Patterns, Patterns
-@section Specifying Record Ranges With Patterns
-
-@cindex Range pattern
-@cindex patterns, range
-A @dfn{range pattern} is made of two patterns separated by a comma:
-@samp{@var{begpat}, @var{endpat}}. It matches ranges of consecutive
-input records. The first pattern @var{begpat} controls where the
-range begins, and the second one @var{endpat} controls where it ends.
-
-They work as follows: @var{begpat} is matched against every input
-record; when a record matches @var{begpat}, the range pattern becomes
-@dfn{turned on}. The range pattern matches this record. As long as it
-stays turned on, it automatically matches every input record read. But
-meanwhile, @var{endpat} is matched against every input record, and when
-it matches, the range pattern is turned off again for the following
-record. Now we go back to checking @var{begpat} against each record.
-For example:@refill
-
-@example
-awk '$1 == "on", $1 == "off"'
-@end example
-
-@noindent
-prints every record between on/off pairs, inclusive.
-
-The record that turns on the range pattern and the one that turns it
-off both match the range pattern. If you don't want to operate on
-these records, you can write @code{if} statements in the rule's action
-to distinguish them.
-
-It is possible for a range pattern to be turned both on and off by the same
-record, if both conditions are satisfied by that record. Then the action is
-executed for just that record.
-
-@node BEGIN/END, Boolean, Ranges, Patterns
-@section @code{BEGIN} and @code{END} Special Patterns
-
-@cindex @code{BEGIN}, special pattern
-@cindex Patterns, @code{BEGIN}
-@cindex @code{END}, special pattern
-@cindex Patterns, @code{END}
-@code{BEGIN} and @code{END} are special patterns. They are not used to
-match input records. Rather, they are used for supplying start--up or
-clean--up information to your @code{awk} script. A @code{BEGIN} rule is
-executed, once, before the first input record has been read. An @code{END}
-rule is executed, once, after all the input has been read. For
-example:@refill
-
-@example
-awk 'BEGIN @{ print "Analysis of ``foo'' program" @}
- /foo/ @{ ++foobar @}
- END @{ print "``foo'' appears " foobar " times." @}' BBS-list
-@end example
-
-This program finds out how many times the string @samp{foo} appears in the
-input file @file{BBS-list}. The @code{BEGIN} pattern prints out a title
-for the report. There is no need to use the @code{BEGIN} pattern to
-initialize the counter @code{foobar} to zero, as @code{awk} does this for
-us automatically (@pxref{Variables}).
-The second rule increments the variable @code{foobar}
-every time a record containing the pattern @samp{foo} is read. The last
-rule prints out the value of @code{foobar} at the end of the run.@refill
-
-The special patterns @code{BEGIN} and @code{END} do not combine with
-other kinds of patterns.
-
-An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
-rules. The contents of multiple @code{BEGIN} or @code{END} rules are
-treated as if they had been enclosed in a single rule, in the order
-that the rules are encountered in the @code{awk} program. (This feature
-was introduced with the new version of @code{awk}.)
-
-Multiple @code{BEGIN} and @code{END} sections are also useful
-for writing library functions that need to do initialization and/or cleanup
-of their own. Note that the order in which library functions are named
-on the command line will affect the order in which their @code{BEGIN}
-and @code{END} rules will be executed. Therefore you have to be careful
-how you write your library functions. (@xref{Command Line}, for more
-information on using library functions.)
-
-If an @code{awk} program only has a @code{BEGIN} rule, and no other
-rules, then the program will exit after the @code{BEGIN} rule has been
-run. Older versions of @code{awk} used to read their input until end of
-file was seen. However, if an @code{END} rule exists as well, then the
-input will be read, even if there are no other rules in the program.
-
-@code{BEGIN} and @code{END} rules must have actions; there is no default
-action for these rules since there is no current record when they run.
-
-@node Boolean, Conditional Patterns, BEGIN/END, Patterns
-@section Boolean Operators and Patterns
-@cindex Patterns, boolean
-@cindex Boolean patterns
-
-A boolean pattern is a combination of other patterns using the boolean
-operators ``or'' (@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}),
-along with parentheses to control nesting. Whether the boolean pattern
-matches an input record is computed from whether its subpatterns match.
-
-The subpatterns of a boolean pattern can be regular expressions,
-matching expressions, comparisons, or other boolean combinations of
-such. Range patterns cannot appear inside boolean operators, since they
-don't make sense for classifying a single record, and neither can the
-special patterns @code{BEGIN} and @code{END}, which never match any
-input record.
-
-Here are descriptions of the three boolean operators.
-
-@table @code
-@item @var{pat1} && @var{pat2}
-Matches if both @var{pat1} and @var{pat2} match by themselves. For
-example, the following command prints all records in the input file
-@file{BBS-list} that contain both @samp{2400} and @samp{foo}.@refill
-
-@example
-awk '/2400/ && /foo/' BBS-list
-@end example
-
-Whether @var{pat2} matches is tested only if @var{pat1} succeeds. This
-can make a difference when @var{pat2} contains expressions that have
-side effects: in the case of @samp{/foo/ && ($2 == bar++)}, the variable
-@code{bar} is not incremented if there is no @samp{foo} in the record.@refill
-
-@item @var{pat1} || @var{pat2}
-Matches if at least one of @var{pat1} and @var{pat2} matches the current
-input record. For example, the following command prints all records in
-the input file @file{BBS-list} that contain @emph{either} @samp{2400} or
-@samp{foo}, or both.@refill
-
-@example
-awk '/2400/ || /foo/' BBS-list
-@end example
-
-Whether @var{pat2} matches is tested only if @var{pat1} fails to match.
-This can make a difference when @var{pat2} contains expressions that
-have side effects.
-
-@item !@var{pat}
-Matches if @var{pat} does not match. For example, the following command
-prints all records in the input file @file{BBS-list} that do @emph{not}
-contain the string @samp{foo}.
-
-@example
-awk '! /foo/' BBS-list
-@end example
-@end table
-
-Note that boolean patterns are built from other patterns just as boolean
-expressions are built from other expressions (@pxref{Boolean Ops}). Any
-boolean expression is also a valid boolean pattern. But the converse is
-not true: simple regular expression patterns such as @samp{/foo/} are not
-allowed in boolean expressions. Regular expressions can appear in boolean
-expressions only in conjunction with the matching operators, @samp{~}
-and @samp{!~}.
-
-@node Conditional Patterns, , Boolean, Patterns
-@section Conditional Patterns
-@cindex Conditional Patterns
-@cindex Patterns, Conditional
-@cindex Ternary Operator
-@cindex Operator, Ternary
-
-Patterns may use a @dfn{conditional expression} much like the conditional
-expression of the C language. This takes the form:
-
-@example
-@var{pat1} ? @var{pat2} : @var{pat3}
-@end example
-
-The first pattern is evaluated. If it evaluates to @var{true}, then the
-input record is tested against @var{pat2}. Otherwise it is tested
-against @var{pat3}. The conditional pattern matches if @var{pat2} or
-@var{pat3} (whichever one is selected) matches.@refill
-
-@node Actions, Expressions, Patterns, Top
-@chapter Actions: The Basics
-@cindex Action, general
-@cindex Curly braces
-@cindex Action, curly braces
-@cindex Action, separating statements
-
-The @dfn{action} part of an @code{awk} rule tells @code{awk} what to do
-once a match for the pattern is found. An action consists of one or more
-@code{awk} @dfn{statements}, enclosed in curly braces (@samp{@{} and
-@samp{@}}). The curly braces must be used even if the action contains only
-one statement, or even if it contains no statements at all. Action statements
-are separated by newlines or semicolons.@refill
-
-Besides the print statements already covered (@pxref{Printing}), there are
-four kinds of action statements: expressions, control statements, compound
-statements, and function definitions.@refill
-
-@itemize @bullet
-@item
-@cindex Expressions
-@dfn{Expressions} include assignments, arithmetic, function calls, and more
-(@pxref{Expressions}).@refill
-
-@item
-@cindex Statements
-@dfn{Control statements} specify the control flow of @code{awk} programs. The
-@code{awk} language gives you C--like constructs (@code{if}, @code{for},
-@code{while}, and so on) as well as a few special ones
-(@pxref{Statements}).@refill
-
-@item
-@cindex Compound statements
-A @dfn{compound statement} is just one or more @code{awk} statements
-enclosed in curly braces. This way you can group several statements
-to form the body of an @code{if} or similar statement.
-
-@item
-@cindex Function definitions
-You can define @dfn{user--defined functions} for use elsewhere in the
-@code{awk} program (@pxref{User-defined}).
-@end itemize
-
-@iftex
-The next two chapters will cover in detail expressions and control statements,
-respectively.
-We will then detour for a chapter to talk about arrays.
-@c (@strong{This is poor organization!!!})
-Then the following two chapters will deal with compound statements and
-user--defined functions, respectively.@refill
-@end iftex
-
-@node Expressions, Statements, Actions, Top
-@chapter Actions: Expressions
-
-Expressions are the basic building block of @code{awk} actions. An
-expression evaluates to a value, which you can print, test, store in a
-variable or pass to a function.
-
-But, beyond that, an expression can assign a new value to a variable
-or a field, with an assignment operator.
-
-An expression can serve as a statement on its own. Most other action
-statements are made up of various combinations of expressions. As in
-other languages, expressions in @code{awk} include variables, array
-references, constants, and function calls, as well as combinations of
-these with various operators.
-
-@menu
-* Constants:: String and numeric constants.
-* Variables:: Variables give names to values for future use.
-* Fields:: Field references such as @code{$1} are also expressions.
-* Arrays:: Array element references are expressions.
-
-* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.)
-* Concatenation:: Concatenating strings.
-* Comparison Ops:: Comparison of numbers and strings with @samp{<}, etc.
-* Boolean Ops:: Combining comparison expressions using boolean operators
- @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
-
-* Assignment Ops:: Changing the value of a variable or a field.
-* Increment Ops:: Incrementing the numeric value of a variable.
-
-* Conversion:: The conversion of strings to numbers and vice versa.
-* Conditional Exp:: Conditional expressions select between two subexpressions
- under control of a third subexpression.
-* Function Calls:: A function call is an expression.
-@end menu
-
-@node Constants, Variables, , Expressions
-@section Constant Expressions
-@cindex Constants, types of
-@cindex String constants
-@cindex String value
-
-There are two types of constants: numeric constants and string constants.
-
-@cindex Numerical constant
-@cindex Numerical value
-The @dfn{numeric constant} is a number. This number can be an integer, a
-decimal fraction, or a number in scientific (exponential) notation. Note that
-all numeric values are represented within @code{awk} in double--precision
-floating point. Here are some examples of numeric constants, which all
-have the same value:
-
-@example
-105
-1.05e+2
-1050e-1
-@end example
-
-A string constant consists of a sequence of characters enclosed in
-double--quote marks. For example:
-
-@example
-"parrot"
-@end example
-
-@noindent
-@cindex Differences between @code{gawk} and @code{awk}
-represents the string constant @samp{parrot}. Strings in @code{gawk} can
-be of any length and they can contain all the possible 8--bit ASCII
-characters including ASCII NUL. Other @code{awk} implementations may
-have difficulty with some character codes.@refill
-
-@cindex Escape sequence notation
-Some characters cannot be included literally in a string. You represent
-them instead with @dfn{escape sequences}, which are character sequences
-beginning with a backslash (@samp{\}).
-
-One use of the backslash is to include double--quote characters in a string.
-Since a plain double--quote would end the string, you must use @samp{\"}.
-Backslash itself is another character that can't be included normally;
-you write @samp{\\} to put one backslash in the string.
-
-Another use of backslash is to represent unprintable characters
-such as newline. While there is nothing to stop you from writing these
-characters directly in an @code{awk} program, they may look ugly.
-
-@table @code
-@item \b
-Represents a backspace, @samp{@ctrl{H}}.
-
-@item \f
-Represents a formfeed, @samp{@ctrl{L}}.
-
-@item \n
-Represents a newline, @samp{@ctrl{J}}.
-
-@item \r
-Represents a carriage return, @samp{@ctrl{M}}.
-
-@item \t
-Represents a horizontal tab, @samp{@ctrl{I}}.
-
-@item \v
-Represents a vertical tab, @samp{@ctrl{K}}.
-
-@item \@var{nnn}
-Represents the octal value @var{nnn}, where @var{nnn} is one to three digits
-between 0 and 7. For example, the code for the ASCII ESC (escape) character
-is @samp{\033}.@refill
-@end table
-
-@node Variables, Arithmetic Ops, Constants, Expressions
-@section Variables
-@cindex Variables, user-defined
-@cindex User-defined variables
-
-Variables let you give names to values and refer to them later. You have
-already seen variables in many of the examples. The name of a variable
-must be a sequence of letters, digits and underscores, but it may not begin
-with a digit. Case is significant in variable names; @code{a} and @code{A}
-are distinct variables.
-
-A variable name is a valid expression by itself; it represents the
-variable's current value. Variables are given new values with
-@dfn{assignment operators} and @dfn{increment operators}.
-@xref{Assignment Ops}.
-
-@cindex Built-in variables
-@cindex Variables, built-in
-A few variables have special built--in meanings, such as @code{FS}, the
-field separator, and @code{NF}, the number of fields in the current input
-record. @xref{Special}, for a list of them. Special variables can
-be used and assigned just like all other variables, but their values
-are also used or changed automatically by @code{awk}. Each special
-variable's name is made entirely of upper case letters.
-
-Variables in @code{awk} can be assigned either numeric values or string
-values. By default, variables are initialized to the null string, which
-has the numeric value zero. So there is no need to ``initialize''
-each variable explicitly in @code{awk}, the way you would need to do
-in C or most other traditional programming languages.
-
-@node Arithmetic Ops, Concatenation, Variables, Expressions
-@section Arithmetic Operators
-
-@cindex Arithmetic operators
-@cindex Operators, arithmetic
-The @code{awk} language uses the common arithmetic operators when
-evaluating expressions. All of these arithmetic operators follow normal
-precedence rules, and work as you would expect them to. This example
-divides field 3 by field 4, adds field 2, stores the result into field
-1, and prints the results:
-
-@example
-awk '@{ $1 = $2 + $3 / $4; print @}' inventory-shipped
-@end example
-
-The arithmetic operators in @code{awk} are:
-
-@table @code
-@item @var{x} + @var{y}
-Addition.
-
-@item @var{x} - @var{y}
-Subtraction.
-
-@item - @var{x}
-Negation.
-
-@item @var{x} / @var{y}
-Division. Since all numbers in @code{awk} are double--precision
-floating point, the result is not rounded to an integer: @samp{3 / 4}
-has the value 0.75.
-
-@item @var{x} * @var{y}
-Multiplication.
-
-@item @var{x} % @var{y}
-@cindex Mod function, semantics of
-@cindex Differences between @code{gawk} and @code{awk}
-@c @strong{How are gawk and awk different here?}
-Remainder. The quotient is rounded toward zero to an integer,
-multiplied by @var{y} and this result is subtracted from @var{x}.
-This operation is sometimes known as ``trunc--mod''. The following
-relation always holds:
-
-@display
-@code{b * int(a / b) + (a % b) == a}
-@end display
-
-One undesirable effect of this definition of remainder is that
-@var{x} % @var{y} is negative if @var{x} is negative. Thus,
-
-@example
--17 % 8 = -1
-@end example
-
-@item @var{x} ^ @var{y}
-@itemx @var{x} ** @var{y}
-Exponentiation: @var{x} raised to the @var{y} power. @samp{2 ^ 3} has
-the value 8. The character sequence @samp{**} is equivalent to
-@samp{^}.
-@end table
-
-@node Concatenation, Comparison Ops, Arithmetic Ops, Expressions
-@section String Concatenation
-
-@cindex String operators
-@cindex Operators, string
-@cindex Concatenation
-There is only one string operation: concatenation. It does not have a
-specific operator to represent it. Instead, concatenation is performed by
-writing expressions next to one another, with no operator. For example:
-
-@example
-awk '@{ print "Field number one: " $1 @}' BBS-list
-@end example
-
-@noindent
-produces, for the first record in @file{BBS-list}:
-
-@example
-Field number one: aardvark
-@end example
-
-If you hadn't put the space after the @samp{:}, the line would have run
-together. For example:
-
-@example
-awk '@{ print "Field number one:" $1 @}' BBS-list
-@end example
-
-@noindent
-produces, for the first record in @file{BBS-list}:
-
-@example
-Field number one:aardvark
-@end example
-
-@node Comparison Ops, Boolean Ops, Concatenation, Expressions
-@section Comparison Expressions
-@cindex Comparison expressions
-@cindex Expressions, comparison
-@cindex Relational operators
-@cindex Operators, relational
-
-@dfn{Comparison expressions} use @dfn{relational operators} to compare
-strings or numbers. The relational operators are the same as in C.
-Here is a table of them:
-
-@table @code
-@item @var{x} < @var{y}
-True if @var{x} is less than @var{y}.
-
-@item @var{x} <= @var{y}
-True if @var{x} is less than or equal to @var{y}.
-
-@item @var{x} > @var{y}
-True if @var{x} is greater than @var{y}.
-
-@item @var{x} >= @var{y}
-True if @var{x} is greater than or equal to @var{y}.
-
-@item @var{x} == @var{y}
-True if @var{x} is equal to @var{y}.
-
-@item @var{x} != @var{y}
-True if @var{x} is not equal to @var{y}.
-
-@item @var{x} ~ @var{regexp}
-True if regexp @var{regexp} matches the string @var{x}.
-
-@item @var{x} !~ @var{regexp}
-True if regexp @var{regexp} does not match the string @var{x}.
-
-@item @var{subscript} in @var{array}
-True if array @var{array} has an element with the subscript @var{subscript}.
-@end table
-
-Comparison expressions have the value 1 if true and 0 if false.
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (@pxref{Conversion}). Strings are compared by comparing the
-first character of each, then the second character of each, and so on.
-Thus, @code{"10"} is less than @code{"9"}.
-
-For example,
-
-@example
-$1 == "foo"
-@end example
-
-@noindent
-has the value of 1, or is true, if the first field of the current input
-record is precisely @samp{foo}. By contrast,
-
-@example
-$1 ~ /foo/
-@end example
-
-@noindent
-has the value 1 if the first field contains @samp{foo}.
-
-@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions
-@section Boolean Operators
-@cindex Expressions, boolean
-@cindex Boolean expressions
-@cindex Operators, boolean
-@cindex Boolean operators
-
-A boolean expression is a combination of comparison expressions or matching
-expressions, using the boolean operators ``or'' (@samp{||}), ``and''
-(@samp{&&}), and ``not'' (@samp{!}), along with parentheses to control
-nesting. The truth of the boolean expression is computed by combining the
-truth values of the component expressions.
-
-Boolean expressions can be used wherever comparison and matching
-expressions can be used. They can be used in @code{if} and @code{while}
-statements. They have numeric values (1 if true, 0 if false).
-
-In addition, every boolean expression is also a valid boolean pattern, so
-you can use it as a pattern to control the execution of rules.
-
-Here are descriptions of the three boolean operators, with an example of
-each. It may be instructive to compare these examples with the analogous
-examples of boolean patterns (@pxref{Boolean}), which use the same boolean
-operators in patterns instead of expressions.
-
-@table @code
-@item @var{boolean1} && @var{boolean2}
-True if both @var{boolean1} and @var{boolean2} are true. For example,
-the following statement prints the current input record if it contains
-both @samp{2400} and @samp{foo}.@refill
-
-@example
-if ($0 ~ /2400/ && $0 ~ /foo/) print
-@end example
-
-The subexpression @var{boolean2} is evaluated only if @var{boolean1}
-is true. This can make a difference when @var{boolean2} contains
-expressions that have side effects: in the case of @samp{$0 ~ /foo/ &&
-($2 == bar++)}, the variable @code{bar} is not incremented if there is
-no @samp{foo} in the record.
-
-@item @var{boolean1} || @var{boolean2}
-True if at least one of @var{boolean1} and @var{boolean2} is true.
-For example, the following command prints all records in the input
-file @file{BBS-list} that contain @emph{either} @samp{2400} or
-@samp{foo}, or both.@refill
-
-@example
-awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list
-@end example
-
-The subexpression @var{boolean2} is evaluated only if @var{boolean1}
-is false. This can make a difference when @var{boolean2} contains
-expressions that have side effects.
-
-@item !@var{boolean}
-True if @var{boolean} is false. For example, the following program prints
-all records in the input file @file{BBS-list} that do @emph{not} contain the
-string @samp{foo}.
-
-@example
-awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
-@end example
-@end table
-
-@node Assignment Ops, Increment Ops, Boolean Ops, Expressions
-@section Assignment Operators
-
-@cindex Assignment operators
-@cindex Operators, assignment
-An @dfn{assignment} is an expression that stores a new value into a
-variable. For example, let's assign the value 1 to the variable
-@code{z}:@refill
-
-@example
-z = 1
-@end example
-
-After this expression is executed, the variable @code{z} has the value 1.
-Whatever old value @code{z} had before the assignment is forgotten.
-
-The @code{=} sign is called an @dfn{assignment operator}. It is the
-simplest assignment operator because the value of the right--hand
-operand is stored unchanged.
-
-@cindex Lvalue
-The left--hand operand of an assignment can be a variable
-(@pxref{Variables}), a field (@pxref{Changing Fields}) or an array
-element (@pxref{Arrays}). These are all called @dfn{lvalues}, which
-means they can appear on the left side of an assignment operator. The
-right--hand operand may be any expression; it produces the new value
-which the assignment stores in the specified variable, field or array
-element.
-
-Assignments can store string values also. For example, this would store
-the value @code{"this food is good"} in the variable @code{message}:
-
-@example
-thing = "food"
-predicate = "good"
-message = "this " thing " is " predicate
-@end example
-
-@noindent
-(This also illustrates concatenation of strings.)
-
-It is important to note that variables do @emph{not} have permanent types.
-The type of a variable is simply the type of whatever value it happens
-to hold at the moment. In the following program fragment, the variable
-@code{foo} has a numeric value at first, and a string value later on:
-
-@example
-foo = 1
-print foo
-foo = "bar"
-print foo
-@end example
-
-@noindent
-When the second assignment gives @code{foo} a string value, the fact that
-it previously had a numeric value is forgotten.
-
-An assignment is an expression, so it has a value: the same value that
-is assigned. Thus, @samp{z = 1} as an expression has the value 1.
-One consequence of this is that you can write multiple assignments together:
-
-@example
-x = y = z = 0
-@end example
-
-@noindent
-stores the value 0 in all three variables. It does this because the
-value of @samp{z = 0}, which is 0, is stored into @code{y}, and then
-the value of @samp{y = z = 0}, which is 0, is stored into @code{x}.
-
-You can use an assignment anywhere an expression is called for. For
-example, it is valid to write @samp{x != (y = 1)} to set @code{y} to 1
-and then test whether @code{x} equals 1. But this style tends to make
-programs hard to read; except in a one--shot program, you should
-rewrite it to get rid of such nesting of assignments. This is never very
-hard.
-
-Aside from @code{=}, there are several other assignment operators that
-do arithmetic with the old value of the variable. For example, the
-operator @code{+=} computes a new value by adding the right--hand value
-to the old value of the variable. Thus, the following assignment adds
-5 to the value of @code{foo}:
-
-@example
-foo += 5
-@end example
-
-@noindent
-This is precisely equivalent to the following:
-
-@example
-foo = foo + 5
-@end example
-
-@noindent
-Use whichever one makes the meaning of your program clearer.
-
-Here is a table of the arithmetic assignment operators. In each
-case, the right--hand operand is an expression whose value is converted
-to a number.
-
-@table @code
-@item @var{lvalue} += @var{increment}
-Adds @var{increment} to the value of @var{lvalue} to make the new value
-of @var{lvalue}.
-
-@item @var{lvalue} -= @var{decrement}
-Subtracts @var{decrement} from the value of @var{lvalue}.
-
-@item @var{lvalue} *= @var{coefficient}
-Multiplies the value of @var{lvalue} by @var{coefficient}.
-
-@item @var{lvalue} /= @var{divisor}
-Divides the value of @var{lvalue} by @var{divisor}.
-
-@item @var{lvalue} %= @var{modulus}
-Sets @var{lvalue} to its remainder by @var{modulus}.
-
-@item @var{lvalue} ^= @var{power}
-@itemx @var{lvalue} **= @var{power}
-Raises @var{lvalue} to the power @var{power}.
-@end table
-
-@node Increment Ops, Conversion, Assignment Ops, Expressions
-@section Increment Operators
-
-@cindex Increment operators
-@cindex Operators, increment
-@dfn{Increment operators} increase or decrease the value of a variable
-by 1. You could do the same thing with an assignment operator, so
-the increment operators add no power to the @code{awk} language; but they
-are convenient abbreviations for something very common.
-
-The operator to add 1 is written @code{++}. There are two ways to use
-this operator: pre--incrementation and post--incrementation.
-
-To pre--increment a variable @var{v}, write @code{++@var{v}}. This adds
-1 to the value of @var{v} and that new value is also the value of this
-expression. The assignment expression @code{@var{v} += 1} is completely
-equivalent.
-
-Writing the @code{++} after the variable specifies post--increment. This
-increments the variable value just the same; the difference is that the
-value of the increment expression itself is the variable's @emph{old}
-value. Thus, if @code{foo} has value 4, then the expression @code{foo++}
-has the value 4, but it changes the value of @code{foo} to 5.
-
-The post--increment @code{foo++} is nearly equivalent to writing @samp{(foo
-+= 1) - 1}. It is not perfectly equivalent because all numbers in
-@code{awk} are floating point: in floating point, @code{foo + 1 - 1} does
-not necessarily equal @code{foo}. But the difference will be minute as
-long as you stick to numbers that are fairly small (less than a trillion).
-
-Any lvalue can be incremented. Fields and array elements are incremented
-just like variables.
-
-The decrement operator @code{--} works just like @code{++} except that
-it subtracts 1 instead of adding. Like @code{++}, it can be used before
-the lvalue to pre--decrement or after it to post--decrement.
-
-Here is a summary of increment and decrement expressions.
-
-@table @code
-@item ++@var{lvalue}
-This expression increments @var{lvalue} and the new value becomes the
-value of this expression.
-
-@item @var{lvalue}++
-This expression causes the contents of @var{lvalue} to be incremented.
-The value of the expression is the @emph{old} value of @var{lvalue}.
-
-@item --@var{lvalue}
-Like @code{++@var{lvalue}}, but instead of adding, it subtracts. It
-decrements @var{lvalue} and delivers the value that results.
-
-@item @var{lvalue}--
-Like @code{@var{lvalue}++}, but instead of adding, it subtracts. It
-decrements @var{lvalue}. The value of the expression is the @emph{old}
-value of @var{lvalue}.
-@end table
-
-@node Conversion, Conditional Exp, Increment Ops, Expressions
-@section Conversion of Strings and Numbers
-
-@cindex Conversion of strings and numbers
-Strings are converted to numbers, and numbers to strings, if the context of
-your @code{awk} statement demands it. For example, if the values of
-@code{foo} or @code{bar} in the expression @code{foo + bar} happen to be
-strings, they are converted to numbers before the addition is performed.
-If numeric values appear in string concatenation, they are converted
-to strings. Consider this:@refill
-
-@example
-two = 2; three = 3
-print (two three) + 4
-@end example
-
-@noindent
-This eventually prints the (numeric) value @samp{27}. The numeric
-variables @code{two} and @code{three} are converted to strings and concatenated
-together, and the resulting string is converted back to a number before
-adding @samp{4}. The resulting numeric value @samp{27} is printed.
-
-If, for some reason, you need to force a number to be converted to a
-string, concatenate the null string with that number. To force a string
-to be converted to a number, add zero to that string. Strings that
-can't be interpreted as valid numbers are given the numeric value
-zero.@refill
-
-@vindex OFMT
-The exact manner in which numbers are converted into strings is controlled
-by the @code{awk} special variable @code{OFMT} (@pxref{Special}).
-Numbers are converted using a special
-version of the @code{sprintf} function (@pxref{Built-in}) with @code{OFMT}
-as the format specifier.@refill
-
-@code{OFMT}'s default value is @code{"%.6g"}, which prints a value with
-at most six significant digits. You might want to change it to specify
-more precision, if your version of @code{awk} uses double precision
-arithmetic. Double precision on most modern machines gives you 16 or 17
-decimal digits of precision.@refill
-
-Strange results can happen if you set @code{OFMT} to a string that doesn't
-tell @code{sprintf} how to format floating point numbers in a useful way.
-For example, if you forget the @samp{%} in the format, all numbers will be
-converted to the same constant string.@refill
-
-@node Conditional Exp, Function Calls, Conversion, Expressions
-@section Conditional Expressions
-@cindex Conditional expression
-@cindex Expression, conditional
-
-A @dfn{conditional expression} is a special kind of expression with
-three operands. It allows you to use one expression's value to select
-one of two other expressions.
-
-The conditional expression looks the same as in the C language:
-
-@example
-@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
-@end example
-
-@noindent
-There are three subexpressions. The first, @var{selector}, is always
-computed first. If it is ``true'' (not zero) then @var{if-true-exp} is
-computed next and its value becomes the value of the whole expression.
-Otherwise, @var{if-false-exp} is computed next and its value becomes the
-value of the whole expression.
-
-For example, this expression produces the absolute value of @code{x}:
-
-@example
-x > 0 ? x : -x
-@end example
-
-Each time the conditional expression is computed, exactly one of
-@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
-This is important when the expressions contain side effects. For example,
-this conditional expression examines element @code{i} of either array
-@code{a} or array @code{b}, and increments @code{i}.
-
-@example
-x == y ? a[i++] : b[i++]
-@end example
-
-@noindent
-This is guaranteed to increment @code{i} exactly once, because each time
-one or the other of the two increment expressions will be executed
-and the other will not be.
-
-@node Function Calls, , Conditional Exp, Expressions
-@section Function Calls
-@cindex Function call
-@cindex Calling a function
-
-A @dfn{function} is a name for a particular calculation. Because it has
-a name, you can ask for it by name at any point in the program. For
-example, the function @code{sqrt} computes the square root of a number.
-
-A fixed set of functions are @dfn{built in}, which means they are
-available in every @code{awk} program. The @code{sqrt} function is one
-of these. @xref{Built-in}, for a list of built--in functions and their
-descriptions. In addition, you can define your own functions in the
-program for use elsewhere in the same program. @xref{User-defined},
-for how to do this.
-
-@cindex Arguments in function call
-The way to use a function is with a @dfn{function call} expression,
-which consists of the function name followed by a list of
-@dfn{arguments} in parentheses. The arguments are expressions which
-give the raw materials for the calculation that the function will do.
-When there is more than one argument, they are separated by commas. If
-there are no arguments, write just @samp{()} after the function name.
-
-@strong{Do not put any space between the function name and the
-open--parenthesis!} A user--defined function name looks just like the name of
-a variable, and space would make the expression look like concatenation
-of a variable with an expression inside parentheses. Space before the
-parenthesis is harmless with built--in functions, but it is best not to get
-into the habit of using space, lest you do likewise for a user--defined
-function one day by mistake.
-
-Each function needs a particular number of arguments. For example, the
-@code{sqrt} function must be called with a single argument, like this:
-
-@example
-sqrt(@var{argument})
-@end example
-
-@noindent
-The argument is the number to take the square root of.
-
-Some of the built--in functions allow you to omit the final argument.
-If you do so, they will use a reasonable default. @xref{Built-in},
-for full details. If arguments are omitted in calls to user--defined
-functions, then those arguments are treated as local variables,
-initialized to the null string (@pxref{User-defined}).
-
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of @code{sqrt(@var{argument})} is the square root of the
-argument. A function can also have side effects, such as assigning the
-values of certain variables or doing I/O.
-
-Here is a command to read numbers, one number per line, and print the
-square root of each one:
-
-@example
-awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
-@end example
-
-@node Statements, Arrays, Expressions, Top
-@chapter Actions: Statements
-@cindex Statements
-
-@dfn{Control statements} such as @code{if}, @code{while}, and so on
-control the flow of execution in @code{awk} programs. Most of the
-control statements in @code{awk} are patterned on similar statements in
-C.
-
-The simplest kind of statement is an expression. The other kinds of
-statements start with special keywords such as @code{if} and
-@code{while}, to distinguish them from simple expressions.
-
-In all the examples in this chapter, @var{body} can be either a single
-statement or a group of statements. Groups of statements are enclosed
-in braces, and separated by newlines or semicolons.@refill
-
-@menu
-* Expressions:: One kind of statement simply computes an expression.
-
-* If:: Conditionally execute some @code{awk} statements.
-
-* While:: Loop until some condition is satisfied.
-
-* Do:: Do specified action while looping until some
- condition is satisfied.
-
-* For:: Another looping statement, that provides
- initialization and increment clauses.
-
-* Break:: Immediately exit the innermost enclosing loop.
-
-* Continue:: Skip to the end of the innermost enclosing loop.
-
-* Next:: Stop processing the current input record.
-
-* Exit:: Stop execution of @code{awk}.
-@end menu
-
-@node If, While, , Statements
-@section The @code{if} Statement
-
-@cindex @code{if} statement
-The @code{if}-@code{else} statement is @code{awk}'s decision--making
-statement. The @code{else} part of the statement is optional.@refill
-
-@display
-@code{if (@var{condition}) @var{body1} else @var{body2}}
-@end display
-
-@noindent
-Here @var{condition} is an expression that controls what the rest of the
-statement will do. If @var{condition} is true, @var{body1} is executed;
-otherwise, @var{body2} is executed (assuming that the @code{else} clause
-is present). The condition is considered true if it is nonzero or
-nonnull.
-
-Here is an example:
-
-@example
-awk '@{ if (x % 2 == 0)
- print "x is even"
- else
- print "x is odd" @}'
-@end example
-
-In this example, if the expression @code{x % 2 == 0} is true (that is,
-the value of @code{x} is evenly divisible by 2), then the first
-@code{print} statement is executed; otherwise the second @code{print}
-statement is executed.@refill
-
-If the @code{else} appears on the same line as @var{body1}, and @var{body1}
-is a single statement, then a semicolon must separate @var{body1} from
-@code{else}. To illustrate this, let's rewrite the previous example:
-
-@group
-@example
-awk '@{ if (x % 2 == 0) print "x is even"; else
- print "x is odd" @}'
-@end example
-@end group
-
-@noindent
-If you forget the @samp{;}, @code{awk} won't be able to parse it, and
-you will get a syntax error.
-
-We would not actually write this example this way, because a human
-reader might fail to see the @code{else} if it were not the first thing
-on its line.
-
-@node While, Do, If, Statements
-@section The @code{while} Statement
-@cindex @code{while} statement
-@cindex Loop
-@cindex Body of a loop
-
-In programming, a loop means a part of a program that is (or at least can
-be) executed two or more times in succession.
-
-The @code{while} statement is the simplest looping statement in
-@code{awk}. It repeatedly executes a statement as long as a condition is
-true. It looks like this:
-
-@example
-while (@var{condition})
- @var{body}
-@end example
-
-@noindent
-Here @var{body} is a statement that we call the @dfn{body} of the loop,
-and @var{condition} is an expression that controls how long the loop
-keeps running.
-
-The first thing the @code{while} statement does is test @var{condition}.
-If @var{condition} is true, it executes the statement @var{body}. After
-@var{body} has been executed, @var{condition} is tested again and this
-process is repeated until @var{condition} is no longer true. If
-@var{condition} is initially false, the body of the loop is never
-executed.@refill
-
-@example
-awk '@{ i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
-@}'
-@end example
-
-@noindent
-This example prints the first three input fields, one per line.
-
-The loop works like this: first, the value of @code{i} is set to 1.
-Then, the @code{while} tests whether @code{i} is less than or equal to
-three. This is the case when @code{i} equals one, so the @code{i}-th
-field is printed. Then the @code{i++} increments the value of @code{i}
-and the loop repeats.
-
-When @code{i} reaches 4, the loop exits. Here @var{body} is a compound
-statement enclosed in braces. As you can see, a newline is not required
-between the condition and the body; but using one makes the program clearer
-unless the body is a compound statement or is very simple.
-
-@node Do, For, While, Statements
-@section The @code{do}--@code{while} Statement
-
-The @code{do} loop is a variation of the @code{while} looping statement.
-The @code{do} loop executes the @var{body} once, then repeats @var{body}
-as long as @var{condition} is true. It looks like this:
-
-@group
-@example
-do
- @var{body}
-while (@var{condition})
-@end example
-@end group
-
-Even if @var{condition} is false at the start, @var{body} is executed at
-least once (and only once, unless executing @var{body} makes
-@var{condition} true). Contrast this with the corresponding
-@code{while} statement:
-
-@example
-while (@var{condition})
- @var{body}
-@end example
-
-@noindent
-This statement will not execute @var{body} even once if @var{condition}
-is false to begin with.
-
-Here is an example of a @code{do} statement:
-
-@example
-awk '@{ i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
-@}'
-@end example
-
-@noindent
-This example prints each input record ten times. It isn't a very
-realistic example, since in this case an ordinary @code{while} would do
-just as well. But this is normal; there is only occasionally a real
-use for a @code{do} statement.@refill
-
-@node For, Break, Do, Statements
-@section The @code{for} Statement
-@cindex @code{for} statement
-
-The @code{for} statement makes it more convenient to count iterations of a
-loop. The general form of the @code{for} statement looks like this:@refill
-
-@example
-for (@var{initialization}; @var{condition}; @var{increment})
- @var{body}
-@end example
-
-@noindent
-This statement starts by executing @var{initialization}. Then, as long
-as @var{condition} is true, it repeatedly executes @var{body} and then
-@var{increment}. Typically @var{initialization} sets a variable to
-either zero or one, @var{increment} adds 1 to it, and @var{condition}
-compares it against the desired number of iterations.
-
-Here is an example of a @code{for} statement:
-
-@example
-awk '@{ for (i = 1; i <= 3; i++)
- print $i
-@}'
-@end example
-
-@noindent
-This prints the first three fields of each input record, one field per
-line.
-
-In the @code{for} statement, @var{body} stands for any statement, but
-@var{initialization}, @var{condition} and @var{increment} are just
-expressions. You cannot set more than one variable in the
-@var{initialization} part unless you use a multiple assignment statement
-such as @code{x = y = 0}, which is possible only if all the initial values
-are equal. (But you can initialize additional variables by writing
-their assignments as separate statements preceding the @code{for} loop.)
-
-The same is true of the @var{increment} part; to increment additional
-variables, you must write separate statements at the end of the loop.
-The C compound expression, using C's comma operator, would be useful in
-this context, but it is not supported in @code{awk}.
-
-Most often, @var{increment} is an increment expression, as in the
-example above. But this is not required; it can be any expression
-whatever. For example, this statement prints odd numbers from 1 to 100:
-
-@example
-# print odd numbers from 1 to 100
-for (i = 1; i <= 100; i += 2)
- print i
-@end example
-
-Any of the three expressions following @code{for} may be omitted if you
-don't want it to do anything. Thus, @w{@samp{for (;x > 0;)}} is equivalent
-to @w{@samp{while (x > 0)}}.
-If the @var{condition} part is empty, it is treated as @var{true},
-effectively yielding an infinite loop.@refill
-
-In most cases, a @code{for} loop is an abbreviation for a @code{while}
-loop, as shown here:
-
-@example
-@var{initialization}
-while (@var{condition}) @{
- @var{body}
- @var{increment}
-@}
-@end example
-
-@noindent
-(The only exception is when the @code{continue} statement
-(@pxref{Continue}) is used inside the loop; changing a @code{for} statement
-to a @code{while} statement in this way can change the effect of the
-@code{continue} statement inside the loop.)@refill
-
-The @code{awk} language has a @code{for} statement in addition to a
-@code{while} statement because often a @code{for} loop is both less work to
-type and more natural to think of. Counting the number of iterations is
-very common in loops. It can be easier to think of this counting as part
-of looping rather than as something to do inside the loop.
-
-The next section has more complicated examples of @code{for} loops.
-
-There is an alternate version of the @code{for} loop, for iterating over
-all the indices of an array:
-
-@example
-for (i in array)
- @var{process} array[i]
-@end example
-
-@noindent
-@xref{Arrays}, for more information on this version of the @code{for} loop.
-
-@node Break, Continue, For, Statements
-@section The @code{break} Statement
-@cindex @code{break} statement
-@cindex Loops, breaking out of
-
-The @code{break} statement jumps out of the innermost @code{for}, @code{while},
-or @code{do}--@code{while} loop that encloses it.
-The following example finds the
-smallest divisor of any number, and also identifies prime numbers:@refill
-
-@example
-awk '# find smallest divisor of num
- @{ num = $1
- for (div = 2; div*div <= num; div++)
- if (num % div == 0)
- break
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num @}'
-@end example
-
-When the remainder is zero in the first @code{if} statement, @code{awk}
-immediately @dfn{breaks} out of the containing @code{for} loop. This means
-that @code{awk} proceeds immediately to the statement following the loop
-and continues processing. (This is very different from the @code{exit}
-statement (@pxref{Exit}) which stops the entire @code{awk}
-program.)@refill
-
-Here is another program equivalent to the previous one. It illustrates how
-the @var{condition} of a @code{for} or @code{while} could just as well be
-replaced with a @code{break} inside an @code{if}:
-
-@example
-awk '# find smallest divisor of num
- @{ num = $1
- for (div = 2; ; div++) @{
- if (num % div == 0) @{
- printf "Smallest divisor of %d is %d\n", num, div
- break
- @}
- if (div*div > num) @{
- printf "%d is prime\n", num
- break
- @}
- @}
-@}'
-@end example
-
-@node Continue, Next, Break, Statements
-@section The @code{continue} Statement
-
-@cindex @code{continue} statement
-The @code{continue} statement, like @code{break}, is used only inside
-@code{for}, @code{while}, and @code{do}--@code{while} loops. It skips
-over the rest of the loop body, causing the next cycle around the loop
-to begin immediately. Contrast this with @code{break}, which jumps out
-of the loop altogether. Here is an example:@refill
-
-@example
-# print names that don't contain the string "ignore"
-
-# first, save the text of each line
-@{ names[NR] = $0 @}
-
-# print what we're interested in
-END @{
- for (x in names) @{
- if (names[x] ~ /ignore/)
- continue
- print names[x]
- @}
-@}
-@end example
-
-If any of the input records contain the string @samp{ignore}, this example
-skips the print statement and continues back to the first statement in the
-loop.
-
-This isn't a practical example of @code{continue}, since it would be
-just as easy to write the loop like this:
-
-@example
-for (x in names)
- if (names[x] !~ /ignore/)
- print names[x]
-@end example
-
-The @code{continue} statement causes @code{awk} to skip the rest of what is
-inside a @code{for} loop, but it resumes execution with the increment part
-of the @code{for} loop. The following program illustrates this fact:@refill
-
-@example
-awk 'BEGIN @{
- for (x = 0; x <= 20; x++) @{
- if (x == 5)
- continue
- printf ("%d ", x)
- @}
- print ""
-@}'
-@end example
-
-@noindent
-This program prints all the numbers from 0 to 20, except for 5, for
-which the @code{printf} is skipped. Since the increment @code{x++}
-is not skipped, @code{x} does not remain stuck at 5.
-
-@node Next, Exit, Continue, Statements
-@section The @code{next} Statement
-@cindex @code{next} statement
-
-The @code{next} statement forces @code{awk} to immediately stop processing
-the current record and go on to the next record. This means that no
-further rules are executed for the current record. The rest of the
-current rule's action is not executed either.
-
-Contrast this with the effect of the @code{getline} function
-(@pxref{Getline}). That too causes @code{awk} to read the next record
-immediately, but it does not alter the flow of control in any way. So
-the rest of the current action executes with a new input record.
-
-At the grossest level, @code{awk} program execution is a loop that reads
-an input record and then tests each rule pattern against it. If you
-think of this loop as a @code{for} statement whose body contains the
-rules, then the @code{next} statement is analogous to a @code{continue}
-statement: it skips to the end of the body of the loop, and executes the
-increment (which reads another record).
-
-For example, if your @code{awk} program works only on records with four
-fields, and you don't want it to fail when given bad input, you might use
-the following rule near the beginning of the program:
-
-@example
-NF != 4 @{
- printf ("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
- next
-@}
-@end example
-
-@noindent
-so that the following rules will not see the bad record. The error message
-is redirected to @file{/dev/tty} (the terminal), so that it won't get lost
-amid the rest of the program's regular output.
-
-@node Exit, , Next, Statements
-@section The @code{exit} Statement
-
-@cindex @code{exit} statement
-The @code{exit} statement causes @code{awk} to immediately stop
-executing the current rule and to stop processing input; any remaining input
-is ignored.@refill
-
-If an @code{exit} statement is executed from a @code{BEGIN} rule
-the program stops processing everything immediately.
-No input records will be read. However, if an @code{END} rule is
-present, it will be executed (@pxref{BEGIN/END}).@refill
-
-If @code{exit} is used as part of an @code{END} rule, it causes
-the program to stop immediately.
-
-An @code{exit} statement that is part of an ordinary rule (that is, not part
-of a @code{BEGIN} or @code{END} rule) stops the execution of any further
-automatic rules, but the @code{END} rule is executed if there is one.
-If you don't want the @code{END} rule to do its job in this case, you
-can set a variable to nonzero before the @code{exit} statement, and check
-that variable in the @code{END} rule.
-
-If an argument is supplied to @code{exit}, its value is used as the exit
-status code for the @code{awk} process. If no argument is supplied,
-@code{exit} returns status zero (success).@refill
-
-For example, let's say you've discovered an error condition you really
-don't know how to handle. Conventionally, programs report this by
-exiting with a nonzero status. Your @code{awk} program can do this
-using an @code{exit} statement with a nonzero argument. Here's an
-example of this:@refill
-
-@example
-BEGIN @{
- if (("date" | getline date_now) < 0) @{
- print "Can't get system date"
- exit 4
- @}
-@}
-@end example
-
-@node Arrays, Built-in, Statements, Top
-@chapter Actions: Using Arrays in @code{awk}
-
-An @dfn{array} is a table of various values, called @dfn{elements}. The
-elements of an array are distinguished by their @dfn{indices}. Names
-of arrays in @code{awk} are strings of alphanumeric characters and
-underscores, just like regular variables.
-
-You cannot use the same identifier as both a variable and as an array
-name in one @code{awk} program.
-
-@menu
-* Intro: Array Intro. Basic facts about arrays in @code{awk}.
-* Reference to Elements:: How to examine one element of an array.
-* Assigning Elements:: How to change an element of an array.
-* Example: Array Example. Sample program explained.
-
-* Scanning an Array:: A variation of the @code{for} statement. It loops
- through the indices of an array's existing elements.
-
-* Delete:: The @code{delete} statement removes an element from an array.
-
-* Multi-dimensional:: Emulating multi--dimensional arrays in @code{awk}.
-* Multi-scanning:: Scanning multi--dimensional arrays.
-@end menu
-
-@node Array Intro, Reference to Elements, , Arrays
-@section Introduction to Arrays
-
-@cindex Arrays
-The @code{awk} language has one--dimensional @dfn{arrays} for storing groups
-of related strings or numbers. Each array must have a name; valid array
-names are the same as valid variable names, and they do conflict with
-variable names: you can't have both an array and a variable with the same
-name at any point in an @code{awk} program.
-
-Arrays in @code{awk} superficially resemble arrays in other programming
-languages; but there are fundamental differences. In @code{awk}, you
-don't need to declare the size of an array before you start to use it.
-What's more, in @code{awk} any number or even a string may be used as an
-array index.
-
-In most other languages, you have to @dfn{declare} an array and specify
-how many elements or components it has. In such languages, the
-declaration causes a contiguous block of memory to be allocated for that
-many elements. An index in the array must be a nonnegative integer; for
-example, the index 0 specifies the first element in the array, which is
-actually stored at the beginning of the block of memory. Index 1
-specifies the second element, which is stored in memory right after the
-first element, and so on. It is impossible to add more elements to the
-array, because it has room for only as many elements as you declared.
-(Some languages have arrays whose first index is 1, others require that
-you specify both the first and last index when you declare the array.
-In such a language, an array could be indexed, for example, from -3 to
-17.) A contiguous array of four elements might look like this,
-conceptually, if the element values are 8, @code{"foo"}, @code{""} and
-30:@refill
-
-@example
-+---------+---------+--------+---------+
-| 8 | "foo" | "" | 30 | @r{value}
-+---------+---------+--------+---------+
- 0 1 2 3 @r{index}
-@end example
-
-@noindent
-Only the values are stored; the indices are implicit from the order of
-the values. 8 is the value at index 0, because 8 appears in the
-position with 0 elements before it.
-
-@cindex Arrays, definition of
-@cindex Associative arrays
-Arrays in @code{awk} are different: they are @dfn{associative}. This means
-that each array is a collection of pairs: an index, and its corresponding
-array element value:
-
-@example
-@r{Element} 4 @r{Value} 30
-@r{Element} 2 @r{Value} "foo"
-@r{Element} 1 @r{Value} 8
-@r{Element} 3 @r{Value} ""
-@end example
-
-@noindent
-We have shown the pairs in jumbled order because their order doesn't
-mean anything.
-
-One advantage of an associative array is that new pairs can be added
-at any time. For example, suppose we add to that array a tenth element
-whose value is @w{@code{"number ten"}}. The result is this:
-
-@example
-@r{Element} 10 @r{Value} "number ten"
-@r{Element} 4 @r{Value} 30
-@r{Element} 2 @r{Value} "foo"
-@r{Element} 1 @r{Value} 8
-@r{Element} 3 @r{Value} ""
-@end example
-
-@noindent
-Now the array is @dfn{sparse} (i.e. some indices are missing): it has
-elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or
-9.@refill
-
-Another consequence of associative arrays is that the indices don't
-have to be positive integers. Any number, or even a string, can be
-an index. For example, here is an array which translates words from
-English into French:
-
-@example
-@r{Element} "dog" @r{Value} "chien"
-@r{Element} "cat" @r{Value} "chat"
-@r{Element} "one" @r{Value} "un"
-@r{Element} 1 @r{Value} "un"
-@end example
-
-@noindent
-Here we decided to translate the number 1 in both spelled--out and
-numeral form---thus illustrating that a single array can have both
-numbers and strings as indices.
-
-When @code{awk} creates an array for you, e.g. with the @code{split}
-built--in function (@pxref{String Functions}), that array's indices
-start at the number one.
-
-@node Reference to Elements, Assigning Elements, Array Intro, Arrays
-@section Referring to an Array Element
-@cindex Array reference
-@cindex Element of array
-@cindex Reference to array
-
-The principal way of using an array is to refer to one of its elements.
-An array reference is an expression which looks like this:
-
-@example
-@var{array}[@var{index}]
-@end example
-
-@noindent
-Here @var{array} is the name of an array. The expression @var{index} is
-the index of the element of the array that you want. The value of the
-array reference is the current value of that array element.
-
-For example, @samp{foo[4.3]} is an expression for the element of array
-@code{foo} at index 4.3.
-
-If you refer to an array element that has no recorded value, the value
-of the reference is @code{""}, the null string. This includes elements
-to which you have not assigned any value, and elements that have been
-deleted (@pxref{Delete}). Such a reference automatically creates that
-array element, with the null string as its value. (In some cases,
-this is unfortunate, because it might waste memory inside @code{awk}).
-
-@cindex Arrays, determining presence of elements
-You can find out if an element exists in an array at a certain index with
-the expression:
-
-@example
-@var{index} in @var{array}
-@end example
-
-@noindent
-This expression tests whether or not the particular index exists,
-without the side effect of creating that element if it is not present.
-The expression has the value 1 (true) if
-@code{@var{array}[@var{index}]} exists, and 0 (false) if it does not
-exist.@refill
-
-For example, to find out whether the array @code{frequencies} contains the
-subscript @code{"2"}, you would ask:@refill
-
-@example
-if ("2" in frequencies) print "Subscript \"2\" is present."
-@end example
-
-Note that this is @emph{not} a test of whether or not the array
-@code{frequencies} contains an element whose @emph{value} is @code{"2"}.
-(There is no way to do that except to scan all the elements.) Also, this
-@emph{does not} create @code{frequencies["2"]}, while the following
-(incorrect) alternative would:@refill
-
-@example
-if (frequencies["2"] != "") print "Subscript \"2\" is present."
-@end example
-
-@node Assigning Elements, Array Example, Reference to Elements, Arrays
-@section Assigning Array Elements
-@cindex Array assignment
-@cindex Element assignment
-
-Array elements are lvalues: they can be assigned values just like
-@code{awk} variables:
-
-@example
-@var{array}[@var{subscript}] = @var{value}
-@end example
-
-@noindent
-Here @var{array} is the name of your array. The expression
-@var{subscript} is the index of the element of the array that you want
-to assign a value. The expression @var{value} is the value you are
-assigning to that element of the array.@refill
-
-@node Array Example, Scanning an Array, Assigning Elements, Arrays
-@section Basic Example of an Array
-
-The following program takes a list of lines, each beginning with a line
-number, and prints them out in order of line number. The line numbers are
-not in order, however, when they are first read: they are scrambled. This
-program sorts the lines by making an array using the line numbers as
-subscripts. It then prints out the lines in sorted order of their numbers.
-It is a very simple program, and will get confused if it encounters repeated
-numbers, gaps, or lines that don't begin with a number.@refill
-
-@example
-BEGIN @{
- max=0
-@}
-
-@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
-@}
-
-END @{
- for (x = 1; x <= max; x++)
- print arr[x]
-@}
-@end example
-
-The first rule just initializes the variable @code{max}. (This is not
-strictly necessary, since an uninitialized variable has the null string
-as its value, and the null string is effectively zero when used in
-a context where a number is required.)
-
-The second rule keeps track of the largest line number seen so far;
-it also stores each line into the array @code{arr}, at an index that
-is the line's number.
-
-The third rule runs after all the input has been read, to print out
-all the lines.
-
-When this program is run with the following input:
-
-@example
-5 I am the Five man
-2 Who are you? The new number two!
-4 . . . And four on the floor
-1 Who is number one?
-3 I three you.
-@end example
-
-@noindent
-its output is this:
-
-@example
-1 Who is number one?
-2 Who are you? The new number two!
-3 I three you.
-4 . . . And four on the floor
-5 I am the Five man
-@end example
-
-@node Scanning an Array, Delete, Array Example, Arrays
-@section Scanning All Elements of an Array
-@cindex @code{for (x in @dots{})}
-@cindex Arrays, special @code{for} statement
-@cindex Scanning an array
-
-In programs that use arrays, often you need a loop that will execute
-once for each element of an array. In other languages, where arrays are
-contiguous and indices are limited to nonnegative integers, this is
-easy: the largest index is one less than the length of the array, and you can
-find all the valid indices by counting from zero up to that value. This
-technique won't do the job in @code{awk}, since any number or string
-may be an array index. So @code{awk} has a special kind of @code{for}
-statement for scanning an array:
-
-@example
-for (@var{var} in @var{array})
- @var{body}
-@end example
-
-@noindent
-This loop executes @var{body} once for each different value that your
-program has previously used as an index in @var{array}, with the
-variable @var{var} set to that index.@refill
-
-Here is a program that uses this form of the @code{for} statement. The
-first rule scans the input records and notes which words appear (at
-least once) in the input, by storing a 1 into the array @code{used} with
-the word as index. The second rule scans the elements of @code{used} to
-find all the distinct words that appear in the input. It prints each
-word that is more than 10 characters long, and also prints the number of
-such words. @xref{Built-in}, for more information on the built--in
-function @code{length}.
-
-@example
-# Record a 1 for each word that is used at least once.
-@{
- for (i = 1; i <= NF; i++)
- used[$i] = 1
-@}
-
-# Find number of distinct words more than 10 characters long.
-END @{
- num_long_words = 0
- for (x in used)
- if (length(x) > 10) @{
- ++num_long_words
- print x
- @}
- print num_long_words, "words longer than 10 characters"
-@}
-@end example
-
-@noindent
-@xref{Sample Program}, for a more detailed example of this type.
-
-The order in which elements of the array are accessed by this statement
-is determined by the internal arrangement of the array elements within
-@code{awk} and cannot be controlled or changed. This can lead to
-problems if new elements are added to @var{array} by statements in
-@var{body}; you cannot predict whether or not the @code{for} loop will
-reach them. Similarly, changing @var{var} inside the loop can produce
-strange results. It is best to avoid such things.@refill
-
-@node Delete, Multi-dimensional, Scanning an Array, Arrays
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex Deleting elements of arrays
-@cindex Removing elements of arrays
-@cindex Arrays, deleting an element
-
-You can remove an individual element of an array using the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index}]
-@end example
-
-When an array element is deleted, it is as if you had never referred to it
-and had never given it any value. Any value the element formerly had
-can no longer be obtained.
-
-Here is an example of deleting elements in an array:
-
-@example
-awk '@{ for (i in frequencies)
- delete frequencies[i]
-@}'
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-
-If you delete an element, the @code{for} statement to scan the array
-will not report that element, and the @code{in} operator to check for
-the presence of that element will return 0:
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-@node Multi-dimensional, Multi-scanning, Delete, Arrays
-@section Multi--dimensional Arrays
-
-@cindex Subscripts, multi-dimensional in arrays
-@cindex Arrays, multi-dimensional subscripts
-A multi--dimensional array is an array in which an element is identified
-by a sequence of indices, not a single index. For example, a
-two--dimensional array requires two indices. The usual way (in most
-languages, including @code{awk}) to refer to an element of a
-two--dimensional array named @code{grid} is with @code{grid[x,y]}.
-
-@vindex SUBSEP
-Multi--dimensional arrays are supported in @code{awk} through
-concatenation of indices into one string. What happens is that
-@code{awk} converts the indices into strings (@pxref{Conversion}) and
-concatenates them together, with a separator between them. This creates
-a single string that describes the values of the separate indices. The
-combined string is used as a single index into an ordinary,
-one--dimensional array. The separator used is the value of the special
-variable @code{SUBSEP}.
-
-For example, suppose the value of @code{SUBSEP} is @code{","} and the
-expression @samp{foo[5,12]="value"} is executed. The numbers 5 and 12
-will be concatenated with a comma between them, yielding @code{"5,12"};
-thus, the array element @code{foo["5,12"]} will be set to
-@code{"value"}.
-
-Once the element's value is stored, @code{awk} has no record of whether
-it was stored with a single index or a sequence of indices. The two
-expressions @code{foo[5,12]} and @w{@code{foo[5 SUBSEP 12]}} always have
-the same value.
-
-The default value of @code{SUBSEP} is not a comma; it is the string
-@code{"\034"}, which contains a nonprinting character that is unlikely
-to appear in an @code{awk} program or in the input data.
-
-The usefulness of choosing an unlikely character comes from the fact
-that index values that contain a string matching @code{SUBSEP} lead to
-combined strings that are ambiguous. Suppose that @code{SUBSEP} is a
-comma; then @w{@code{foo["a,b", "c"]}} and @w{@code{foo["a", "b,c"]}} will be
-indistinguishable because both are actually stored as
-@code{foo["a,b,c"]}. Because @code{SUBSEP} is @code{"\034"}, such
-confusion can actually happen only when an index contains the character
-@code{"\034"}, which is a rare event.
-
-You can test whether a particular index--sequence exists in a
-``multi--dimensional'' array with the same operator @code{in} used for single
-dimensional arrays. Instead of a single index as the left--hand operand,
-write the whole sequence of indices, separated by commas, in
-parentheses:@refill
-
-@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
-@end example
-
-The following example treats its input as a two--dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
-result. It assumes that all lines have the same number of
-elements.
-
-@example
-awk 'BEGIN @{
- max_nf = max_nr = 0
-@}
-
-@{
- if (max_nf < NF)
- max_nf = NF
- max_nr = NR
- for (x = 1; x <= NF; x++)
- vector[x, NR] = $x
-@}
-
-END @{
- for (x = 1; x <= max_nf; x++) @{
- for (y = max_nr; y >= 1; --y)
- printf("%s ", vector[x, y])
- printf("\n")
- @}
-@}'
-@end example
-
-@noindent
-When given the input:
-
-@example
-1 2 3 4 5 6
-2 3 4 5 6 1
-3 4 5 6 1 2
-4 5 6 1 2 3
-@end example
-
-@noindent
-it produces:
-
-@example
-4 3 2 1
-5 4 3 2
-6 5 4 3
-1 6 5 4
-2 1 6 5
-3 2 1 6
-@end example
-
-@node Multi-scanning, , Multi-dimensional, Arrays
-@section Scanning Multi--dimensional Arrays
-
-There is no special @code{for} statement for scanning a
-``multi--dimensional'' array; there cannot be one, because in truth there
-are no multi--dimensional arrays or elements; there is only a
-multi--dimensional @emph{way of accessing} an array.
-
-However, if your program has an array that is always accessed as
-multi--dimensional, you can get the effect of scanning it by combining
-the scanning @code{for} statement (@pxref{Scanning an Array}) with the
-@code{split} built--in function (@pxref{String Functions}). It works
-like this:
-
-@example
-for (combined in @var{array}) @{
- split (combined, separate, SUBSEP)
- @dots{}
-@}
-@end example
-
-@noindent
-This finds each concatenated, combined index in the array, and splits it
-into the individual indices by breaking it apart where the value of
-@code{SUBSEP} appears. The split--out indices become the elements of
-the array @code{separate}.
-
-Thus, suppose you have previously stored in @code{@var{array}[1,
-"foo"]}; then an element with index @code{"1\034foo"} exists in
-@var{array}. (Recall that the default value of @code{SUBSEP} contains
-the character with code 034.) Sooner or later the @code{for} statement
-will find that index and do an iteration with @code{combined} set to
-@code{"1\034foo"}. Then the @code{split} function will be called as
-follows:
-
-@example
-split ("1\034foo", separate, "\034")
-@end example
-
-@noindent
-The result of this is to set @code{separate[1]} to 1 and @code{separate[2]}
-to @code{"foo"}. Presto, the original sequence of separate indices has
-been recovered.
-
-@node Built-in, User-defined, Arrays, Top
-@chapter Built--in Functions
-
-@cindex Built-in functions, list of
-@dfn{Built--in} functions are functions always available for your
-@code{awk} program to call. This chapter defines all the built--in
-functions that exist; some of them are mentioned in other sections, but
-they are summarized here for your convenience. (You can also define
-new functions yourself. @xref{User-defined}.)
-
-In most cases, any extra arguments given to built--in functions are ignored.
-The defaults for omitted arguments vary from function to function and are
-described under the individual functions.
-
-The name of a built--in function need not be followed immediately by
-the opening left parenthesis of the arguments; whitespace is allowed.
-However, it is wise to write no space there, since user--defined
-functions do not allow space.
-
-When a function is called, expressions that create the function's actual
-parameters are evaluated completely before the function call is performed.
-For example, in the code fragment:
-
-@example
-i = 4
-j = myfunc(i++)
-@end example
-
-@noindent
-the variable @code{i} will be set to 5 before @code{myfunc} is called
-with a value of 4 for its actual parameter.
-
-@menu
-* Numeric Functions:: Functions that work with numbers,
- including @code{int}, @code{sin} and @code{rand}.
-
-* String Functions:: Functions for string manipulation,
- such as @code{split}, @code{match}, and @code{sprintf}.
-
-* I/O Functions:: Functions for files and shell commands
-@end menu
-
-@node Numeric Functions, String Functions, , Built-in
-@section Numeric Built--in Functions
-
-The general syntax of the numeric built--in functions is the same for
-each. Here is an example of that syntax:@refill
-
-@example
-awk '# Read input records containing a pair of points: x0, y0, x1, y1.
- # Print the points and the distance between them.
- @{ printf "%f %f %f %f %f\n", $1, $2, $3, $4,
- sqrt(($3-$1) * ($3-$1) + ($4-$2) * ($4-$2)) @}'
-@end example
-
-@noindent
-This calculates the square root of a calculation that uses the values
-of the fields. It then prints the first four fields of the input
-record and the result of the square root calculation.
-
-Here is the full list of numeric built--in functions:
-
-@table @code
-@item int(@var{x})
-This gives you the integer part of @var{x}, truncated toward 0. This
-produces the nearest integer to @var{x}, located between @var{x} and 0.
-
-For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
-is -3, and @code{int(-3)} is -3 as well.@refill
-
-@item sqrt(@var{x})
-This gives you the positive square root of @var{x}. It reports an error
-if @var{x} is negative.@refill
-
-@item exp(@var{x})
-This gives you the exponential of @var{x}, or reports an error if @var{x} is
-out of range. The range of values @var{x} can have depends on your
-machine's floating point representation.@refill
-
-@item log(@var{x})
-This gives you the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, it reports an error.@refill
-
-@item sin(@var{x})
-This gives you the sine of @var{x}, with @var{x} in radians.
-
-@item cos(@var{x})
-This gives you the cosine of @var{x}, with @var{x} in radians.
-
-@item atan2(@var{y}, @var{x})
-This gives you the arctangent of @var{y} / @var{x}, with the result in radians.
-
-@item rand()
-This gives you a random number. The values of @w{@code{rand()}} are
-uniformly--distributed between 0 and 1. The value is never 0 and never
-1.
-
-Often you want random integers instead. Here is a user--defined function
-you can use to obtain a random nonnegative integer less than @var{n}:
-
-@example
-function randint(n) @{
- return int(n * rand())
-@}
-@end example
-
-@noindent
-The multiplication produces a random real number at least 0, and less
-than @var{n}. We then make it an integer (using @code{int}) between 0
-and @code{@var{n}@minus{}1}.
-
-Here is an example where a similar function is used to produce
-random integers between 1 and @var{n}:
-
-@example
-awk '
-# Function to roll a simulated die.
-function roll(n) @{ return 1 + int(rand() * n) @}
-
-# Roll 3 six--sided dice and print total number of points.
-@{
- printf("%d points\n", roll(6)+roll(6)+roll(6))
-@}'
-@end example
-
-@emph{Note} that @w{@code{rand()}} starts generating numbers from the same
-point, or @dfn{seed}, each time you run @code{awk}. This means that
-the same program will produce the same results each time you run it.
-The numbers are random within one @code{awk} run, but predictable
-from run to run. This is convenient for debugging, but if you want
-a program to do different things each time it is used, you must change
-the seed to a value that will be different in each run. To do this,
-use @code{srand}.
-
-@item srand(@var{x})
-The function @code{srand(@var{x})} sets the starting point, or @dfn{seed},
-for generating random numbers to the value @var{x}.
-
-Each seed value leads to a particular sequence of ``random'' numbers.
-Thus, if you set the seed to the same value a second time, you will get
-the same sequence of ``random'' numbers again.
-
-If you omit the argument @var{x}, as in @code{srand()}, then the current
-date and time of day are used for a seed. This is the way to get random
-numbers that are truly unpredictable.
-
-The return value of @code{srand()} is the previous seed. This makes it
-easy to keep track of the seeds for use in consistently reproducing
-sequences of random numbers.
-@end table
-
-@node String Functions, I/O Functions, Numeric Functions, Built-in
-@section Built--in Functions for String Manipulation
-
-@table @code
-@item index(@var{in}, @var{find})
-@findex index
-This searches the string @var{in} for the first occurrence of the string
-@var{find}, and returns the position where that occurrence begins in the
-string @var{in}. For example:@refill
-
-@example
-awk 'BEGIN @{ print index("peanut", "an") @}'
-@end example
-
-@noindent
-prints @samp{3}. If @var{find} is not found, @code{index} returns 0.
-
-@item length(@var{string})
-@findex length
-This gives you the number of characters in @var{string}. If
-@var{string} is a number, the length of the digit string representing
-that number is returned. For example, @code{length("abcde")} is 5.
-By contrast, @code{length(15 * 35)} works out to 3. How? Well, 15 * 35 =
-525, and 525 is then converted to the string @samp{"525"}, which has
-three characters.
-
-@item match(@var{string}, @var{regexp})
-@findex match
-The @code{match} function searches the string, @var{string}, for the
-longest, leftmost substring matched by the regular expression,
-@var{regexp}. It returns the character position, or @dfn{index}, of
-where that substring begins (1, if it starts at the beginning of
-@var{string}). If no match is found, it returns 0.
-
-@vindex RSTART
-@vindex RLENGTH
-The @code{match} function sets the special variable @code{RSTART} to
-the index. It also sets the special variable @code{RLENGTH} to the
-length of the matched substring. If no match is found, @code{RSTART}
-is set to 0, and @code{RLENGTH} to -1.
-
-For example:
-
-@example
-awk '@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where)
- print "Match of", regex, "found at", where, "in", $0
- @}
-@}'
-@end example
-
-@noindent
-This program looks for lines that match the regular expression stored in
-the variable @code{regex}. This regular expression can be changed. If the
-first word on a line is @samp{FIND}, @code{regex} is changed to be the
-second word on that line. Therefore, given:
-
-@example
-FIND fo*bar
-My program was a foobar
-But none of it would doobar
-FIND Melvin
-JF+KM
-This line is property of The Reality Engineering Co.
-This file was created by Melvin.
-@end example
-
-@noindent
-@code{awk} prints:
-
-@example
-Match of fo*bar found at 18 in My program was a foobar
-Match of Melvin found at 26 in This file was created by Melvin.
-@end example
-
-@item split(@var{string}, @var{array}, @var{field_separator})
-@findex split
-This divides @var{string} up into pieces separated by
-@var{field_separator}, and stores the pieces in @var{array}. The
-first piece is stored in @code{@var{array}[1]}, the second piece in
-@code{@var{array}[2]}, and so forth. The string value of the third
-argument, @var{field_separator}, is used as a regexp that matches the
-places at which to split @var{string}. If the @var{field_separator}
-is omitted, the value of @code{FS} is used. @code{split} returns the
-number of elements created.@refill
-
-The @code{split} function, then, splits strings into pieces in a
-manner similar to the way input lines are split into fields. For example:
-
-@example
-split("auto-da-fe", a, "-")
-@end example
-
-@noindent
-splits the string @samp{auto-da-fe} into three fields using @samp{-} as the
-separator. It sets the contents of the array @code{a} as follows:
-
-@example
-a[1] = "auto"
-a[2] = "da"
-a[3] = "fe"
-@end example
-
-@noindent
-The value returned by this call to @code{split} is 3.
-
-@item sprintf(@var{format}, @var{expression1},@dots{})
-@findex sprintf
-This returns (without printing) the string that @code{printf} would
-have printed out with the same arguments (@pxref{Printf}). For
-example:
-
-@example
-sprintf("pi = %.2f (approx.)", 22/7)
-@end example
-
-@noindent
-returns the string @w{@code{"pi = 3.14 (approx.)"}}.
-
-@item sub(@var{regexp}, @var{replacement_string}, @var{target_variable})
-@findex sub
-The @code{sub} function alters the value of @var{target_variable}.
-It searches this value, which should be a string, for the
-leftmost substring matched by the regular expression, @var{regexp},
-extending this match as far as possible. Then the entire string is
-changed by replacing the matched text with @var{replacement_string}.
-The modified string becomes the new value of @var{target_variable}.
-
-This function is peculiar because @var{target_variable} is not simply
-used to compute a value, and not just any expression will do: it
-must be a variable, field or array reference, so that @code{sub} can
-store a modified value there. If this argument is omitted, then the
-default is to use and alter @code{$0}.
-
-For example:@refill
-
-@example
-str = "water, water, everywhere"
-sub(/at/, "ith", str)
-@end example
-
-@noindent
-sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
-leftmost, longest occurrence of @samp{at} with @samp{ith}.
-
-The @code{sub} function returns the number of substitutions made (either
-one or zero).
-
-The special character, @samp{&}, in the replacement string,
-@var{replacement_string}, stands for the precise substring that was
-matched by @var{regexp}. (If the regexp can match more than one string,
-then this precise substring may vary.) For example:@refill
-
-@example
-awk '@{ sub(/candidate/, "& and his wife"); print @}'
-@end example
-
-@noindent
-will change the first occurrence of ``candidate'' to ``candidate and
-his wife'' on each input line.
-
-@noindent
-The effect of this special character can be turned off by preceding
-it with a backslash (@samp{\&}). To include a backslash in the
-replacement string, it too must be preceded with a (second) backslash.
-
-Note: if you use @code{sub} with a third argument that is not a variable,
-field or array element reference, then it will still search for the pattern
-and return 0 or 1, but the modified string is thrown away because there
-is no place to put it. For example:
-
-@example
-sub(/USA/, "United States", "the USA and Canada")
-@end example
-
-@noindent
-will indeed produce a string @w{@code{"the United States and Canada"}},
-but there will be no way to use that string!
-
-@item gsub(@var{regexp}, @var{replacement_string}, @var{target_variable})
-@findex gsub
-This is similar to the @code{sub} function, except @code{gsub} replaces
-@emph{all} of the longest, leftmost, @emph{non--overlapping} matching
-substrings it can find. The ``g'' in @code{gsub} stands for @dfn{global},
-which means replace @emph{everywhere}. For example:@refill
-
-@example
-awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
-@end example
-
-@noindent
-replaces all occurrences of the string @samp{Britain} with @samp{United
-Kingdom} for all input records.@refill
-
-The @code{gsub} function returns the number of substitutions made. If
-the variable to be searched and altered, @var{target_variable}, is
-omitted, then the entire input record, @code{$0}, is used.@refill
-
-The characters @samp{&} and @samp{\} are special in @code{gsub}
-as they are in @code{sub} (see immediately above).
-
-@item substr(@var{string}, @var{start}, @var{length})
-@findex substr
-This returns a @var{length}--character--long substring of @var{string},
-starting at character number @var{start}. The first character of a
-string is character number one. For example,
-@code{substr("washington", 5, 3)} returns @samp{"ing"}.@refill
-
-If @var{length} is not present, this function returns the whole suffix of
-@var{string} that begins at character number @var{start}. For example,
-@code{substr("washington", 5)} returns @samp{"ington"}.
-@end table
-
-@node I/O Functions, , String Functions, Built-in
-@section Built--in Functions for I/O to Files and Commands
-
-@table @code
-@item close(@var{filename})
-Close the file @var{filename}. The argument may alternatively be
-a shell command that was used for redirecting to or from a pipe; then the
-pipe is closed.
-
-@xref{Close Input}, regarding closing input files and pipes.
-@xref{Close Output}, regarding closing output files and pipes.
-
-@item system(@var{command})
-@findex system
-@cindex Interaction of @code{awk} with other programs
-The @code{system} function allows the user to execute operating system commands and
-then return to the @code{awk} program. The @code{system} function executes
-the command given by the string value of @var{command}. It returns, as its
-value, the status returned by the command that was executed. This is known
-as returning the @dfn{exit status}.
-
-For example, if the following fragment of code is put in your @code{awk}
-program:
-
-@example
-END @{
- system("mail -s 'awk run done' operator < /dev/null")
-@}
-@end example
-
-@noindent
-the system operator will be sent mail when the @code{awk} program
-finishes processing input and begins its end--of--input processing.
-
-Note that much the same result can be obtained by redirecting
-@code{print} or @code{printf} into a pipe.
-However, if your @code{awk} program is interactive, this function is
-useful for cranking up large self--contained programs, such as a shell
-or an editor.@refill
-@end table
-
-@node User-defined, Special, Built-in, Top
-@chapter User--defined Functions
-
-@cindex User-defined functions
-@cindex Functions, user-defined
-Complicated @code{awk} programs can often be simplified by defining
-your own functions. User--defined functions can be called just like
-built--in ones (@pxref{Function Calls}), but it is up to you to define
-them---to tell @code{awk} what they should do.
-
-@menu
-* Definition Syntax:: How to write definitions and what they mean.
-* Function Example:: An example function definition and what it does.
-* Function Caveats:: Things to watch out for.
-* Return Statement:: Specifying the value a function returns.
-@end menu
-
-@node Definition Syntax, Function Example, , User-defined
-@section Syntax of Function Definitions
-
-The definition of a function named @var{name} looks like this:
-
-@example
-function @var{name} (@var{parameter-list}) @{
- @var{body-of-function}
-@}
-@end example
-
-A valid function name is like a valid variable name: a sequence of
-letters, digits and underscores, not starting with a digit.
-
-Such function definitions can appear anywhere between the rules
-of the @code{awk} program. The general format of an @code{awk}
-program, then, is now modified to include sequences of rules @emph{and}
-user--defined function definitions.
-
-The function definition need not precede all the uses of the function.
-This is because @code{awk} reads the entire program before starting to
-execute any of it.
-
-The @var{parameter-list} is a list of the function's @dfn{local}
-variable names, separated by commas. Within the body of the function,
-local variables refer to arguments with which the function is called.
-If the function is called with fewer arguments than it has local
-variables, this is not an error; the extra local variables are simply
-set to the null string.
-
-The local variable values hide or @dfn{shadow} any variables of the same
-names used in the rest of the program. The shadowed variables are not
-accessible in the function definition, because there is no way to name
-them while their names have been taken away for the local variables.
-All other variables used in the @code{awk} program can be referenced
-or set normally in the function definition.
-
-The local variables last only as long as the function is executing.
-Once the function finishes, the shadowed variables come back.
-
-The @var{body-of-function} part of the definition is the most important
-part, because this is what says what the function should actually @emph{do}.
-The local variables exist to give the body a way to talk about the arguments.
-
-Functions may be @dfn{recursive}, i.e., they can call themselves, either
-directly, or indirectly (via calling a second function that calls the first
-again).
-
-The keyword @samp{function} may also be written @samp{func}.
-
-@node Function Example, Function Caveats, Definition Syntax, User-defined
-@section Function Definition Example
-
-Here is an example of a user--defined function, called @code{myprint}, that
-takes a number and prints it in a specific format.
-
-@example
-function myprint(num)
-@{
- printf "%6.3g\n", num
-@}
-@end example
-
-@noindent
-To illustrate, let's use the following @code{awk} rule to use, or
-@dfn{call}, our @code{myprint} function:
-
-@example
-$3 > 0 @{ myprint($3) @}
-@end example
-
-@noindent
-This program prints, in our special format, all the third fields that
-contain a positive number in our input. Therefore, when given:
-
-@example
- 1.2 3.4 5.6 7.8
- 9.10 11.12 13.14 15.16
-17.18 19.20 21.22 23.24
-@end example
-
-@noindent
-this program, using our function to format the results, will print:
-
-@example
- 5.6
- 13.1
- 21.2
-@end example
-
-Here is a rather contrived example of a recursive function. It prints a
-string backwards:
-
-@example
-function rev (str, len) @{
- if (len == 0) @{
- printf "\n"
- return
- @}
- printf "%c", substr(str, len, 1)
- rev(str, len - 1)
-@}
-@end example
-
-@node Function Caveats, Return Statement, Function Example, User-defined
-@section Caveats of Function Calling
-
-@emph{Note} that there cannot be any blanks between the function name and
-the left parenthesis of the argument list, when calling a function.
-This is so @code{awk} can tell you are not trying to concatenate the value
-of a variable with the value of an expression inside the parentheses.
-
-When a function is called, it is given a @emph{copy} of the values of
-its arguments. This is called @dfn{passing by value}. The caller may
-use a variable as the expression for the argument, but the called
-function does not know this: all it knows is what value the argument
-had. For example, if you write this code:
-
-@example
-foo = "bar"
-z = myfunc(foo)
-@end example
-
-@noindent
-then you should not think of the argument to @code{myfunc} as being
-``the variable @code{foo}''. Instead, think of the argument as the
-string value, @code{"bar"}.
-
-If the function @code{myfunc} alters the values of its local variables,
-this has no effect on any other variables. In particular, if @code{myfunc}
-does this:
-
-@example
-function myfunc (win) @{
- print win
- win = "zzz"
- print win
-@}
-@end example
-
-@noindent
-to change its first argument variable @code{win}, this @emph{does not}
-change the value of @code{foo} in the caller. The role of @code{foo} in
-calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
-If @code{win} also exists outside of @code{myfunc}, this definition
-will not change it---that value is shadowed during the execution of
-@code{myfunc} and cannot be seen or changed from there.
-
-However, when arrays are the parameters to functions, they are @emph{not}
-copied. Instead, the array itself is made available for direct manipulation
-by the function. This is usually called @dfn{passing by reference}.
-Changes made to an array parameter inside the body of a function @emph{are}
-visible outside that function. @emph{This can be very dangerous if you don't
-watch what you are doing.} For example:@refill
-
-@example
-function changeit (array, ind, nvalue) @{
- array[ind] = nvalue
-@}
-
-BEGIN @{
- a[1] = 1 ; a[2] = 2 ; a[3] = 3
- changeit(a, 2, "two")
- printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
- @}
-@end example
-
-@noindent
-will print @samp{a[1] = 1, a[2] = two, a[3] = 3}, because the call to
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
-
-@node Return Statement, , Function Caveats, User-defined
-@section The @code{return} Statement
-@cindex @code{return} statement
-
-The body of a user--defined function can contain a @code{return} statement.
-This statement returns control to the rest of the @code{awk} program. It
-can also be used to return a value for use in the rest of the @code{awk}
-program. It looks like:@refill
-
-@display
-@code{return @var{expression}}
-@end display
-
-The @var{expression} part is optional. If it is omitted, then the returned
-value is undefined and, therefore, unpredictable.
-
-A @code{return} statement with no value expression is assumed at the end of
-every function definition. So if control reaches the end of the function
-definition, then the function returns an unpredictable value.
-
-Here is an example of a user--defined function that returns a value
-for the largest number among the elements of an array:@refill
-
-@example
-function maxelt (vec,   i, ret) @{
- for (i in vec) @{
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- @}
- return ret
-@}
-@end example
-
-@noindent
-You call @code{maxelt} with one argument, an array name. The local
-variables @code{i} and @code{ret} are not intended to be arguments;
-while there is nothing to stop you from passing two or three arguments
-to @code{maxelt}, the results would be strange.
-
-When writing a function definition, it is conventional to separate the
-parameters from the local variables with extra spaces, as shown above
-in the definition of @code{maxelt}.
-
-Here is a program that uses, or calls, our @code{maxelt} function. This
-program loads an array, calls @code{maxelt}, and then reports the maximum
-number in that array:@refill
-
-@example
-awk '
-function maxelt (vec, i, ret) @{
- for (i in vec) @{
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- @}
- return ret
-@}
-
-# Load all fields of each record into nums.
-@{
- for(i = 1; i <= NF; i++)
- nums[NR, i] = $i
-@}
-
-END @{
- print maxelt(nums)
-@}'
-@end example
-
-Given the following input:
-
-@example
- 1 5 23 8 16
-44 3 5 2 8 26
-256 291 1396 2962 100
--6 467 998 1101
-99385 11 0 225
-@end example
-
-@noindent
-our program tells us (predictably) that:
-
-@example
-99385
-@end example
-
-@noindent
-is the largest number in our array.
-
-@node Special, Sample Program, User-defined, Top
-@chapter Special Variables
-
-Most @code{awk} variables are available for you to use for your own
-purposes; they will never change except when your program assigns them, and
-will never affect anything except when your program examines them.
-
-A few variables have special meanings. Some of them @code{awk} examines
-automatically, so that they enable you to tell @code{awk} how to do
-certain things. Others are set automatically by @code{awk}, so that they
-carry information from the internal workings of @code{awk} to your program.
-
-Most of these variables are also documented in the chapters where their
-areas of activity are described.
-
-@menu
-* User-modified:: Special variables that you change to control @code{awk}.
-
-* Auto-set:: Special variables where @code{awk} gives you information.
-@end menu
-
-@node User-modified, Auto-set, , Special
-@section Special Variables That Control @code{awk}
-@cindex Special variables, user modifiable
-
-This is a list of the variables which you can change to control how
-@code{awk} does certain things.
-
-@table @code
-@c it's unadvisable to have multiple index entries for the same name
-@c since in Info there is no way to distinguish the two.
-@c @vindex FS
-@item FS
-@code{FS} is the input field separator (@pxref{Field Separators}).
-The value is a regular expression that matches the separations
-between fields in an input record.
-
-The default value is @w{@code{" "}}, a string consisting of a single
-space. As a special exception, this value actually means that any
-sequence of spaces and tabs is a single separator. It also causes
-spaces and tabs at the beginning or end of a line to be ignored.
-
-You can set the value of @code{FS} on the command line using the
-@samp{-F} option:
-
-@example
-awk -F, '@var{program}' @var{input-files}
-@end example
-
-@item OFMT
-@c @vindex OFMT
-This string is used by @code{awk} to control conversion of numbers to
-strings (@pxref{Conversion}). It works by being passed, in effect, as
-the first argument to the @code{sprintf} function. Its default value
-is @code{"%.6g"}.@refill
-
-@item OFS
-@c @vindex OFS
-This is the output field separator (@pxref{Output Separators}). It is
-output between the fields output by a @code{print} statement. Its
-default value is @w{@code{" "}}, a string consisting of a single space.
-
-@item ORS
-@c @vindex ORS
-This is the output record separator (@pxref{Output Separators}). It
-is output at the end of every @code{print} statement. Its default
-value is the newline character, often represented in @code{awk}
-programs as @samp{\n}.
-
-@item RS
-@c @vindex RS
-This is @code{awk}'s record separator (@pxref{Records}). Its default
-value is a string containing a single newline character, which means
-that an input record consists of a single line of text.@refill
-
-@item SUBSEP
-@c @vindex SUBSEP
-@code{SUBSEP} is a subscript separator (@pxref{Multi-dimensional}). It
-has the default value of @code{"\034"}, and is used to separate the
-parts of the subscript of a multi--dimensional array. Thus, if you access
-@code{foo[12,3]}, it really accesses @code{foo["12\0343"]}.@refill
-@end table
-
-@node Auto-set, , User-modified, Special
-@section Special Variables That Convey Information to You
-
-This is a list of the variables that are set automatically by @code{awk}
-on certain occasions so as to provide information for your program.
-
-@table @code
-@item ARGC
-@itemx ARGV
-@c @vindex ARGC
-@c @vindex ARGV
-The command--line arguments available to @code{awk} are stored in an
-array called @code{ARGV}. @code{ARGC} is the number of command--line
-arguments present. @code{ARGV} is indexed from zero to @w{@code{ARGC} - 1}.
-For example:
-
-@example
-awk '@{ print ARGV[$1] @}' inventory-shipped BBS-list
-@end example
-
-@noindent
-In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
-contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
-@code{"BBS-list"}. @code{ARGC} is 3, one more than the index of the
-last element in @code{ARGV} since the elements are numbered from zero.
-
-Notice that the @code{awk} program is not treated as an argument. The
-@samp{-f} @file{@var{filename}} option, and the @samp{-F} option,
-are also not treated as arguments for this purpose.
-
-Variable assignments on the command line @emph{are} treated as arguments,
-and do show up in the @code{ARGV} array.
-
-Your program can alter @code{ARGC} and the elements of @code{ARGV}. Each
-time @code{awk} reaches the end of an input file, it uses the next
-element of @code{ARGV} as the name of the next input file. By storing a
-different string there, your program can change which files are read.
-You can use @samp{-} to represent the standard input. By storing
-additional elements and incrementing @code{ARGC} you can cause
-additional files to be read.
-
-If you decrease the value of @code{ARGC}, that eliminates input files
-from the end of the list. By recording the old value of @code{ARGC}
-elsewhere, your program can treat the eliminated arguments as
-something other than file names.
-
-To eliminate a file from the middle of the list, store the null string
-(@code{""}) into @code{ARGV} in place of the file's name. As a
-special feature, @code{awk} ignores file names that have been
-replaced with the null string.
-
-@item ENVIRON
-@vindex ENVIRON
-This is an array that contains the values of the environment. The array
-indices are the environment variable names; the values are the values of
-the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @file{/u/close}. Changing this array
-does not affect the environment passed on to any programs that
-@code{awk} may spawn via redirection or the @code{system} function.
-(This may not work under operating systems other than MS-DOS, Unix, or
-GNU.)
-
-@item FILENAME
-@c @vindex FILENAME
-This is the name of the file that @code{awk} is currently reading.
-If @code{awk} is reading from the standard input (in other words,
-there are no files listed on the command line),
-@code{FILENAME} is set to @code{"-"}.
-@code{FILENAME} is changed each time a new file is read (@pxref{Reading
-Files}).@refill
-
-@item FNR
-@c @vindex FNR
-@code{FNR} is the current record number in the current file. @code{FNR} is
-incremented each time a new record is read (@pxref{Getline}).
-It is reinitialized to 0 each time a new input file is started.
-
-@item NF
-@c @vindex NF
-@code{NF} is the number of fields in the current input record.
-@code{NF} is set each time a new record is read, when a new field is
-created, or when @code{$0} changes (@pxref{Fields}).@refill
-
-@item NR
-@c @vindex NR
-This is the number of input records @code{awk} has processed since
-the beginning of the program's execution (@pxref{Records}).
-@code{NR} is set each time a new record is read.@refill
-
-@item RLENGTH
-@c @vindex RLENGTH
-@code{RLENGTH} is the length of the string matched by the @code{match}
-function (@pxref{String Functions}). @code{RLENGTH} is set by
-invoking the @code{match} function. Its value is the length of the
-matched string, or -1 if no match was found.@refill
-
-@item RSTART
-@c @vindex RSTART
-@code{RSTART} is the start of the string matched by the @code{match}
-function (@pxref{String Functions}). @code{RSTART} is set by invoking
-the @code{match} function. Its value is the position in the string where
-the matched string starts, or 0 if no match was found.@refill
-@end table
-
-@node Sample Program, Notes, Special , Top
-@appendix Sample Program
-
-The following example is a complete @code{awk} program, which prints
-the number of occurrences of each word in its input. It illustrates the
-associative nature of @code{awk} arrays by using strings as subscripts. It
-also demonstrates the @code{for @var{x} in @var{array}} construction.
-Finally, it shows how @code{awk} can be used in conjunction with other
-utility programs to do a useful task of some complexity with a minimum of
-effort. Some explanations follow the program listing.@refill
-
-@example
-awk '
-# Print list of word frequencies
-@{
- for (i = 1; i <= NF; i++)
- freq[$i]++
-@}
-
-END @{
- for (word in freq)
- printf "%s\t%d\n", word, freq[word]
-@}'
-@end example
-
-The first thing to notice about this program is that it has two rules. The
-first rule, because it has an empty pattern, is executed on every line of
-the input. It uses @code{awk}'s field--accessing mechanism (@pxref{Fields})
-to pick out the individual words from the line, and the special variable
-@code{NF} (@pxref{Special}) to know how many fields are available.
-
-For each input word, an element of the array @code{freq} is incremented to
-reflect that the word has been seen an additional time.@refill
-
-The second rule, because it has the pattern @code{END}, is not executed
-until the input has been exhausted. It prints out the contents of the
-@code{freq} table that has been built up inside the first action.@refill
-
-Note that this program has several problems that would prevent it from being
-useful by itself on real text files:@refill
-
-@itemize @bullet
-@item
-Words are detected using the @code{awk} convention that fields are
-separated by whitespace and that other characters in the input (except
-newlines) don't have any special meaning to @code{awk}. This means that
-punctuation characters count as part of words.@refill
-
-@item
-The @code{awk} language considers upper and lower case characters to be
-distinct. Therefore, @samp{foo} and @samp{Foo} will not be treated by this
-program as the same word. This is undesirable since in normal text, words
-are capitalized if they begin sentences, and a frequency analyzer should not
-be sensitive to that.@refill
-
-@item
-The output does not come out in any useful order. You're more likely to be
-interested in which words occur most frequently, or having an alphabetized
-table of how frequently each word occurs.@refill
-@end itemize
-
-The way to solve these problems is to use other operating system utilities
-to process the input and output of the @code{awk} script. Suppose the
-script shown above is saved in the file @file{frequency.awk}. Then the
-shell command:@refill
-
-@example
-tr A-Z a-z < file1 | tr -cd 'a-z\012' \
- | awk -f frequency.awk \
- | sort +1 -nr
-@end example
-
-@noindent
-produces a table of the words appearing in @file{file1} in order of
-decreasing frequency.
-
-The first @code{tr} command in this pipeline translates all the upper case
-characters in @file{file1} to lower case. The second @code{tr} command
-deletes all the characters in the input except lower case characters and
-newlines. The second argument to the second @code{tr} is quoted to protect
-the backslash in it from being interpreted by the shell. The @code{awk}
-program reads this suitably massaged data and produces a word frequency
-table, which is not ordered.
-
-The @code{awk} script's output is now sorted by the @code{sort} command and
-printed on the terminal. The options given to @code{sort} in this example
-specify to sort by the second field of each input line (skipping one field),
-that the sort keys should be treated as numeric quantities (otherwise
-@samp{15} would come before @samp{5}), and that the sorting should be done
-in descending (reverse) order.@refill
-
-See the general operating system documentation for more information on how
-to use the @code{tr} and @code{sort} commands.@refill
-
-@ignore
-@strong{I have some more substantial programs courtesy of Rick Adams
-at UUNET. I am planning on incorporating those either in addition to or
-instead of this program.}
-@end ignore
-
-@node Notes, Glossary, Sample Program, Top
-@appendix Implementation Notes
-
-This appendix contains information mainly of interest to implementors and
-maintainers of @code{gawk}. Everything in it applies specifically to
-@code{gawk}, and not to other implementations.
-
-@menu
-* Extensions:: Things @code{gawk} does that Unix @code{awk} does not.
-
-* Future Extensions:: Things likely to appear in a future release.
-
-* Improvements:: Suggestions for future improvements.
-
-* Manual Improvements:: Suggestions for improvements to this manual.
-@end menu
-
-@node Extensions, Future Extensions, , Notes
-@appendixsec GNU Extensions to the AWK Language
-
-Several new features are in a state of flux. They are described here
-merely to document them somewhat, but they will probably change. We hope
-they will be incorporated into other versions of @code{awk}, too.
-
-All of these features can be turned off either by compiling @code{gawk}
-with @samp{-DSTRICT}, or by invoking @code{gawk} as @samp{awk}.
-
-@table @asis
-@item The @code{AWKPATH} environment variable
-When opening a file supplied via the @samp{-f} option, if the filename does
-not contain a @samp{/}, @code{gawk} will perform a @dfn{path search}
-for the file, similar to that performed by the shell. @code{gawk} gets
-its search path from the @code{AWKPATH} environment variable. If that
-variable does not exist, it uses the default path
-@code{".:/usr/lib/awk:/usr/local/lib/awk"}.@refill
-
-@item Case Independent Matching
-Two new operators have been introduced, @code{~~}, and @code{!~~}.
-These perform regular expression match and no-match operations that are
-case independent. In other words, @samp{A} and @samp{a} would both
-match @samp{/a/}.
-
-@item The @samp{-i} option
-This option causes the @code{~} and @code{!~} operators to behave
-like the @code{~~} and @code{!~~} operators described above.
-
-@item The @samp{-v} option
-This option prints version information for this particular copy of @code{gawk}.
-This is so you can determine if your copy of @code{gawk} is up to date
-with respect to whatever the Free Software Foundation is currently
-distributing. It may disappear in a future version of @code{gawk}.
-@end table
-
-@node Future Extensions, Improvements, Extensions, Notes
-@appendixsec Extensions Likely To Appear In A Future Release
-
-Here are some more extensions that indicate the directions we are
-currently considering for @code{gawk}. Like the previous section, this
-section is also subject to change. None of these are implemented yet.
-
-@table @asis
-@item The @code{IGNORECASE} special variable
-If @code{IGNORECASE} is non--zero, then @emph{all} regular expression matching
-will be done in a case--independent fashion. The @samp{-i} option and the
-@code{~~} and @code{!~~} operators will go away, as this mechanism
-generalizes those facilities.
-
-@item More Escape Sequences
-The ANSI C @samp{\a} and @samp{\x} escape sequences will be recognized.
-Unix @code{awk} does not recognize @samp{\v}, although @code{gawk} does.
-
-@item @code{RS} as a regexp
-The meaning of @code{RS} will be generalized along the lines of @code{FS}.
-
-@item Transliteration Functions
-We are planning on adding @code{toupper} and @code{tolower} functions which
-will take string arguments, and return strings where the case of each letter
-has been transformed to upper-- or lower--case respectively.
-
-@item Access To System File Descriptors
-@code{gawk} will recognize the special file names @file{/dev/stdin},
-@file{/dev/stdout}, @file{/dev/stderr}, and @file{/dev/fd/@var{N}} internally.
-These will allow access to inherited file descriptors from within an
-@code{awk} program.@refill
-
-@c this is @emph{very} long term --- not worth including right now.
-@ignore
-@item The C Comma Operator
-We may add the C comma operator, which takes the form
-@var{expr1}@code{,}@var{expr2}. The first expression is evaluated, and the
-result is thrown away. The value of the full expression is the value of
-@var{expr2}.@refill
-@end ignore
-@end table
-
-@node Improvements, Manual Improvements, Future Extensions, Notes
-@appendixsec Suggestions for Future Improvements
-
-Here are some projects that would--be @code{gawk} hackers might like to take
-on. They vary in size from a few days to a few weeks of programming,
-depending on which one you choose and how fast a programmer you are. Please
-send any improvements you write to the maintainers at the GNU
-project.@refill
-
-@enumerate
-@item
-State machine regexp matcher: At present, @code{gawk} uses the backtracking
-regular expression matcher from the GNU subroutine library. If a regexp is
-really going to be used a lot of times, it is faster to convert it once to a
-description of a finite state machine, then run a routine simulating that
-machine every time you want to match the regexp. You could use
-the matching routines used by GNU @code{egrep}.
-
-@item
-Compilation of @code{awk} programs: @code{gawk} uses a @code{Bison}
-(YACC--like) parser to convert the script given it into a syntax tree;
-the syntax tree is then executed by a simple recursive evaluator.
-Both of these steps incur a lot of overhead, since parsing can be slow
-(especially if you also do the previous project and convert regular
-expressions to finite state machines at compile time) and the
-recursive evaluator performs many procedure calls to do even the
-simplest things.@refill
-
-It should be possible for @code{gawk} to convert the script's parse tree
-into a C program which the user would then compile, using the normal
-C compiler and a special @code{gawk} library to provide all the needed
-functions (regexps, fields, associative arrays, type coercion, and so
-on).@refill
-
-An easier possibility might be for an intermediate phase of @code{awk} to
-convert the parse tree into a linear byte code form like the one used
-in GNU Emacs Lisp. The recursive evaluator would then be replaced by
-a straight line byte code interpreter that would be intermediate in speed
-between running a compiled program and doing what @code{gawk} does
-now.@refill
-@end enumerate
-
-@node Manual Improvements, , Improvements, Notes
-@appendixsec Suggestions For Future Improvements of This Manual
-
-@enumerate
-@item
-An error message section has not been included in this version of the
-manual. Perhaps some nice beta testers will document some of the messages
-for the future.
-
-@item
-A summary page has not been included, as the ``man'', or help, page that
-comes with the @code{gawk} code should suffice.
-
-GNU only supports Info, so this manual itself should contain whatever
-forms of information it would be useful to have on an Info summary page.
-
-@item
-A function and variable index has not been included as we are not sure what to
-put in it.
-@c @strong{ADR: I think I can tackle this.}
-
-@item
-A section summarizing the differences between V7 @code{awk} and
-System V Release 4 @code{awk} would be useful for long--time @code{awk}
-hackers.
-@end enumerate
-
-@node Glossary, Index , Notes, Top
-@appendix Glossary
-
-@c @strong{Add a cross-reference to most of these entries.}
-
-@table @asis
-@item Action
-A series of @code{awk} statements attached to a rule. If the rule's
-pattern matches an input record, the @code{awk} language executes the
-rule's action. Actions are always enclosed in curly braces.@refill
-
-@item Amazing @code{awk} assembler
-Henry Spencer at the University of Toronto wrote a retargetable assembler
-completely as @code{awk} scripts. It is thousands of lines long, including
-machine descriptions for several 8--bit microcomputers. It is distributed
-with @code{gawk} and is a good example of a program that would have been
-better written in another language.@refill
-
-@item Assignment
-An @code{awk} expression that changes the value of some @code{awk}
-variable or data object. An object that you can assign to is called an
-@dfn{lvalue}.@refill
-
-@item Built-in function
-The @code{awk} language provides built--in functions that perform various
-numerical and string computations. Examples are @code{sqrt} (for the
-square root of a number) and @code{substr} (for a substring of a
-string).@refill
-
-@item C
-The system programming language that most of GNU is written in. The
-@code{awk} programming language has C--like syntax, and this manual
-points out similarities between @code{awk} and C when appropriate.@refill
-
-@item Compound statement
-A series of @code{awk} statements, enclosed in curly braces. Compound
-statements may be nested.@refill
-
-@item Concatenation
-Concatenating two strings means sticking them together, one after another,
-giving a new string. For example, the string @samp{foo} concatenated with
-the string @samp{bar} gives the string @samp{foobar}.@refill
-
-@item Conditional expression
-A relation that is either true or false, such as @code{(a < b)}.
-Conditional expressions are used in @code{if} and @code{while} statements,
-and in patterns to select which input records to process.@refill
-
-@item Curly braces
-The characters @samp{@{} and @samp{@}}. Curly braces are used in
-@code{awk} for delimiting actions, compound statements, and function
-bodies.@refill
-
-@item Data objects
-These are numbers and strings of characters. Numbers are converted into
-strings and vice versa, as needed.@refill
-
-@item Escape Sequences
-A special sequence of characters used for describing non--printable
-characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
-ESC (escape) character.
-
-@item Field
-When @code{awk} reads an input record, it splits the record into pieces
-separated by whitespace (or by a separator regexp which you can
-change by setting the special variable @code{FS}). Such pieces are
-called fields.@refill
-
-@item Format
-Format strings are used to control the appearance of output in the
-@code{printf} statement. Also, data conversions from numbers to strings
-are controlled by the format string contained in the special variable
-@code{OFMT}.@refill
-
-@item Function
-A specialized group of statements often used to encapsulate general
-or program--specific tasks. @code{awk} has a number of built--in
-functions, and also allows you to define your own.
-
-@item @code{gawk}
-The GNU implementation of @code{awk}.
-
-@item @code{awk} language
-The language in which @code{awk} programs are written.
-
-@item @code{awk} program
-An @code{awk} program consists of a series of @dfn{patterns} and
-@dfn{actions}, collectively known as @dfn{rules}. For each input record
-given to the program, the program's rules are all processed in turn.
-@code{awk} programs may also contain function definitions.@refill
-
-@item @code{awk} script
-Another name for an @code{awk} program.
-
-@item Input record
-A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
-record consists of one line of text.@refill
-
-@item Keyword
-In the @code{awk} language, a keyword is a word that has special
-meaning. Keywords are reserved and may not be used as variable names.
-
-The keywords are:
-@code{if},
-@code{else},
-@code{while},
-@code{do@dots{}while},
-@code{for},
-@code{for@dots{}in},
-@code{break},
-@code{continue},
-@code{delete},
-@code{next},
-@code{function},
-@code{func},
-and @code{exit}.@refill
-
-@item Lvalue
-An expression that can appear on the left side of an assignment
-operator. In most languages, lvalues can be variables or array
-elements. In @code{awk}, a field designator can also be used as an
-lvalue.@refill
-
-@item Number
-A numeric valued data object. The @code{gawk} implementation uses double
-precision floating point to represent numbers.@refill
-
-@item Pattern
-Patterns tell @code{awk} which input records are interesting to which
-rules.
-
-A pattern is an arbitrary conditional expression against which input is
-tested. If the condition is satisfied, the pattern is said to @dfn{match}
-the input record. A typical pattern might compare the input record against
-a regular expression.@refill
-
-@item Range (of input lines)
-A sequence of consecutive lines from the input file. A pattern
-can specify ranges of input lines for @code{awk} to process, or it can
-specify single lines.@refill
-
-@item Recursion
-When a function calls itself, either directly or indirectly.
-If this isn't clear, refer to the entry for ``recursion''.
-
-@item Redirection
-Redirection means performing input from other than the standard input
-stream, or output to other than the standard output stream.
-
-You can redirect the output of the @code{print} and @code{printf} statements
-to a file or a system command, using the @code{>}, @code{>>}, and @code{|}
-operators. You can redirect input to the @code{getline} statement using
-the @code{<} and @code{|} operators.@refill
-
-@item Regular Expression
-See ``regexp''.
-
-@item Regexp
-Short for @dfn{regular expression}. A regexp is a pattern that denotes a
-set of strings, possibly an infinite set. For example, the regexp
-@samp{R.*xp} matches any string starting with the letter @samp{R}
-and ending with the letters @samp{xp}. In @code{awk}, regexps are
-used in patterns and in conditional expressions.@refill
-
-@item Rule
-A segment of an @code{awk} program that specifies how to process single
-input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
-@code{awk} reads an input record; then, for each rule, if the input record
-satisfies the rule's pattern, @code{awk} executes the rule's action.
-Otherwise, the rule does nothing for that input record.@refill
-
-@item Special Variable
-The variables @code{ARGC}, @code{ARGV}, @code{ENVIRON}, @code{FILENAME},
-@code{FNR}, @code{FS}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS},
-@code{ORS}, @code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP} have
-special meaning to @code{awk}. Changing some of them affects @code{awk}'s
-running environment.@refill
-
-@item Stream Editor
-A program that reads records from an input stream and processes them one
-or more at a time. This is in contrast with batch programs, which may
-expect to read their input files in entirety before starting to do
-anything, and with interactive programs, which require input from the
-user.@refill
-
-@item String
-A datum consisting of a sequence of characters, such as @samp{I am a
-string}. Constant strings are written with double--quotes in the
-@code{awk} language, and may contain @dfn{escape sequences}.
-
-@item Whitespace
-A sequence of blank or tab characters occurring inside an input record or a
-string.@refill
-@end table
-
-@node Index, , Glossary, Top
-@unnumbered Index
-@printindex cp
-
-@summarycontents
-@contents
-@bye
diff --git a/gawk.toc b/gawk.toc
deleted file mode 100644
index b07c6d3b..00000000
--- a/gawk.toc
+++ /dev/null
@@ -1,104 +0,0 @@
-\unnumbchapentry {Preface}{1}
-\unnumbsecentry{History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{1}
-\unnumbchapentry {GNU GENERAL PUBLIC LICENSE}{3}
-\unnumbsecentry{Preamble}{3}
-\unnumbsecentry{TERMS AND CONDITIONS}{4}
-\unnumbsecentry{Appendix: How to Apply These Terms to Your New Programs}{7}
-\chapentry {Using This Manual}{1}{9}
-\secentry {Input Files for the Examples}{1}{1}{9}
-\chapentry {Getting Started With {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{2}{11}
-\secentry {A Very Simple Example}{2}{1}{11}
-\secentry {An Example with Two Rules}{2}{2}{12}
-\secentry {A More Complex Example}{2}{3}{13}
-\secentry {How to Run {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{14}
-\subsecentry {One--shot Throw--away {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{1}{15}
-\subsecentry {Running {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} without Input Files}{2}{4}{2}{15}
-\subsecentry {Running Long Programs}{2}{4}{3}{16}
-\subsecentry {Executable {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{4}{17}
-\subsecentry {Details of the {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Command Line}{2}{4}{5}{18}
-\secentry {Comments in {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{5}{19}
-\secentry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Statements versus Lines}{2}{6}{20}
-\secentry {When to Use {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{2}{7}{21}
-\chapentry {Reading Files (Input)}{3}{23}
-\secentry {How Input is Split into Records}{3}{1}{23}
-\secentry {Examining Fields}{3}{2}{24}
-\secentry {Non-constant Field Numbers}{3}{3}{26}
-\secentry {Changing the Contents of a Field}{3}{4}{27}
-\secentry {Specifying How Fields Are Separated}{3}{5}{28}
-\secentry {Multiple--Line Records}{3}{6}{31}
-\secentry {Assigning Variables on the Command Line}{3}{7}{32}
-\secentry {Explicit Input with {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {}}{3}{8}{32}
-\subsecentry {Closing Input Files}{3}{8}{1}{36}
-\chapentry {Printing Output}{4}{39}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} Statement}{4}{1}{39}
-\secentry {Examples of {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} Statements}{4}{2}{40}
-\secentry {Output Separators}{4}{3}{41}
-\secentry {Redirecting Output of {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}}{4}{4}{42}
-\subsecentry {Closing Output Files and Pipes}{4}{4}{1}{43}
-\secentry {Using {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Statements For Fancier Printing}{4}{5}{44}
-\subsecentry {Introduction to the {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Statement}{4}{5}{1}{45}
-\subsecentry {Format--Control Characters}{4}{5}{2}{45}
-\subsecentry {Modifiers for {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Formats}{4}{5}{3}{46}
-\subsecentry {Examples of Using {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}}{4}{5}{4}{46}
-\chapentry {Useful ``One-liners''}{5}{49}
-\chapentry {Patterns}{6}{51}
-\secentry {The Empty Pattern}{6}{1}{51}
-\secentry {Regular Expressions as Patterns}{6}{2}{52}
-\subsecentry {How to use Regular Expressions}{6}{2}{1}{52}
-\subsecentry {Regular Expression Operators}{6}{2}{2}{53}
-\secentry {Comparison Expressions as Patterns}{6}{3}{55}
-\secentry {Specifying Record Ranges With Patterns}{6}{4}{56}
-\secentry {{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {} Special Patterns}{6}{5}{57}
-\secentry {Boolean Operators and Patterns}{6}{6}{58}
-\secentry {Conditional Patterns}{6}{7}{59}
-\chapentry {Actions: The Basics}{7}{61}
-\chapentry {Actions: Expressions}{8}{63}
-\secentry {Constant Expressions}{8}{1}{63}
-\secentry {Variables}{8}{2}{64}
-\secentry {Arithmetic Operators}{8}{3}{65}
-\secentry {String Concatenation}{8}{4}{65}
-\secentry {Comparison Expressions}{8}{5}{66}
-\secentry {Boolean Operators}{8}{6}{67}
-\secentry {Assignment Operators}{8}{7}{68}
-\secentry {Increment Operators}{8}{8}{70}
-\secentry {Conversion of Strings and Numbers}{8}{9}{71}
-\secentry {Conditional Expressions}{8}{10}{72}
-\secentry {Function Calls}{8}{11}{73}
-\chapentry {Actions: Statements}{9}{75}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} Statement}{9}{1}{75}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} Statement}{9}{2}{76}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing do}\hbox {}--{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} Statement}{9}{3}{77}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} Statement}{9}{4}{77}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} Statement}{9}{5}{79}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} Statement}{9}{6}{80}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} Statement}{9}{7}{81}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} Statement}{9}{8}{82}
-\chapentry {Actions: Using Arrays in {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{10}{83}
-\secentry {Introduction to Arrays}{10}{1}{83}
-\secentry {Referring to an Array Element}{10}{2}{85}
-\secentry {Assigning Array Elements}{10}{3}{86}
-\secentry {Basic Example of an Array}{10}{4}{86}
-\secentry {Scanning All Elements of an Array}{10}{5}{87}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} Statement}{10}{6}{88}
-\secentry {Multi--dimensional arrays}{10}{7}{89}
-\secentry {Scanning Multi--dimensional Arrays}{10}{8}{91}
-\chapentry {Built--in functions}{11}{93}
-\secentry {Numeric Built--in Functions}{11}{1}{93}
-\secentry {Built--in Functions for String Manipulation}{11}{2}{95}
-\secentry {Built--in Functions for I/O to Files and Commands}{11}{3}{98}
-\chapentry {User--defined Functions}{12}{99}
-\secentry {Syntax of Function Definitions}{12}{1}{99}
-\secentry {Function Definition Example}{12}{2}{100}
-\secentry {Caveats of Function Calling}{12}{3}{101}
-\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement}{12}{4}{102}
-\chapentry {Special Variables}{13}{105}
-\secentry {Special Variables That Control {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{13}{1}{105}
-\secentry {Special Variables That Convey Information to You}{13}{2}{106}
-\chapentry {Sample Program}{Appendix \char 65}{109}
-\chapentry {Implementation Notes}{Appendix \char 66}{111}
-\secentry {GNU Extensions to the AWK Language}{\char 66}{1}{111}
-\secentry {Extensions Likely To Appear In A Future Release}{\char 66}{2}{111}
-\secentry {Suggestions for Future Improvements}{\char 66}{3}{112}
-\secentry {Suggestions For Future Improvements of This Manual}{\char 66}{4}{113}
-\chapentry {Glossary}{Appendix \char 67}{115}
-\unnumbchapentry {Index}{119}
diff --git a/gawk.tp b/gawk.tp
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.tp
+++ /dev/null
diff --git a/gawk.tps b/gawk.tps
deleted file mode 100644
index e69de29b..00000000
--- a/gawk.tps
+++ /dev/null
diff --git a/gawk.vr b/gawk.vr
deleted file mode 100644
index 9b2ba722..00000000
--- a/gawk.vr
+++ /dev/null
@@ -1,17 +0,0 @@
-\entry {ARGV}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ARGV}\hbox {}}
-\entry {OFS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}
-\entry {ORS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}
-\entry {RS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}
-\entry {FILENAME}{23}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FILENAME}\hbox {}}
-\entry {RS}{23}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}
-\entry {NR}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}
-\entry {FNR}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}}
-\entry {NF}{25}{{\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}
-\entry {FS}{28}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}
-\entry {OFS}{41}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}
-\entry {ORS}{41}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}
-\entry {OFMT}{71}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFMT}\hbox {}}
-\entry {SUBSEP}{89}{{\fam \ttfam \tentt \rawbackslash \frenchspacing SUBSEP}\hbox {}}
-\entry {RSTART}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RSTART}\hbox {}}
-\entry {RLENGTH}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RLENGTH}\hbox {}}
-\entry {ENVIRON}{106}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ENVIRON}\hbox {}}
diff --git a/gawk.vrs b/gawk.vrs
deleted file mode 100644
index 0ee09c6d..00000000
--- a/gawk.vrs
+++ /dev/null
@@ -1,21 +0,0 @@
-\initial {A}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ARGV}\hbox {}}{19}
-\initial {E}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ENVIRON}\hbox {}}{106}
-\initial {F}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FILENAME}\hbox {}}{23}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}}{24}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}{28}
-\initial {N}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}{25}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}{24}
-\initial {O}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing OFMT}\hbox {}}{71}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}{19, 41}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}{19, 41}
-\initial {R}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RLENGTH}\hbox {}}{95}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}{19, 23}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RSTART}\hbox {}}{95}
-\initial {S}
-\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing SUBSEP}\hbox {}}{89}
diff --git a/gnu.getopt.c b/gnu.getopt.c
deleted file mode 100644
index 93002de9..00000000
--- a/gnu.getopt.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/* Getopt for GNU.
- Copyright (C) 1987, 1989 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-
-
-/* This version of `getopt' appears to the caller like standard Unix `getopt'
- but it behaves differently for the user, since it allows the user
- to intersperse the options with the other arguments.
-
- As `getopt' works, it permutes the elements of `argv' so that,
- when it is done, all the options precede everything else. Thus
- all application programs are extended to handle flexible argument order.
-
- Setting the environment variable _POSIX_OPTION_ORDER disables permutation.
- Then the behavior is completely standard.
-
- GNU application programs can use a third alternative mode in which
- they can distinguish the relative order of options and other arguments. */
-
-#include <stdio.h>
-
-#ifdef sparc
-#include <alloca.h>
-#endif
-#if defined(USG) || defined(MSDOS)
-extern char *alloca();
-extern char *strchr();
-#define index strchr
-#define bcopy(s, d, l) memcpy((d), (s), (l))
-#endif
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-char *optarg = 0;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns EOF, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-int optind = 0;
-
-/* The next char to be scanned in the option-element
- in which the last option character we returned was found.
- This allows us to pick up the scan where we left off.
-
- If this is zero, or a null string, it means resume the scan
- by advancing to the next ARGV-element. */
-
-static char *nextchar;
-
-/* Callers store zero here to inhibit the error message
- for unrecognized options. */
-
-int opterr = 1;
-
-/* Describe how to deal with options that follow non-option ARGV-elements.
-
- Unless the caller requests RETURN_IN_ORDER, the mode is chosen on the
- first call to `getopt': REQUIRE_ORDER if the environment variable
- _POSIX_OPTION_ORDER is set, PERMUTE otherwise.
-
- REQUIRE_ORDER means don't recognize them as options.
- Stop option processing when the first non-option is seen.
- This is what Unix does.
-
- PERMUTE is the default. We permute the contents of `argv' as we scan,
- so that eventually all the non-options are at the end. This allows options
- to be given in any order, even with programs that were not written to
- expect this.
-
- RETURN_IN_ORDER is an option available to programs that were written
- to expect options and other ARGV-elements in any order and that care about
- the ordering of the two. We describe each non-option ARGV-element
- as if it were the argument of an option with character code zero.
- Using `-' as the first character of the list of option characters
- requests this mode of operation.
-
- The special argument `--' forces an end of option-scanning regardless
- of the value of `ordering'. In the case of RETURN_IN_ORDER, only
- `--' can cause `getopt' to return EOF with `optind' != ARGC. */
-
-static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering;
-
-/* Handle permutation of arguments. */
-
-/* Describe the part of ARGV that contains non-options that have
- been skipped. `first_nonopt' is the index in ARGV of the first of them;
- `last_nonopt' is the index after the last of them. */
-
-static int first_nonopt;
-static int last_nonopt;
-
-/* Exchange two adjacent subsequences of ARGV.
-   One subsequence is elements [first_nonopt,last_nonopt)
-   which contains all the non-options that have been skipped so far.
-   The other is elements [last_nonopt,optind), which contains all
-   the options processed since those non-options were skipped.
-
-   `first_nonopt' and `last_nonopt' are relocated so that they describe
-   the new indices of the non-options in ARGV after they are moved.  */
-
-static void
-exchange (argv)
-     char **argv;
-{
-  int nonopts = last_nonopt - first_nonopt;
-  int opts = optind - last_nonopt;
-  char **temp = (char **) alloca (nonopts * sizeof (char *));
-  int i;
-
-  /* Copy element by element in ascending order: the option block overlaps
-     its destination when OPTS > NONOPTS, and bcopy is memcpy on USG/MSDOS.  */
-  for (i = 0; i < nonopts; i++)
-    temp[i] = argv[first_nonopt + i];
-  for (i = 0; i < opts; i++)
-    argv[first_nonopt + i] = argv[last_nonopt + i];
-  for (i = 0; i < nonopts; i++)
-    argv[first_nonopt + opts + i] = temp[i];
-  /* Update records for the slots the non-options now occupy.  */
-  first_nonopt += opts;
-  last_nonopt = optind;
-}
-
-/* Scan elements of ARGV (whose length is ARGC) for option characters
-   given in OPTSTRING.
-
-   If an element of ARGV starts with '-', and is not exactly "-" or "--",
-   then it is an option element.  The characters of this element
-   (aside from the initial '-') are option characters.  If `getopt'
-   is called repeatedly, it returns successively each of the option characters
-   from each of the option elements.
-
-   If `getopt' finds another option character, it returns that character,
-   updating `optind' and `nextchar' so that the next call to `getopt' can
-   resume the scan with the following option character or ARGV-element.
-
-   If there are no more option characters, `getopt' returns `EOF'.
-   Then `optind' is the index in ARGV of the first ARGV-element
-   that is not an option.  (The ARGV-elements have been permuted
-   so that those that are not options now come last.)
-
-   OPTSTRING is a string containing the legitimate option characters.
-   A colon in OPTSTRING means that the previous character is an option
-   that wants an argument.  The argument is taken from the rest of the
-   current ARGV-element, or from the following ARGV-element,
-   and returned in `optarg'.
-
-   If an option character is seen that is not listed in OPTSTRING, or a
-   required argument is missing, return '?' after printing an error message.
-   If you set `opterr' to zero, the message is suppressed but we still return '?'.
-
-   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
-   so the following text in the same ARGV-element, or the text of the following
-   ARGV-element, is returned in `optarg'.  Two colons mean an option that
-   wants an optional arg; if there is text in the current ARGV-element,
-   it is returned in `optarg'; otherwise `optarg' is set to zero.
-
-   If OPTSTRING starts with `-', it requests a different method of handling the
-   non-option ARGV-elements.  See the comments about RETURN_IN_ORDER, above.  */
-
-int
-getopt (argc, argv, optstring)
-     int argc;
-     char **argv;
-     char *optstring;
-{
-  extern char *getenv ();       /* Without this it is assumed to return int.  */
-
-  /* Initialize the internal data when the first call is made.
-     Start processing options with ARGV-element 1 (since ARGV-element 0
-     is the program name); the sequence of previously skipped
-     non-option ARGV-elements is empty.  */
-
-  if (optind == 0)
-    {
-      first_nonopt = last_nonopt = optind = 1;
-      nextchar = 0;
-
-      /* Determine how to handle the ordering of options and nonoptions.  */
-      if (optstring[0] == '-')
-        ordering = RETURN_IN_ORDER;
-      else if (getenv ("_POSIX_OPTION_ORDER") != 0)
-        ordering = REQUIRE_ORDER;
-      else
-        ordering = PERMUTE;
-    }
-
-  if (nextchar == 0 || *nextchar == 0)
-    {
-      if (ordering == PERMUTE)
-        {
-          /* If we have just processed some options following some non-options,
-             exchange them so that the options come first.  */
-
-          if (first_nonopt != last_nonopt && last_nonopt != optind)
-            exchange (argv);
-          else if (last_nonopt != optind)
-            first_nonopt = optind;
-
-          /* Now skip any additional non-options
-             and extend the range of non-options previously skipped.  */
-
-          while (optind < argc
-                 && (argv[optind][0] != '-'
-                     || argv[optind][1] == 0))
-            optind++;
-          last_nonopt = optind;
-        }
-
-      /* Special ARGV-element `--' means premature end of options.
-         Skip it like a null option,
-         then exchange with previous non-options as if it were an option,
-         then skip everything else like a non-option.  */
-
-      if (optind != argc && !strcmp (argv[optind], "--"))
-        {
-          optind++;
-
-          if (first_nonopt != last_nonopt && last_nonopt != optind)
-            exchange (argv);
-          else if (first_nonopt == last_nonopt)
-            first_nonopt = optind;
-          last_nonopt = argc;
-
-          optind = argc;
-        }
-
-      /* If we have done all the ARGV-elements, stop the scan
-         and back over any non-options that we skipped and permuted.  */
-
-      if (optind == argc)
-        {
-          /* Set the next-arg-index to point at the non-options
-             that we previously skipped, so the caller will digest them.  */
-          if (first_nonopt != last_nonopt)
-            optind = first_nonopt;
-          return EOF;
-        }
-
-      /* If we have come to a non-option and did not permute it, either stop
-         the scan (REQUIRE_ORDER) or hand it back in `optarg' with code 0.  */
-
-      if (argv[optind][0] != '-' || argv[optind][1] == 0)
-        {
-          if (ordering == REQUIRE_ORDER)
-            return EOF;
-          optarg = argv[optind++];
-          return 0;
-        }
-
-      /* We have found another option-ARGV-element.
-         Start decoding its characters.  */
-
-      nextchar = argv[optind] + 1;
-    }
-
-  /* Look at and handle the next option-character.  */
-
-  {
-    char c = *nextchar++;
-    char *temp = (char *) index (optstring, c);
-
-    /* Increment `optind' when we start to process its last character.  */
-    if (*nextchar == 0)
-      optind++;
-
-    if (temp == 0 || c == ':')
-      {
-        if (opterr != 0)
-          {
-            if (c < 040 || c >= 0177)
-              fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
-                       argv[0], (unsigned char) c);     /* no sign extension */
-            else
-              fprintf (stderr, "%s: unrecognized option `-%c'\n",
-                       argv[0], c);
-          }
-        return '?';
-      }
-    if (temp[1] == ':')
-      {
-        if (temp[2] == ':')
-          {
-            /* This is an option that accepts an argument optionally.  */
-            if (*nextchar != 0)
-              {
-                optarg = nextchar;
-                optind++;
-              }
-            else
-              optarg = 0;
-            nextchar = 0;
-          }
-        else
-          {
-            /* This is an option that requires an argument.  */
-            if (*nextchar != 0)
-              {
-                optarg = nextchar;
-                /* If we end this ARGV-element by taking the rest as an arg,
-                   we must advance to the next element now.  */
-                optind++;
-              }
-            else if (optind == argc)
-              {
-                if (opterr != 0)
-                  fprintf (stderr, "%s: no argument for `-%c' option\n",
-                           argv[0], c);
-                c = '?';
-              }
-            else
-              /* We already incremented `optind' once;
-                 increment it again when taking next ARGV-elt as argument.  */
-              optarg = argv[optind++];
-            nextchar = 0;
-          }
-      }
-    return c;
-  }
-}
-
-#ifdef TEST
-
-/* Compile with -DTEST to make an executable for use in testing
- the above definition of `getopt'. */
-
-int
-main (argc, argv)
-     int argc;
-     char **argv;
-{
-  int c;                        /* int, not char: must be able to hold EOF */
-  int digit_optind = 0;
-
-  while (1)
-    {
-      int this_option_optind = optind ? optind : 1;  /* 0 only before 1st call */
-      if ((c = getopt (argc, argv, "abc:d:0123456789")) == EOF)
-        break;
-
-      switch (c)
-        {
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9':
-          if (digit_optind != 0 && digit_optind != this_option_optind)
-            printf ("digits occur in two different argv-elements.\n");
-          digit_optind = this_option_optind;
-          printf ("option %c\n", c);
-          break;
-
-        case 'a':
-          printf ("option a\n");
-          break;
-
-        case 'b':
-          printf ("option b\n");
-          break;
-
-        case 'c':
-        case 'd':               /* `d:' is in the option string as well */
-          printf ("option %c with value `%s'\n", c, optarg);
-          break;
-
-        case '?':
-          break;
-        default:
-          printf ("?? getopt returned character code 0%o ??\n", c);
-        }
-    }
-
-  if (optind < argc)
-    {
-      printf ("non-option ARGV-elements: ");
-      while (optind < argc)
-        printf ("%s ", argv[optind++]);
-      printf ("\n");
-    }
-
-  return 0;
-}
-
-#endif /* TEST */
-
diff --git a/makefile.pc b/makefile.pc
deleted file mode 100644
index b812dad4..00000000
--- a/makefile.pc
+++ /dev/null
@@ -1,169 +0,0 @@
-# Makefile for GNU Awk (for use with Microsoft C V5.1)
-#
-# Rewritten by Arnold Robbins, September 1988, March 1989.
-#
-# Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
-#
-# This file is part of GAWK, the GNU implementation of the
-# AWK Programming Language.
-#
-# GAWK is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 1, or (at your option)
-# any later version.
-#
-# GAWK is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-
-# CFLAGS: options to the C compiler
-#
-# -Ox optimize
-# -Zi include debugging info (include /CO in LINKFLAGS)
-# -AL compile in large model
-# -AS compile in small model
-#
-# -DNOVPRINTF - system has no vprintf and associated routines
-# -DHASDOPRNT - system needs version of vprintf et al. defined in awk5.c
-# and has a BSD compatible doprnt()
-# -DNOMEMCMP - system lacks memcmp()
-# -DUSG - system is generic-ish System V.
-#
-CC=cl
-LINKFLAGS= /E /FAR /PAC /NOE /NOI /st:0x1800
-#LINKFLAGS= /CO /NOE /NOI /st:0x1800
-OPTIMIZE= -AL -Ox
-DEBUG=#-DDEBUG #-DFUNC_TRACE -DMEMDEBUG
-DEBUGGER= #-Zi
-PROFILE=#-pg
-SYSV=
-BSD=#-DHASDOPRNT
-MEMCMP=#-DNOMEMCMP
-VPRINTF=#-DNOVPRINTF
-
-FLAGS= $(OPTIMIZE) $(SYSV) $(DEBUG) $(BSD) $(MEMCMP) $(VPRINTF)
-CFLAGS= $(FLAGS) $(DEBUGGER) $(PROFILE)
-LDFLAGS= #-Bstatic
-
-SRC = awk1.c awk2.c awk3.c awk4.c awk5.c \
-	awk6.c awk7.c awk8.c awk9.c regex.c version.c do_free.c awka.c
-
-PCSTUFF= makefile.pc names.lnk random.c
-
-AWKOBJS = awk1.obj awk2.obj awk3.obj awk4.obj awk5.obj awk6.obj awk7.obj \
-	awk8.obj awk9.obj version.obj awka.obj # do_free.obj # used for MEMDEBUG
-ALLOBJS = $(AWKOBJS) awk_tab.obj
-
-# Parser to use on grammar -- if you don't have bison use the first one
-PARSER = yacc
-PARSOUT=ytab.c
-#PARSER = bison -y
-#PARSOUT=y_tab.c
-#PARSFLAGS=-v
-
-# S5OBJS
-# Set equal to alloca.o if your system is S5 and you don't have
-# alloca. Uncomment the rule below to actually make alloca.o.
-S5OBJS=
-
-# GETOPT
-# Set equal to getopt.o if you have a generic BSD system. The
-# generic BSD getopt is reported to not work with gawk. The
-# gnu getopt is supplied in gnu.getopt.c. The Public Domain
-# getopt from AT&T is in att.getopt.c. Choose one of these,
-# and rename it getopt.c.
-GETOPT=getopt.obj
-
-# LIBOBJS
-# Stuff that awk uses as library routines, but not in /lib/libc.a.
-LIBOBJS= regex.obj random.obj $(S5OBJS) $(GETOPT)
-
-UPDATES = Makefile awk.h awk.y \
-	$(SRC) regex.h # regex.c is already part of $(SRC)
-
-INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
-	gawk-info-5 gawk.aux gawk.cp gawk.cps gawk.dvi gawk.fn gawk.fns \
-	gawk.ky gawk.kys gawk.pg gawk.pgs gawk.texinfo gawk.toc \
-	gawk.tp gawk.tps gawk.vr gawk.vrs
-
-# DOCS
-# Documentation for users
-#
-DOCS=gawk.1 $(INFOFILES)
-
-# We don't distribute shar files, but they're useful for mailing.  NOTE(review): awk.tab.c is not the awk_tab.c built below -- confirm.
-SHARS = $(DOCS) COPYING README PROBLEMS $(UPDATES) awk.tab.c \
-	alloca.s alloca.c att.getopt.c gnu.getopt.c $(PCSTUFF)
-
-gawk: $(ALLOBJS) $(LIBOBJS)
-	link @names.lnk,gawk.exe $(LINKFLAGS);
-
-# this kludge necessary because MSC 5.1 compiler bombs with -Oa or -Ol (where
-# -Ox == "-Oailt -Gs")
-regex.obj: regex.c
-	$(CC) -c -Oit -AL regex.c
-
-$(AWKOBJS): awk.h
-
-awk_tab.obj: awk.h awk_tab.c
-
-awk_tab.c: awk.y
-	@-del awk_tab.c
-	$(PARSER) $(PARSFLAGS) awk.y
-	-rename $(PARSOUT) awk_tab.c
-
-# Alloca: uncomment this if your system (notably System V boxen)
-# does not have alloca in /lib/libc.a
-#
-#alloca.o: alloca.s
-#	/lib/cpp < alloca.s | sed '/^#/d' > t.s
-#	as t.s -o alloca.o
-#	rm t.s
-
-# If your machine is not supported by the assembly version of alloca.s,
-# use the C version instead. This uses the default rules to make alloca.o.
-#
-#alloca.o: alloca.c
-
-lint: $(SRC)
-	lint -hcbax $(FLAGS) $(SRC) awk_tab.c
-
-clean:	# the link step above produces gawk.exe, so remove that as well
-	rm -f gawk gawk.exe *.obj core awk.output gmon.out make.out #awk_tab.c
-
-awk.shar: $(SHARS)
-	shar -f awk -c $(SHARS)
-
-awk.tar: $(SHARS)
-	tar cvf awk.tar $(SHARS)
-
-updates.tar: $(UPDATES)	# the archive is written under the target's own name
-	tar cvf updates.tar $(UPDATES)
-
-awk.tar.Z: awk.tar
-	compress < awk.tar > awk.tar.Z
-
-doc: $(DOCS)	# $(DOCS) is a list; only gawk.1 is nroff -man input
-	nroff -man gawk.1 | col > gawk.1.out
-
-# This command probably won't be useful to the rest of the world, but makes
-# life much easier for me.
-dist: awk.tar awk.tar.Z
-
-diff:
-	for i in RCS/*; do rcsdiff -c -b $$i > `basename $$i ,v`.diff; done
-
-update: $(UPDATES)
-	sendup $?
-	touch update
-
-release: $(SHARS)
-	-rm -fr gawk-dist
-	mkdir gawk-dist
-	cp $(SHARS) gawk-dist
-	tar -cvf - gawk-dist | compress > dist.tar.Z