summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2014-08-29 13:19:10 +0300
committerArnold D. Robbins <arnold@skeeve.com>2014-08-29 13:19:10 +0300
commitba1e7ab66563efb5b597a418e2e9fb4a01998d03 (patch)
treea9cf2a74f9fd34725a507a3b9cef8bd1566a1664
parent5a05ddf24b9f5ebc81a1b295ba7a6fbc7348776b (diff)
parent6c541fd0f75cd328dd80afec757ecccc833719af (diff)
downloadgawk-ba1e7ab66563efb5b597a418e2e9fb4a01998d03.tar.gz
Merge branch 'gawk-4.1-stable'
-rw-r--r--awklib/eg/lib/getopt.awk3
-rw-r--r--awklib/eg/lib/strtonum.awk7
-rw-r--r--doc/ChangeLog5
-rw-r--r--doc/gawk.info1507
-rw-r--r--doc/gawk.texi549
-rw-r--r--doc/gawktexi.in461
6 files changed, 1326 insertions, 1206 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index db957ceb..6b1f4c50 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -38,8 +38,7 @@ function getopt(argc, argv, options, thisopt, i)
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index 9342e789..5e20626b 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -13,8 +13,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -26,6 +27,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
diff --git a/doc/ChangeLog b/doc/ChangeLog
index b6a13dc7..4eaa8138 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,8 @@
+2014-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments, and other
+ bug fixes, miscellaneous improvements.
+
2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Use a different mechanism to exclude
diff --git a/doc/gawk.info b/doc/gawk.info
index 514965f4..afa825cc 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -170,10 +170,10 @@ entitled "GNU Free Documentation License".
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1455,15 +1455,17 @@ end-of-file character may be different. For example, on OS/2, it is
As an example, the following program prints a friendly piece of
advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'),
to keep you from worrying about the complexities of computer
-programming (`BEGIN' is a feature we haven't discussed yet):
+programming:
- $ awk "BEGIN { print \"Don't Panic!\" }"
+ $ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!
- This program does not read any input. The `\' before each of the
-inner double quotes is necessary because of the shell's quoting
-rules--in particular because it mixes both single quotes and double
-quotes.(1)
+ `awk' executes statements associated with `BEGIN' before reading any
+input. If there are no other statements in your program, as is the
+case here, `awk' just stops, instead of trying to read input it doesn't
+know how to process. The `\47' is a magic way of getting a single
+quote into the program, without having to engage in ugly shell quoting
+tricks.
NOTE: As a side note, if you use Bash as your shell, you should
execute the command `set +H' before running this program
@@ -1486,12 +1488,6 @@ works is explained shortly).
-| What, me worry?
Ctrl-d
- ---------- Footnotes ----------
-
- (1) Although we generally recommend the use of single quotes around
-the program text, double quotes are needed here in order to put the
-single quote into the message.
-

File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
@@ -1937,6 +1933,9 @@ different ways to do the same things shown here:
awk '{ if (length($0) > max) max = length($0) }
END { print max }' data
+ The code associated with `END' executes after all input has been
+ read; it's the other side of the coin to `BEGIN'.
+
* Print the length of the longest line in `data':
expand data | awk '{ if (x < length($0)) x = length($0) }
@@ -2729,6 +2728,10 @@ arguments, including variable assignments, are included. As each
element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to
the index in `ARGV' of the current element.
+ Changing `ARGC' and `ARGV' in your `awk' program lets you control
+how `awk' processes the input files; this is described in more detail
+in *note ARGC and ARGV::.
+
The distinction between file name arguments and variable-assignment
arguments is made when `awk' is about to open the next input file. At
that point in execution, it checks the file name to see whether it is
@@ -3218,10 +3221,10 @@ you specify more complicated classes of strings.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.

@@ -3363,17 +3366,19 @@ apply to both string constants and regexp constants:
`\/'
A literal slash (necessary for regexp constants only). This
sequence is used when you want to write a regexp constant that
- contains a slash. Because the regexp is delimited by slashes, you
- need to escape the slash that is part of the pattern, in order to
+ contains a slash (such as `/.*:\/home\/[[:alnum:]]+:.*/'; the
+ `[[:alnum:]]' notation is discussed shortly, in *note Bracket
+ Expressions::). Because the regexp is delimited by slashes, you
+ need to escape any slash that is part of the pattern, in order to
tell `awk' to keep processing the rest of the regexp.
`\"'
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string constant
- that contains a double quote. Because the string is delimited by
- double quotes, you need to escape the quote that is part of the
- string, in order to tell `awk' to keep processing the rest of the
- string.
+ that contains a double quote (such as `"He said \"hi!\" to her."').
+ Because the string is delimited by double quotes, you need to
+ escape any quote that is part of the string, in order to tell
+ `awk' to keep processing the rest of the string.
In `gawk', a number of additional two-character sequences that begin
with a backslash have special meaning in regexps. *Note GNU Regexp
@@ -3611,7 +3616,7 @@ list".
regexp operator or function.

-File: gawk.info, Node: Bracket Expressions, Next: GNU Regexp Operators, Prev: Regexp Operators, Up: Regexp
+File: gawk.info, Node: Bracket Expressions, Next: Leftmost Longest, Prev: Regexp Operators, Up: Regexp
3.4 Using Bracket Expressions
=============================
@@ -3716,9 +3721,118 @@ Equivalence classes
classes.

-File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Bracket Expressions, Up: Regexp
+File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Bracket Expressions, Up: Regexp
+
+3.5 How Much Text Matches?
+==========================
-3.5 `gawk'-Specific Regexp Operators
+Consider the following:
+
+ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+
+ This example uses the `sub()' function (which we haven't discussed
+yet; *note String Functions::) to make a change to the input record.
+Here, the regexp `/a+/' indicates "one or more `a' characters," and the
+replacement text is `<A>'.
+
+ The input contains four `a' characters. `awk' (and POSIX) regular
+expressions always match the leftmost, _longest_ sequence of input
+characters that can match. Thus, all four `a' characters are replaced
+with `<A>' in this example:
+
+ $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+ -| <A>bcd
+
+ For simple match/no-match tests, this is not so important. But when
+doing text matching and substitutions with the `match()', `sub()',
+`gsub()', and `gensub()' functions, it is very important. *Note String
+Functions::, for more information on these functions. Understanding
+this principle is also important for regexp-based record and field
+splitting (*note Records::, and also *note Field Separators::).
+
+
+File: gawk.info, Node: Computed Regexps, Next: GNU Regexp Operators, Prev: Leftmost Longest, Up: Regexp
+
+3.6 Using Dynamic Regexps
+=========================
+
+The righthand side of a `~' or `!~' operator need not be a regexp
+constant (i.e., a string of characters between slashes). It may be any
+expression. The expression is evaluated and converted to a string if
+necessary; the contents of the string are then used as the regexp. A
+regexp computed in this way is called a "dynamic regexp" or a "computed
+regexp":
+
+ BEGIN { digits_regexp = "[[:digit:]]+" }
+ $0 ~ digits_regexp { print }
+
+This sets `digits_regexp' to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+ NOTE: When using the `~' and `!~' operators, there is a difference
+ between a regexp constant enclosed in slashes and a string
+ constant enclosed in double quotes. If you are going to use a
+ string constant, you have to understand that the string is, in
+ essence, scanned _twice_: the first time when `awk' reads your
+ program, and the second time when it goes to match the string on
+ the lefthand side of the operator with the pattern on the right.
+ This is true of any string-valued expression (such as
+ `digits_regexp', shown previously), not just string constants.
+
+ What difference does it make if the string is scanned twice? The
+answer has to do with escape sequences, and particularly with
+backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+ For example, `/\*/' is a regexp constant for a literal `*'. Only
+one backslash is needed. To do the same thing with a string, you have
+to type `"\\*"'. The first backslash escapes the second one so that
+the string actually contains the two characters `\' and `*'.
+
+ Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is "regexp
+constants," for several reasons:
+
+ * String constants are more complicated to write and more difficult
+ to read. Using regexp constants makes your programs less
+ error-prone. Not understanding the difference between the two
+ kinds of constants is a common source of errors.
+
+ * It is more efficient to use regexp constants. `awk' can note that
+ you have supplied a regexp and store it internally in a form that
+ makes pattern matching more efficient. When using a string
+ constant, `awk' must first convert the string into this internal
+ form and then perform the pattern matching.
+
+ * Using regexp constants is better form; it shows clearly that you
+ intend a regexp match.
+
+ Using `\n' in Bracket Expressions of Dynamic Regexps
+
+ Some versions of `awk' do not allow the newline character to be used
+inside a bracket expression for a dynamic regexp:
+
+ $ awk '$0 ~ "[ \t\n]"'
+ error--> awk: newline in character class [
+ error--> ]...
+ error--> source line number 1
+ error--> context is
+ error--> >>> <<<
+
+ But a newline in a regexp constant works with no problem:
+
+ $ awk '$0 ~ /[ \t\n]/'
+ here is a sample line
+ -| here is a sample line
+ Ctrl-d
+
+ `gawk' does not have this problem, and it isn't likely to occur
+often in practice, but it's worth noting for future reference.
+
+
+File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Computed Regexps, Up: Regexp
+
+3.7 `gawk'-Specific Regexp Operators
====================================
GNU software that deals with regular expressions provides a number of
@@ -3812,9 +3926,9 @@ No options
default.

-File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp
+File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp
-3.6 Case Sensitivity in Matching
+3.8 Case Sensitivity in Matching
================================
Case is normally significant in regular expressions, both when matching
@@ -3887,116 +4001,7 @@ obscure and we don't recommend it.
means that `gawk' does the right thing.

-File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Case-sensitivity, Up: Regexp
-
-3.7 How Much Text Matches?
-==========================
-
-Consider the following:
-
- echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
-
- This example uses the `sub()' function (which we haven't discussed
-yet; *note String Functions::) to make a change to the input record.
-Here, the regexp `/a+/' indicates "one or more `a' characters," and the
-replacement text is `<A>'.
-
- The input contains four `a' characters. `awk' (and POSIX) regular
-expressions always match the leftmost, _longest_ sequence of input
-characters that can match. Thus, all four `a' characters are replaced
-with `<A>' in this example:
-
- $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
- -| <A>bcd
-
- For simple match/no-match tests, this is not so important. But when
-doing text matching and substitutions with the `match()', `sub()',
-`gsub()', and `gensub()' functions, it is very important. *Note String
-Functions::, for more information on these functions. Understanding
-this principle is also important for regexp-based record and field
-splitting (*note Records::, and also *note Field Separators::).
-
-
-File: gawk.info, Node: Computed Regexps, Next: Regexp Summary, Prev: Leftmost Longest, Up: Regexp
-
-3.8 Using Dynamic Regexps
-=========================
-
-The righthand side of a `~' or `!~' operator need not be a regexp
-constant (i.e., a string of characters between slashes). It may be any
-expression. The expression is evaluated and converted to a string if
-necessary; the contents of the string are then used as the regexp. A
-regexp computed in this way is called a "dynamic regexp" or a "computed
-regexp":
-
- BEGIN { digits_regexp = "[[:digit:]]+" }
- $0 ~ digits_regexp { print }
-
-This sets `digits_regexp' to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
- NOTE: When using the `~' and `!~' operators, there is a difference
- between a regexp constant enclosed in slashes and a string
- constant enclosed in double quotes. If you are going to use a
- string constant, you have to understand that the string is, in
- essence, scanned _twice_: the first time when `awk' reads your
- program, and the second time when it goes to match the string on
- the lefthand side of the operator with the pattern on the right.
- This is true of any string-valued expression (such as
- `digits_regexp', shown previously), not just string constants.
-
- What difference does it make if the string is scanned twice? The
-answer has to do with escape sequences, and particularly with
-backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
- For example, `/\*/' is a regexp constant for a literal `*'. Only
-one backslash is needed. To do the same thing with a string, you have
-to type `"\\*"'. The first backslash escapes the second one so that
-the string actually contains the two characters `\' and `*'.
-
- Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is "regexp
-constants," for several reasons:
-
- * String constants are more complicated to write and more difficult
- to read. Using regexp constants makes your programs less
- error-prone. Not understanding the difference between the two
- kinds of constants is a common source of errors.
-
- * It is more efficient to use regexp constants. `awk' can note that
- you have supplied a regexp and store it internally in a form that
- makes pattern matching more efficient. When using a string
- constant, `awk' must first convert the string into this internal
- form and then perform the pattern matching.
-
- * Using regexp constants is better form; it shows clearly that you
- intend a regexp match.
-
- Using `\n' in Bracket Expressions of Dynamic Regexps
-
- Some versions of `awk' do not allow the newline character to be used
-inside a bracket expression for a dynamic regexp:
-
- $ awk '$0 ~ "[ \t\n]"'
- error--> awk: newline in character class [
- error--> ]...
- error--> source line number 1
- error--> context is
- error--> >>> <<<
-
- But a newline in a regexp constant works with no problem:
-
- $ awk '$0 ~ /[ \t\n]/'
- here is a sample line
- -| here is a sample line
- Ctrl-d
-
- `gawk' does not have this problem, and it isn't likely to occur
-often in practice, but it's worth noting for future reference.
-
-
-File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp
+File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
3.9 Summary
===========
@@ -5383,35 +5388,47 @@ input record and split it up into fields. This is useful if you've
finished processing the current record, but want to do some special
processing on the next record _right now_. For example:
+ # Remove text between /* and */, inclusive
{
- if ((t = index($0, "/*")) != 0) {
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) {
- if (getline <= 0) {
+ if ((i = index($0, "/*")) != 0) {
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) {
+ rest = substr(rest, j + 2) # remove comment
+ } else {
+ while (j == 0) {
+ # get more text
+ if (getline <= 0) {
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- }
- u = index($0, "*/")
- offset = 0
- }
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- }
- print $0
+ }
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) {
+ rest = substr(rest, j + 2)
+ break
+ }
+ }
+ }
+ # build up the output line using string concatenation
+ $0 = out rest
+ }
+ print $0
}
This `awk' program deletes C-style comments (`/* ... */') from the
-input. By replacing the `print $0' with other statements, you could
-perform more complicated processing on the decommented input, such as
-searching for matches of a regular expression. (This program has a
-subtle problem--it does not work if one comment ends and another begins
-on the same line.)
+input. It uses a number of features we haven't covered yet, including
+string concatenation (*note Concatenation::) and the `index()' and
+`substr()' built-in functions (*note String Functions::). By replacing
+the `print $0' with other statements, you could perform more
+complicated processing on the decommented input, such as searching for
+matches of a regular expression. (This program has a subtle
+problem--it does not work if one comment ends and another begins on the
+same line.)
This form of the `getline' command sets `NF', `NR', `FNR', `RT', and
the value of `$0'.
@@ -5975,8 +5992,8 @@ File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading File
2. *note Plain Getline::, presented a program to remove C-style
comments (`/* ... */') from the input. That program does not work
if one comment ends on one line and another one starts later on
- the same line. Write a program that does handle multiple comments
- on the line.
+ the same line. That can be fixed by making one simple change.
+ What is it?

@@ -7307,8 +7324,9 @@ File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Consta
A regexp constant is a regular expression description enclosed in
slashes, such as `/^beginning and end$/'. Most regexps used in `awk'
programs are constant, but the `~' and `!~' matching operators can also
-match computed or dynamic regexps (which are just ordinary strings or
-variables that contain a regexp).
+match computed or dynamic regexps (which are typically just ordinary
+strings or variables that contain a regexp, but could be a more complex
+expression).

File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values
@@ -8458,7 +8476,7 @@ following program is one way to print lines in between special
bracketing lines:
$1 == "START" { interested = ! interested; next }
- interested == 1 { print }
+ interested { print }
$1 == "END" { interested = ! interested; next }
The variable `interested', as with all `awk' variables, starts out
@@ -8468,6 +8486,14 @@ using `!'. The next rule prints lines as long as `interested' is true.
When a line is seen whose first field is `END', `interested' is toggled
back to false.(1)
+ Most commonly, the `!' operator is used in the conditions of `if'
+and `while' statements, where it often makes more sense to phrase the
+logic in the negative:
+
+ if (! SOME CONDITION || SOME OTHER CONDITION) {
+ ... DO WHATEVER PROCESSING ...
+ }
+
NOTE: The `next' statement is discussed in *note Next Statement::.
`next' tells `awk' to skip the rest of the rules, get the next
record, and start processing the rules over again at the top. The
@@ -9836,7 +9862,7 @@ reset to one, and processing starts over with the first rule in the
program. If the `nextfile' statement causes the end of the input to be
reached, then the code in any `END' rules is executed. An exception to
this is when `nextfile' is invoked during execution of any statement in
-an `END' rule; In this case, it causes the program to stop immediately.
+an `END' rule; in this case, it causes the program to stop immediately.
*Note BEGIN/END::.
The `nextfile' statement is useful when there are many data files to
@@ -9846,10 +9872,10 @@ would have to continue scanning the unwanted records. The `nextfile'
statement accomplishes this much more efficiently.
In `gawk', execution of `nextfile' causes additional things to
-happen: any `ENDFILE' rules are executed except in the case as
-mentioned below, `ARGIND' is incremented, and any `BEGINFILE' rules are
-executed. (`ARGIND' hasn't been introduced yet. *Note Built-in
-Variables::.)
+happen: any `ENDFILE' rules are executed if `gawk' is not currently in
+an `END' or `BEGINFILE' rule, `ARGIND' is incremented, and any
+`BEGINFILE' rules are executed. (`ARGIND' hasn't been introduced yet.
+*Note Built-in Variables::.)
With `gawk', `nextfile' is useful inside a `BEGINFILE' rule to skip
over a file that would otherwise cause `gawk' to exit with a fatal
@@ -11283,7 +11309,7 @@ might look like this:
> line 2
> line 3' | awk '{ l[lines] = $0; ++lines }
> END {
- > for (i = lines-1; i >= 0; --i)
+ > for (i = lines - 1; i >= 0; i--)
> print l[i]
> }'
-| line 3
@@ -11304,7 +11330,7 @@ following version of the program works correctly:
{ l[lines++] = $0 }
END {
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
}
@@ -14396,8 +14422,9 @@ versions of `awk':
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -14409,6 +14436,8 @@ versions of `awk':
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -14869,7 +14898,7 @@ that might be as follows:
This function reads from `file' one record at a time, building up
the full contents of the file in the local variable `contents'. It
-works, but is not necessarily efficient.
+works, but is not necessarily efficient.(1)
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -14904,6 +14933,13 @@ string. Thus calling code may use something like:
This tests the result to see if it is empty or not. An equivalent
test would be `contents == ""'.
+ ---------- Footnotes ----------
+
+ (1) Execution time grows quadratically in the size of the input; for
+each record, `awk' has to allocate a bigger internal buffer for
+`contents', copy the old contents into it, and then append the contents
+of the new record.
+

File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions
@@ -15357,8 +15393,7 @@ not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
@@ -30982,10 +31017,9 @@ Index
* ! (exclamation point), !~ operator <3>: Comparison Operators.
(line 11)
* ! (exclamation point), !~ operator <4>: Regexp Constants. (line 6)
-* ! (exclamation point), !~ operator <5>: Computed Regexps. (line 6)
-* ! (exclamation point), !~ operator <6>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <5>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <6>: Computed Regexps. (line 6)
* ! (exclamation point), !~ operator: Regexp Usage. (line 19)
-* " (double quote) in shell commands: Read Terminal. (line 25)
* " (double quote), in regexp constants: Computed Regexps. (line 29)
* " (double quote), in shell commands: Quoting. (line 54)
* # (number sign), #! (executable scripts): Executable Scripts.
@@ -31173,8 +31207,7 @@ Index
* ? (question mark), regexp operator: Regexp Operators. (line 111)
* [] (square brackets), regexp operator: Regexp Operators. (line 56)
* \ (backslash): Comments. (line 50)
-* \ (backslash) in shell commands: Read Terminal. (line 25)
-* \ (backslash), \" escape sequence: Escape Sequences. (line 82)
+* \ (backslash), \" escape sequence: Escape Sequences. (line 84)
* \ (backslash), \' operator (gawk): GNU Regexp Operators.
(line 56)
* \ (backslash), \/ escape sequence: Escape Sequences. (line 75)
@@ -31217,7 +31250,7 @@ Index
* \ (backslash), in bracket expressions: Bracket Expressions. (line 17)
* \ (backslash), in escape sequences: Escape Sequences. (line 6)
* \ (backslash), in escape sequences, POSIX and: Escape Sequences.
- (line 118)
+ (line 120)
* \ (backslash), in regexp constants: Computed Regexps. (line 29)
* \ (backslash), in shell commands: Quoting. (line 48)
* \ (backslash), regexp operator: Regexp Operators. (line 18)
@@ -31444,8 +31477,7 @@ Index
* awkvars.out file: Options. (line 93)
* b debugger command (alias for break): Breakpoint Control. (line 11)
* backslash (\): Comments. (line 50)
-* backslash (\) in shell commands: Read Terminal. (line 25)
-* backslash (\), \" escape sequence: Escape Sequences. (line 82)
+* backslash (\), \" escape sequence: Escape Sequences. (line 84)
* backslash (\), \' operator (gawk): GNU Regexp Operators.
(line 56)
* backslash (\), \/ escape sequence: Escape Sequences. (line 75)
@@ -31488,7 +31520,7 @@ Index
* backslash (\), in bracket expressions: Bracket Expressions. (line 17)
* backslash (\), in escape sequences: Escape Sequences. (line 6)
* backslash (\), in escape sequences, POSIX and: Escape Sequences.
- (line 118)
+ (line 120)
* backslash (\), in regexp constants: Computed Regexps. (line 29)
* backslash (\), in shell commands: Quoting. (line 48)
* backslash (\), regexp operator: Regexp Operators. (line 18)
@@ -31593,7 +31625,7 @@ Index
(line 67)
* Brian Kernighan's awk <12>: GNU Regexp Operators.
(line 83)
-* Brian Kernighan's awk <13>: Escape Sequences. (line 122)
+* Brian Kernighan's awk <13>: Escape Sequences. (line 124)
* Brian Kernighan's awk: When. (line 21)
* Brian Kernighan's awk, extensions: BTL. (line 6)
* Brian Kernighan's awk, source code: Other Versions. (line 13)
@@ -31621,6 +31653,7 @@ Index
* built-in variables, conveying information: Auto-set. (line 6)
* built-in variables, user-modifiable: User-modified. (line 6)
* Busybox Awk: Other Versions. (line 88)
+* c.e., See common extensions: Conventions. (line 51)
* call by reference: Pass By Value/Reference.
(line 47)
* call by value: Pass By Value/Reference.
@@ -31819,9 +31852,9 @@ Index
* dark corner, command-line arguments: Assignment Options. (line 43)
* dark corner, continue statement: Continue Statement. (line 44)
* dark corner, CONVFMT variable: Strings And Numbers. (line 40)
-* dark corner, escape sequences: Other Arguments. (line 31)
+* dark corner, escape sequences: Other Arguments. (line 35)
* dark corner, escape sequences, for metacharacters: Escape Sequences.
- (line 140)
+ (line 142)
* dark corner, exit statement: Exit Statement. (line 30)
* dark corner, field separators: Field Splitting Summary.
(line 46)
@@ -32087,7 +32120,6 @@ Index
* dollar sign ($), incrementing fields and arrays: Increment Ops.
(line 30)
* dollar sign ($), regexp operator: Regexp Operators. (line 35)
-* double quote (") in shell commands: Read Terminal. (line 25)
* double quote ("), in regexp constants: Computed Regexps. (line 29)
* double quote ("), in shell commands: Quoting. (line 54)
* down debugger command: Execution Stack. (line 21)
@@ -32187,8 +32219,8 @@ Index
* exclamation point (!), !~ operator <3>: Comparison Operators.
(line 11)
* exclamation point (!), !~ operator <4>: Regexp Constants. (line 6)
-* exclamation point (!), !~ operator <5>: Computed Regexps. (line 6)
-* exclamation point (!), !~ operator <6>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <5>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <6>: Computed Regexps. (line 6)
* exclamation point (!), !~ operator: Regexp Usage. (line 19)
* exit statement: Exit Statement. (line 6)
* exit status, of gawk: Exit Status. (line 6)
@@ -32196,7 +32228,7 @@ Index
* exit the debugger: Miscellaneous Debugger Commands.
(line 99)
* exp: Numeric Functions. (line 33)
-* expand utility: Very Simple. (line 69)
+* expand utility: Very Simple. (line 72)
* Expat XML parser library: gawkextlib. (line 35)
* exponent: Numeric Functions. (line 33)
* expressions: Expressions. (line 6)
@@ -32330,7 +32362,7 @@ Index
(line 47)
* files, message object, specifying directory of: Explaining gettext.
(line 54)
-* files, multiple passes over: Other Arguments. (line 49)
+* files, multiple passes over: Other Arguments. (line 53)
* files, multiple, duplicating output into: Tee Program. (line 6)
* files, output, See output files: Close Files And Pipes.
(line 6)
@@ -32491,7 +32523,7 @@ Index
* gawk, ERRNO variable in <4>: Close Files And Pipes.
(line 139)
* gawk, ERRNO variable in: Getline. (line 19)
-* gawk, escape sequences: Escape Sequences. (line 130)
+* gawk, escape sequences: Escape Sequences. (line 132)
* gawk, extensions, disabling: Options. (line 252)
* gawk, features, adding: Adding Code. (line 6)
* gawk, features, advanced: Advanced Features. (line 6)
@@ -32715,7 +32747,7 @@ Index
* input files, examples: Sample Data Files. (line 6)
* input files, reading: Reading Files. (line 6)
* input files, running awk without: Read Terminal. (line 6)
-* input files, variable assignments and: Other Arguments. (line 19)
+* input files, variable assignments and: Other Arguments. (line 23)
* input pipeline: Getline/Pipe. (line 9)
* input record, length of: String Functions. (line 174)
* input redirection: Getline/File. (line 6)
@@ -32924,7 +32956,7 @@ Index
* mawk utility <2>: Nextfile Statement. (line 47)
* mawk utility <3>: Concatenation. (line 36)
* mawk utility <4>: Getline/Pipe. (line 62)
-* mawk utility: Escape Sequences. (line 130)
+* mawk utility: Escape Sequences. (line 132)
* maximum precision supported by MPFR library: Auto-set. (line 221)
* McIlroy, Doug: Glossary. (line 149)
* McPhee, Patrick: Contributors. (line 100)
@@ -32937,7 +32969,7 @@ Index
(line 54)
* messages from extensions: Printing Messages. (line 6)
* metacharacters in regular expressions: Regexp Operators. (line 6)
-* metacharacters, escape sequences for: Escape Sequences. (line 136)
+* metacharacters, escape sequences for: Escape Sequences. (line 138)
* minimum precision supported by MPFR library: Auto-set. (line 224)
* mktime: Time Functions. (line 25)
* modifiers, in format specifiers: Format Modifiers. (line 6)
@@ -32975,7 +33007,7 @@ Index
(line 43)
* next file statement: Feature History. (line 169)
* next statement <1>: Next Statement. (line 6)
-* next statement: Boolean Ops. (line 85)
+* next statement: Boolean Ops. (line 93)
* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 49)
@@ -33156,14 +33188,14 @@ Index
* plus sign (+), += operator: Assignment Ops. (line 82)
* plus sign (+), regexp operator: Regexp Operators. (line 105)
* pointers to functions: Indirect Calls. (line 6)
-* portability: Escape Sequences. (line 100)
+* portability: Escape Sequences. (line 102)
* portability, #! (executable scripts): Executable Scripts. (line 33)
* portability, ** operator and: Arithmetic Ops. (line 81)
* portability, **= operator and: Assignment Ops. (line 143)
* portability, ARGV variable: Executable Scripts. (line 59)
* portability, backslash continuation and: Statements/Lines. (line 30)
* portability, backslash in escape sequences: Escape Sequences.
- (line 118)
+ (line 120)
* portability, close() function and: Close Files And Pipes.
(line 81)
* portability, data files as single record: gawk split records.
@@ -33202,7 +33234,7 @@ Index
* POSIX awk, < operator and: Getline/File. (line 26)
* POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30)
* POSIX awk, backslashes in string constants: Escape Sequences.
- (line 118)
+ (line 120)
* POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 16)
* POSIX awk, bracket expressions and: Bracket Expressions. (line 26)
* POSIX awk, bracket expressions and, character classes: Bracket Expressions.
@@ -33546,7 +33578,6 @@ Index
* set watchpoint: Viewing And Changing Data.
(line 67)
* shadowing of variable values: Definition Syntax. (line 70)
-* shell quoting, double quote: Read Terminal. (line 25)
* shell quoting, rules for: Quoting. (line 6)
* shells, piping commands into: Redirection. (line 142)
* shells, quoting: Using Shell Variables.
@@ -33583,14 +33614,14 @@ Index
* sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers.
(line 64)
* sidebar, Backslash Before Regular Characters: Escape Sequences.
- (line 116)
+ (line 118)
* sidebar, Changing FS Does Not Affect the Fields: Field Splitting Summary.
(line 38)
* sidebar, Changing NR and FNR: Auto-set. (line 307)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
(line 138)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
- (line 134)
+ (line 136)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 64)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
@@ -33790,8 +33821,8 @@ Index
* tilde (~), ~ operator <3>: Comparison Operators.
(line 11)
* tilde (~), ~ operator <4>: Regexp Constants. (line 6)
-* tilde (~), ~ operator <5>: Computed Regexps. (line 6)
-* tilde (~), ~ operator <6>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <5>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <6>: Computed Regexps. (line 6)
* tilde (~), ~ operator: Regexp Usage. (line 19)
* time functions: Time Functions. (line 6)
* time, alarm clock example program: Alarm Program. (line 11)
@@ -33818,7 +33849,7 @@ Index
(line 37)
* troubleshooting, awk uses FS not IFS: Field Separators. (line 30)
* troubleshooting, backslash before nonspecial character: Escape Sequences.
- (line 118)
+ (line 120)
* troubleshooting, division: Arithmetic Ops. (line 44)
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 23)
@@ -33874,7 +33905,7 @@ Index
* uniq.awk program: Uniq Program. (line 65)
* Unix: Glossary. (line 611)
* Unix awk, backslashes in escape sequences: Escape Sequences.
- (line 130)
+ (line 132)
* Unix awk, close() function and: Close Files And Pipes.
(line 131)
* Unix awk, password files, field separators and: Command Line Field Separator.
@@ -33897,7 +33928,7 @@ Index
* USR1 signal, for dynamic profiling: Profiling. (line 188)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
-* variable assignments and input files: Other Arguments. (line 19)
+* variable assignments and input files: Other Arguments. (line 23)
* variable typing: Typing and Comparison.
(line 9)
* variables <1>: Basic Data Typing. (line 6)
@@ -34011,8 +34042,8 @@ Index
* ~ (tilde), ~ operator <3>: Comparison Operators.
(line 11)
* ~ (tilde), ~ operator <4>: Regexp Constants. (line 6)
-* ~ (tilde), ~ operator <5>: Computed Regexps. (line 6)
-* ~ (tilde), ~ operator <6>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <5>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <6>: Computed Regexps. (line 6)
* ~ (tilde), ~ operator: Regexp Usage. (line 19)
@@ -34038,533 +34069,533 @@ Node: Getting Started70581
Node: Running gawk73015
Node: One-shot74205
Node: Read Terminal75430
-Ref: Read Terminal-Footnote-177393
-Node: Long77564
-Node: Executable Scripts78958
-Ref: Executable Scripts-Footnote-181759
-Node: Comments81861
-Node: Quoting84334
-Node: DOS Quoting89647
-Node: Sample Data Files90322
-Node: Very Simple92929
-Node: Two Rules97688
-Node: More Complex99582
-Ref: More Complex-Footnote-1102496
-Node: Statements/Lines102581
-Ref: Statements/Lines-Footnote-1107037
-Node: Other Features107302
-Node: When108230
-Ref: When-Footnote-1109986
-Node: Intro Summary110051
-Node: Invoking Gawk110934
-Node: Command Line112449
-Node: Options113240
-Ref: Options-Footnote-1128887
-Node: Other Arguments128912
-Node: Naming Standard Input131574
-Node: Environment Variables132667
-Node: AWKPATH Variable133225
-Ref: AWKPATH Variable-Footnote-1136091
-Ref: AWKPATH Variable-Footnote-2136136
-Node: AWKLIBPATH Variable136396
-Node: Other Environment Variables137155
-Node: Exit Status140607
-Node: Include Files141282
-Node: Loading Shared Libraries144860
-Node: Obsolete146244
-Node: Undocumented146941
-Node: Invoking Summary147208
-Node: Regexp148808
-Node: Regexp Usage150267
-Node: Escape Sequences152300
-Node: Regexp Operators158200
-Ref: Regexp Operators-Footnote-1165631
-Ref: Regexp Operators-Footnote-2165778
-Node: Bracket Expressions165876
-Ref: table-char-classes167898
-Node: GNU Regexp Operators170838
-Node: Case-sensitivity174547
-Ref: Case-sensitivity-Footnote-1177439
-Ref: Case-sensitivity-Footnote-2177674
-Node: Leftmost Longest177782
-Node: Computed Regexps178983
-Node: Regexp Summary182355
-Node: Reading Files183824
-Node: Records185916
-Node: awk split records186638
-Node: gawk split records191496
-Ref: gawk split records-Footnote-1196017
-Node: Fields196054
-Ref: Fields-Footnote-1199018
-Node: Nonconstant Fields199104
-Ref: Nonconstant Fields-Footnote-1201334
-Node: Changing Fields201536
-Node: Field Separators207490
-Node: Default Field Splitting210192
-Node: Regexp Field Splitting211309
-Node: Single Character Fields214636
-Node: Command Line Field Separator215695
-Node: Full Line Fields219121
-Ref: Full Line Fields-Footnote-1219629
-Node: Field Splitting Summary219675
-Ref: Field Splitting Summary-Footnote-1222807
-Node: Constant Size222908
-Node: Splitting By Content227514
-Ref: Splitting By Content-Footnote-1231587
-Node: Multiple Line231627
-Ref: Multiple Line-Footnote-1237483
-Node: Getline237662
-Node: Plain Getline239873
-Node: Getline/Variable241968
-Node: Getline/File243115
-Node: Getline/Variable/File244499
-Ref: Getline/Variable/File-Footnote-1246098
-Node: Getline/Pipe246185
-Node: Getline/Variable/Pipe248871
-Node: Getline/Coprocess249978
-Node: Getline/Variable/Coprocess251230
-Node: Getline Notes251967
-Node: Getline Summary254771
-Ref: table-getline-variants255179
-Node: Read Timeout256091
-Ref: Read Timeout-Footnote-1259918
-Node: Command-line directories259976
-Node: Input Summary260880
-Node: Input Exercises264017
-Node: Printing264750
-Node: Print266472
-Node: Print Examples267965
-Node: Output Separators270744
-Node: OFMT272760
-Node: Printf274118
-Node: Basic Printf275024
-Node: Control Letters276563
-Node: Format Modifiers280554
-Node: Printf Examples286581
-Node: Redirection289045
-Node: Special Files296017
-Node: Special FD296550
-Ref: Special FD-Footnote-1300147
-Node: Special Network300221
-Node: Special Caveats301071
-Node: Close Files And Pipes301867
-Ref: Close Files And Pipes-Footnote-1309028
-Ref: Close Files And Pipes-Footnote-2309176
-Node: Output Summary309326
-Node: Output Exercises310323
-Node: Expressions311003
-Node: Values312188
-Node: Constants312864
-Node: Scalar Constants313544
-Ref: Scalar Constants-Footnote-1314403
-Node: Nondecimal-numbers314653
-Node: Regexp Constants317653
-Node: Using Constant Regexps318128
-Node: Variables321200
-Node: Using Variables321855
-Node: Assignment Options323579
-Node: Conversion325454
-Node: Strings And Numbers325978
-Ref: Strings And Numbers-Footnote-1329040
-Node: Locale influences conversions329149
-Ref: table-locale-affects331866
-Node: All Operators332454
-Node: Arithmetic Ops333084
-Node: Concatenation335589
-Ref: Concatenation-Footnote-1338408
-Node: Assignment Ops338514
-Ref: table-assign-ops343497
-Node: Increment Ops344800
-Node: Truth Values and Conditions348238
-Node: Truth Values349321
-Node: Typing and Comparison350370
-Node: Variable Typing351163
-Node: Comparison Operators354815
-Ref: table-relational-ops355225
-Node: POSIX String Comparison358775
-Ref: POSIX String Comparison-Footnote-1359859
-Node: Boolean Ops359997
-Ref: Boolean Ops-Footnote-1364072
-Node: Conditional Exp364163
-Node: Function Calls365890
-Node: Precedence369770
-Node: Locales373439
-Node: Expressions Summary375070
-Node: Patterns and Actions377611
-Node: Pattern Overview378727
-Node: Regexp Patterns380404
-Node: Expression Patterns380947
-Node: Ranges384727
-Node: BEGIN/END387833
-Node: Using BEGIN/END388595
-Ref: Using BEGIN/END-Footnote-1391331
-Node: I/O And BEGIN/END391437
-Node: BEGINFILE/ENDFILE393708
-Node: Empty396639
-Node: Using Shell Variables396956
-Node: Action Overview399239
-Node: Statements401566
-Node: If Statement403414
-Node: While Statement404912
-Node: Do Statement406956
-Node: For Statement408112
-Node: Switch Statement411264
-Node: Break Statement413652
-Node: Continue Statement415693
-Node: Next Statement417518
-Node: Nextfile Statement419908
-Node: Exit Statement422544
-Node: Built-in Variables424948
-Node: User-modified426075
-Ref: User-modified-Footnote-1433764
-Node: Auto-set433826
-Ref: Auto-set-Footnote-1446745
-Ref: Auto-set-Footnote-2446950
-Node: ARGC and ARGV447006
-Node: Pattern Action Summary450910
-Node: Arrays453133
-Node: Array Basics454682
-Node: Array Intro455508
-Ref: figure-array-elements457481
-Ref: Array Intro-Footnote-1460005
-Node: Reference to Elements460133
-Node: Assigning Elements462583
-Node: Array Example463074
-Node: Scanning an Array464806
-Node: Controlling Scanning467807
-Ref: Controlling Scanning-Footnote-1472980
-Node: Delete473296
-Ref: Delete-Footnote-1476047
-Node: Numeric Array Subscripts476104
-Node: Uninitialized Subscripts478287
-Node: Multidimensional479912
-Node: Multiscanning483025
-Node: Arrays of Arrays484614
-Node: Arrays Summary489277
-Node: Functions491382
-Node: Built-in492255
-Node: Calling Built-in493333
-Node: Numeric Functions495321
-Ref: Numeric Functions-Footnote-1500157
-Ref: Numeric Functions-Footnote-2500514
-Ref: Numeric Functions-Footnote-3500562
-Node: String Functions500831
-Ref: String Functions-Footnote-1523828
-Ref: String Functions-Footnote-2523957
-Ref: String Functions-Footnote-3524205
-Node: Gory Details524292
-Ref: table-sub-escapes526065
-Ref: table-sub-proposed527585
-Ref: table-posix-sub528949
-Ref: table-gensub-escapes530489
-Ref: Gory Details-Footnote-1531665
-Node: I/O Functions531816
-Ref: I/O Functions-Footnote-1538926
-Node: Time Functions539073
-Ref: Time Functions-Footnote-1549537
-Ref: Time Functions-Footnote-2549605
-Ref: Time Functions-Footnote-3549763
-Ref: Time Functions-Footnote-4549874
-Ref: Time Functions-Footnote-5549986
-Ref: Time Functions-Footnote-6550213
-Node: Bitwise Functions550479
-Ref: table-bitwise-ops551041
-Ref: Bitwise Functions-Footnote-1555286
-Node: Type Functions555470
-Node: I18N Functions556612
-Node: User-defined558257
-Node: Definition Syntax559061
-Ref: Definition Syntax-Footnote-1564374
-Node: Function Example564443
-Ref: Function Example-Footnote-1567083
-Node: Function Caveats567105
-Node: Calling A Function567623
-Node: Variable Scope568578
-Node: Pass By Value/Reference571566
-Node: Return Statement575076
-Node: Dynamic Typing578060
-Node: Indirect Calls578989
-Node: Functions Summary588702
-Node: Library Functions591241
-Ref: Library Functions-Footnote-1594859
-Ref: Library Functions-Footnote-2595002
-Node: Library Names595173
-Ref: Library Names-Footnote-1598646
-Ref: Library Names-Footnote-2598866
-Node: General Functions598952
-Node: Strtonum Function599980
-Node: Assert Function602760
-Node: Round Function606086
-Node: Cliff Random Function607627
-Node: Ordinal Functions608643
-Ref: Ordinal Functions-Footnote-1611708
-Ref: Ordinal Functions-Footnote-2611960
-Node: Join Function612171
-Ref: Join Function-Footnote-1613942
-Node: Getlocaltime Function614142
-Node: Readfile Function617878
-Node: Data File Management619717
-Node: Filetrans Function620349
-Node: Rewind Function624418
-Node: File Checking625976
-Ref: File Checking-Footnote-1627108
-Node: Empty Files627309
-Node: Ignoring Assigns629288
-Node: Getopt Function630842
-Ref: Getopt Function-Footnote-1642145
-Node: Passwd Functions642348
-Ref: Passwd Functions-Footnote-1651327
-Node: Group Functions651415
-Ref: Group Functions-Footnote-1659346
-Node: Walking Arrays659559
-Node: Library Functions Summary661162
-Node: Library Exercises662550
-Node: Sample Programs663830
-Node: Running Examples664600
-Node: Clones665328
-Node: Cut Program666552
-Node: Egrep Program676410
-Ref: Egrep Program-Footnote-1683997
-Node: Id Program684107
-Node: Split Program687761
-Ref: Split Program-Footnote-1691299
-Node: Tee Program691427
-Node: Uniq Program694214
-Node: Wc Program701635
-Ref: Wc Program-Footnote-1705900
-Node: Miscellaneous Programs705992
-Node: Dupword Program707205
-Node: Alarm Program709236
-Node: Translate Program714040
-Ref: Translate Program-Footnote-1718431
-Ref: Translate Program-Footnote-2718701
-Node: Labels Program718835
-Ref: Labels Program-Footnote-1722196
-Node: Word Sorting722280
-Node: History Sorting726323
-Node: Extract Program728159
-Node: Simple Sed735695
-Node: Igawk Program738757
-Ref: Igawk Program-Footnote-1753061
-Ref: Igawk Program-Footnote-2753262
-Node: Anagram Program753400
-Node: Signature Program756468
-Node: Programs Summary757715
-Node: Programs Exercises758930
-Node: Advanced Features762581
-Node: Nondecimal Data764529
-Node: Array Sorting766106
-Node: Controlling Array Traversal766803
-Node: Array Sorting Functions775083
-Ref: Array Sorting Functions-Footnote-1778990
-Node: Two-way I/O779184
-Ref: Two-way I/O-Footnote-1784128
-Ref: Two-way I/O-Footnote-2784307
-Node: TCP/IP Networking784389
-Node: Profiling787234
-Node: Advanced Features Summary794785
-Node: Internationalization796649
-Node: I18N and L10N798129
-Node: Explaining gettext798815
-Ref: Explaining gettext-Footnote-1803841
-Ref: Explaining gettext-Footnote-2804025
-Node: Programmer i18n804190
-Ref: Programmer i18n-Footnote-1808984
-Node: Translator i18n809033
-Node: String Extraction809827
-Ref: String Extraction-Footnote-1810960
-Node: Printf Ordering811046
-Ref: Printf Ordering-Footnote-1813828
-Node: I18N Portability813892
-Ref: I18N Portability-Footnote-1816341
-Node: I18N Example816404
-Ref: I18N Example-Footnote-1819110
-Node: Gawk I18N819182
-Node: I18N Summary819820
-Node: Debugger821159
-Node: Debugging822181
-Node: Debugging Concepts822622
-Node: Debugging Terms824478
-Node: Awk Debugging827075
-Node: Sample Debugging Session827967
-Node: Debugger Invocation828487
-Node: Finding The Bug829820
-Node: List of Debugger Commands836302
-Node: Breakpoint Control837634
-Node: Debugger Execution Control841298
-Node: Viewing And Changing Data844658
-Node: Execution Stack848016
-Node: Debugger Info849529
-Node: Miscellaneous Debugger Commands853523
-Node: Readline Support858707
-Node: Limitations859599
-Node: Debugging Summary861873
-Node: Arbitrary Precision Arithmetic863041
-Node: Computer Arithmetic864528
-Ref: Computer Arithmetic-Footnote-1868915
-Node: Math Definitions868972
-Ref: table-ieee-formats872261
-Ref: Math Definitions-Footnote-1872801
-Node: MPFR features872904
-Node: FP Math Caution874521
-Ref: FP Math Caution-Footnote-1875571
-Node: Inexactness of computations875940
-Node: Inexact representation876888
-Node: Comparing FP Values878243
-Node: Errors accumulate879207
-Node: Getting Accuracy880640
-Node: Try To Round883299
-Node: Setting precision884198
-Ref: table-predefined-precision-strings884880
-Node: Setting the rounding mode886673
-Ref: table-gawk-rounding-modes887037
-Ref: Setting the rounding mode-Footnote-1890491
-Node: Arbitrary Precision Integers890670
-Ref: Arbitrary Precision Integers-Footnote-1894443
-Node: POSIX Floating Point Problems894592
-Ref: POSIX Floating Point Problems-Footnote-1898468
-Node: Floating point summary898506
-Node: Dynamic Extensions900710
-Node: Extension Intro902262
-Node: Plugin License903527
-Node: Extension Mechanism Outline904212
-Ref: figure-load-extension904636
-Ref: figure-load-new-function906121
-Ref: figure-call-new-function907123
-Node: Extension API Description909107
-Node: Extension API Functions Introduction910557
-Node: General Data Types915424
-Ref: General Data Types-Footnote-1921117
-Node: Requesting Values921416
-Ref: table-value-types-returned922153
-Node: Memory Allocation Functions923111
-Ref: Memory Allocation Functions-Footnote-1925858
-Node: Constructor Functions925954
-Node: Registration Functions927712
-Node: Extension Functions928397
-Node: Exit Callback Functions930699
-Node: Extension Version String931947
-Node: Input Parsers932597
-Node: Output Wrappers942411
-Node: Two-way processors946927
-Node: Printing Messages949131
-Ref: Printing Messages-Footnote-1950208
-Node: Updating `ERRNO'950360
-Node: Accessing Parameters951099
-Node: Symbol Table Access952329
-Node: Symbol table by name952843
-Node: Symbol table by cookie954819
-Ref: Symbol table by cookie-Footnote-1958952
-Node: Cached values959015
-Ref: Cached values-Footnote-1962519
-Node: Array Manipulation962610
-Ref: Array Manipulation-Footnote-1963708
-Node: Array Data Types963747
-Ref: Array Data Types-Footnote-1966450
-Node: Array Functions966542
-Node: Flattening Arrays970416
-Node: Creating Arrays977268
-Node: Extension API Variables981999
-Node: Extension Versioning982635
-Node: Extension API Informational Variables984536
-Node: Extension API Boilerplate985622
-Node: Finding Extensions989426
-Node: Extension Example989986
-Node: Internal File Description990716
-Node: Internal File Ops994807
-Ref: Internal File Ops-Footnote-11006239
-Node: Using Internal File Ops1006379
-Ref: Using Internal File Ops-Footnote-11008726
-Node: Extension Samples1008994
-Node: Extension Sample File Functions1010518
-Node: Extension Sample Fnmatch1018086
-Node: Extension Sample Fork1019568
-Node: Extension Sample Inplace1020781
-Node: Extension Sample Ord1022456
-Node: Extension Sample Readdir1023292
-Ref: table-readdir-file-types1024148
-Node: Extension Sample Revout1024947
-Node: Extension Sample Rev2way1025538
-Node: Extension Sample Read write array1026279
-Node: Extension Sample Readfile1028158
-Node: Extension Sample API Tests1029258
-Node: Extension Sample Time1029783
-Node: gawkextlib1031098
-Node: Extension summary1033911
-Node: Extension Exercises1037604
-Node: Language History1038326
-Node: V7/SVR3.11039969
-Node: SVR41042289
-Node: POSIX1043731
-Node: BTL1045117
-Node: POSIX/GNU1045851
-Node: Feature History1051627
-Node: Common Extensions1064718
-Node: Ranges and Locales1066030
-Ref: Ranges and Locales-Footnote-11070647
-Ref: Ranges and Locales-Footnote-21070674
-Ref: Ranges and Locales-Footnote-31070908
-Node: Contributors1071129
-Node: History summary1076554
-Node: Installation1077923
-Node: Gawk Distribution1078874
-Node: Getting1079358
-Node: Extracting1080182
-Node: Distribution contents1081824
-Node: Unix Installation1087594
-Node: Quick Installation1088211
-Node: Additional Configuration Options1090653
-Node: Configuration Philosophy1092391
-Node: Non-Unix Installation1094742
-Node: PC Installation1095200
-Node: PC Binary Installation1096511
-Node: PC Compiling1098359
-Ref: PC Compiling-Footnote-11101358
-Node: PC Testing1101463
-Node: PC Using1102639
-Node: Cygwin1106791
-Node: MSYS1107600
-Node: VMS Installation1108114
-Node: VMS Compilation1108910
-Ref: VMS Compilation-Footnote-11110132
-Node: VMS Dynamic Extensions1110190
-Node: VMS Installation Details1111563
-Node: VMS Running1113815
-Node: VMS GNV1116649
-Node: VMS Old Gawk1117372
-Node: Bugs1117842
-Node: Other Versions1121846
-Node: Installation summary1128073
-Node: Notes1129129
-Node: Compatibility Mode1129994
-Node: Additions1130776
-Node: Accessing The Source1131701
-Node: Adding Code1133137
-Node: New Ports1139315
-Node: Derived Files1143796
-Ref: Derived Files-Footnote-11148877
-Ref: Derived Files-Footnote-21148911
-Ref: Derived Files-Footnote-31149507
-Node: Future Extensions1149621
-Node: Implementation Limitations1150227
-Node: Extension Design1151475
-Node: Old Extension Problems1152629
-Ref: Old Extension Problems-Footnote-11154146
-Node: Extension New Mechanism Goals1154203
-Ref: Extension New Mechanism Goals-Footnote-11157563
-Node: Extension Other Design Decisions1157752
-Node: Extension Future Growth1159858
-Node: Old Extension Mechanism1160694
-Node: Notes summary1162456
-Node: Basic Concepts1163642
-Node: Basic High Level1164323
-Ref: figure-general-flow1164595
-Ref: figure-process-flow1165194
-Ref: Basic High Level-Footnote-11168423
-Node: Basic Data Typing1168608
-Node: Glossary1171936
-Node: Copying1197088
-Node: GNU Free Documentation License1234644
-Node: Index1259780
+Node: Long77455
+Node: Executable Scripts78849
+Ref: Executable Scripts-Footnote-181650
+Node: Comments81752
+Node: Quoting84225
+Node: DOS Quoting89538
+Node: Sample Data Files90213
+Node: Very Simple92820
+Node: Two Rules97705
+Node: More Complex99599
+Ref: More Complex-Footnote-1102513
+Node: Statements/Lines102598
+Ref: Statements/Lines-Footnote-1107054
+Node: Other Features107319
+Node: When108247
+Ref: When-Footnote-1110003
+Node: Intro Summary110068
+Node: Invoking Gawk110951
+Node: Command Line112466
+Node: Options113257
+Ref: Options-Footnote-1128904
+Node: Other Arguments128929
+Node: Naming Standard Input131757
+Node: Environment Variables132850
+Node: AWKPATH Variable133408
+Ref: AWKPATH Variable-Footnote-1136274
+Ref: AWKPATH Variable-Footnote-2136319
+Node: AWKLIBPATH Variable136579
+Node: Other Environment Variables137338
+Node: Exit Status140790
+Node: Include Files141465
+Node: Loading Shared Libraries145043
+Node: Obsolete146427
+Node: Undocumented147124
+Node: Invoking Summary147391
+Node: Regexp148991
+Node: Regexp Usage150450
+Node: Escape Sequences152483
+Node: Regexp Operators158554
+Ref: Regexp Operators-Footnote-1165985
+Ref: Regexp Operators-Footnote-2166132
+Node: Bracket Expressions166230
+Ref: table-char-classes168248
+Node: Leftmost Longest171188
+Node: Computed Regexps172392
+Node: GNU Regexp Operators175770
+Node: Case-sensitivity179476
+Ref: Case-sensitivity-Footnote-1182366
+Ref: Case-sensitivity-Footnote-2182601
+Node: Regexp Summary182709
+Node: Reading Files184178
+Node: Records186270
+Node: awk split records186992
+Node: gawk split records191850
+Ref: gawk split records-Footnote-1196371
+Node: Fields196408
+Ref: Fields-Footnote-1199372
+Node: Nonconstant Fields199458
+Ref: Nonconstant Fields-Footnote-1201688
+Node: Changing Fields201890
+Node: Field Separators207844
+Node: Default Field Splitting210546
+Node: Regexp Field Splitting211663
+Node: Single Character Fields214990
+Node: Command Line Field Separator216049
+Node: Full Line Fields219475
+Ref: Full Line Fields-Footnote-1219983
+Node: Field Splitting Summary220029
+Ref: Field Splitting Summary-Footnote-1223161
+Node: Constant Size223262
+Node: Splitting By Content227868
+Ref: Splitting By Content-Footnote-1231941
+Node: Multiple Line231981
+Ref: Multiple Line-Footnote-1237837
+Node: Getline238016
+Node: Plain Getline240227
+Node: Getline/Variable242933
+Node: Getline/File244080
+Node: Getline/Variable/File245464
+Ref: Getline/Variable/File-Footnote-1247063
+Node: Getline/Pipe247150
+Node: Getline/Variable/Pipe249836
+Node: Getline/Coprocess250943
+Node: Getline/Variable/Coprocess252195
+Node: Getline Notes252932
+Node: Getline Summary255736
+Ref: table-getline-variants256144
+Node: Read Timeout257056
+Ref: Read Timeout-Footnote-1260883
+Node: Command-line directories260941
+Node: Input Summary261845
+Node: Input Exercises264982
+Node: Printing265710
+Node: Print267432
+Node: Print Examples268925
+Node: Output Separators271704
+Node: OFMT273720
+Node: Printf275078
+Node: Basic Printf275984
+Node: Control Letters277523
+Node: Format Modifiers281514
+Node: Printf Examples287541
+Node: Redirection290005
+Node: Special Files296977
+Node: Special FD297510
+Ref: Special FD-Footnote-1301107
+Node: Special Network301181
+Node: Special Caveats302031
+Node: Close Files And Pipes302827
+Ref: Close Files And Pipes-Footnote-1309988
+Ref: Close Files And Pipes-Footnote-2310136
+Node: Output Summary310286
+Node: Output Exercises311283
+Node: Expressions311963
+Node: Values313148
+Node: Constants313824
+Node: Scalar Constants314504
+Ref: Scalar Constants-Footnote-1315363
+Node: Nondecimal-numbers315613
+Node: Regexp Constants318613
+Node: Using Constant Regexps319138
+Node: Variables322210
+Node: Using Variables322865
+Node: Assignment Options324589
+Node: Conversion326464
+Node: Strings And Numbers326988
+Ref: Strings And Numbers-Footnote-1330050
+Node: Locale influences conversions330159
+Ref: table-locale-affects332876
+Node: All Operators333464
+Node: Arithmetic Ops334094
+Node: Concatenation336599
+Ref: Concatenation-Footnote-1339418
+Node: Assignment Ops339524
+Ref: table-assign-ops344507
+Node: Increment Ops345810
+Node: Truth Values and Conditions349248
+Node: Truth Values350331
+Node: Typing and Comparison351380
+Node: Variable Typing352173
+Node: Comparison Operators355825
+Ref: table-relational-ops356235
+Node: POSIX String Comparison359785
+Ref: POSIX String Comparison-Footnote-1360869
+Node: Boolean Ops361007
+Ref: Boolean Ops-Footnote-1365346
+Node: Conditional Exp365437
+Node: Function Calls367164
+Node: Precedence371044
+Node: Locales374713
+Node: Expressions Summary376344
+Node: Patterns and Actions378885
+Node: Pattern Overview380001
+Node: Regexp Patterns381678
+Node: Expression Patterns382221
+Node: Ranges386001
+Node: BEGIN/END389107
+Node: Using BEGIN/END389869
+Ref: Using BEGIN/END-Footnote-1392605
+Node: I/O And BEGIN/END392711
+Node: BEGINFILE/ENDFILE394982
+Node: Empty397913
+Node: Using Shell Variables398230
+Node: Action Overview400513
+Node: Statements402840
+Node: If Statement404688
+Node: While Statement406186
+Node: Do Statement408230
+Node: For Statement409386
+Node: Switch Statement412538
+Node: Break Statement414926
+Node: Continue Statement416967
+Node: Next Statement418792
+Node: Nextfile Statement421182
+Node: Exit Statement423839
+Node: Built-in Variables426243
+Node: User-modified427370
+Ref: User-modified-Footnote-1435059
+Node: Auto-set435121
+Ref: Auto-set-Footnote-1448040
+Ref: Auto-set-Footnote-2448245
+Node: ARGC and ARGV448301
+Node: Pattern Action Summary452205
+Node: Arrays454428
+Node: Array Basics455977
+Node: Array Intro456803
+Ref: figure-array-elements458776
+Ref: Array Intro-Footnote-1461300
+Node: Reference to Elements461428
+Node: Assigning Elements463878
+Node: Array Example464369
+Node: Scanning an Array466101
+Node: Controlling Scanning469102
+Ref: Controlling Scanning-Footnote-1474275
+Node: Delete474591
+Ref: Delete-Footnote-1477342
+Node: Numeric Array Subscripts477399
+Node: Uninitialized Subscripts479582
+Node: Multidimensional481209
+Node: Multiscanning484322
+Node: Arrays of Arrays485911
+Node: Arrays Summary490574
+Node: Functions492679
+Node: Built-in493552
+Node: Calling Built-in494630
+Node: Numeric Functions496618
+Ref: Numeric Functions-Footnote-1501454
+Ref: Numeric Functions-Footnote-2501811
+Ref: Numeric Functions-Footnote-3501859
+Node: String Functions502128
+Ref: String Functions-Footnote-1525125
+Ref: String Functions-Footnote-2525254
+Ref: String Functions-Footnote-3525502
+Node: Gory Details525589
+Ref: table-sub-escapes527362
+Ref: table-sub-proposed528882
+Ref: table-posix-sub530246
+Ref: table-gensub-escapes531786
+Ref: Gory Details-Footnote-1532962
+Node: I/O Functions533113
+Ref: I/O Functions-Footnote-1540223
+Node: Time Functions540370
+Ref: Time Functions-Footnote-1550834
+Ref: Time Functions-Footnote-2550902
+Ref: Time Functions-Footnote-3551060
+Ref: Time Functions-Footnote-4551171
+Ref: Time Functions-Footnote-5551283
+Ref: Time Functions-Footnote-6551510
+Node: Bitwise Functions551776
+Ref: table-bitwise-ops552338
+Ref: Bitwise Functions-Footnote-1556583
+Node: Type Functions556767
+Node: I18N Functions557909
+Node: User-defined559554
+Node: Definition Syntax560358
+Ref: Definition Syntax-Footnote-1565671
+Node: Function Example565740
+Ref: Function Example-Footnote-1568380
+Node: Function Caveats568402
+Node: Calling A Function568920
+Node: Variable Scope569875
+Node: Pass By Value/Reference572863
+Node: Return Statement576373
+Node: Dynamic Typing579357
+Node: Indirect Calls580286
+Node: Functions Summary589999
+Node: Library Functions592538
+Ref: Library Functions-Footnote-1596156
+Ref: Library Functions-Footnote-2596299
+Node: Library Names596470
+Ref: Library Names-Footnote-1599943
+Ref: Library Names-Footnote-2600163
+Node: General Functions600249
+Node: Strtonum Function601277
+Node: Assert Function604151
+Node: Round Function607477
+Node: Cliff Random Function609018
+Node: Ordinal Functions610034
+Ref: Ordinal Functions-Footnote-1613099
+Ref: Ordinal Functions-Footnote-2613351
+Node: Join Function613562
+Ref: Join Function-Footnote-1615333
+Node: Getlocaltime Function615533
+Node: Readfile Function619269
+Ref: Readfile Function-Footnote-1621147
+Node: Data File Management621375
+Node: Filetrans Function622007
+Node: Rewind Function626076
+Node: File Checking627634
+Ref: File Checking-Footnote-1628766
+Node: Empty Files628967
+Node: Ignoring Assigns630946
+Node: Getopt Function632500
+Ref: Getopt Function-Footnote-1643764
+Node: Passwd Functions643967
+Ref: Passwd Functions-Footnote-1652946
+Node: Group Functions653034
+Ref: Group Functions-Footnote-1660965
+Node: Walking Arrays661178
+Node: Library Functions Summary662781
+Node: Library Exercises664169
+Node: Sample Programs665449
+Node: Running Examples666219
+Node: Clones666947
+Node: Cut Program668171
+Node: Egrep Program678029
+Ref: Egrep Program-Footnote-1685616
+Node: Id Program685726
+Node: Split Program689380
+Ref: Split Program-Footnote-1692918
+Node: Tee Program693046
+Node: Uniq Program695833
+Node: Wc Program703254
+Ref: Wc Program-Footnote-1707519
+Node: Miscellaneous Programs707611
+Node: Dupword Program708824
+Node: Alarm Program710855
+Node: Translate Program715659
+Ref: Translate Program-Footnote-1720050
+Ref: Translate Program-Footnote-2720320
+Node: Labels Program720454
+Ref: Labels Program-Footnote-1723815
+Node: Word Sorting723899
+Node: History Sorting727942
+Node: Extract Program729778
+Node: Simple Sed737314
+Node: Igawk Program740376
+Ref: Igawk Program-Footnote-1754680
+Ref: Igawk Program-Footnote-2754881
+Node: Anagram Program755019
+Node: Signature Program758087
+Node: Programs Summary759334
+Node: Programs Exercises760549
+Node: Advanced Features764200
+Node: Nondecimal Data766148
+Node: Array Sorting767725
+Node: Controlling Array Traversal768422
+Node: Array Sorting Functions776702
+Ref: Array Sorting Functions-Footnote-1780609
+Node: Two-way I/O780803
+Ref: Two-way I/O-Footnote-1785747
+Ref: Two-way I/O-Footnote-2785926
+Node: TCP/IP Networking786008
+Node: Profiling788853
+Node: Advanced Features Summary796404
+Node: Internationalization798268
+Node: I18N and L10N799748
+Node: Explaining gettext800434
+Ref: Explaining gettext-Footnote-1805460
+Ref: Explaining gettext-Footnote-2805644
+Node: Programmer i18n805809
+Ref: Programmer i18n-Footnote-1810603
+Node: Translator i18n810652
+Node: String Extraction811446
+Ref: String Extraction-Footnote-1812579
+Node: Printf Ordering812665
+Ref: Printf Ordering-Footnote-1815447
+Node: I18N Portability815511
+Ref: I18N Portability-Footnote-1817960
+Node: I18N Example818023
+Ref: I18N Example-Footnote-1820729
+Node: Gawk I18N820801
+Node: I18N Summary821439
+Node: Debugger822778
+Node: Debugging823800
+Node: Debugging Concepts824241
+Node: Debugging Terms826097
+Node: Awk Debugging828694
+Node: Sample Debugging Session829586
+Node: Debugger Invocation830106
+Node: Finding The Bug831439
+Node: List of Debugger Commands837921
+Node: Breakpoint Control839253
+Node: Debugger Execution Control842917
+Node: Viewing And Changing Data846277
+Node: Execution Stack849635
+Node: Debugger Info851148
+Node: Miscellaneous Debugger Commands855142
+Node: Readline Support860326
+Node: Limitations861218
+Node: Debugging Summary863492
+Node: Arbitrary Precision Arithmetic864660
+Node: Computer Arithmetic866147
+Ref: Computer Arithmetic-Footnote-1870534
+Node: Math Definitions870591
+Ref: table-ieee-formats873880
+Ref: Math Definitions-Footnote-1874420
+Node: MPFR features874523
+Node: FP Math Caution876140
+Ref: FP Math Caution-Footnote-1877190
+Node: Inexactness of computations877559
+Node: Inexact representation878507
+Node: Comparing FP Values879862
+Node: Errors accumulate880826
+Node: Getting Accuracy882259
+Node: Try To Round884918
+Node: Setting precision885817
+Ref: table-predefined-precision-strings886499
+Node: Setting the rounding mode888292
+Ref: table-gawk-rounding-modes888656
+Ref: Setting the rounding mode-Footnote-1892110
+Node: Arbitrary Precision Integers892289
+Ref: Arbitrary Precision Integers-Footnote-1896062
+Node: POSIX Floating Point Problems896211
+Ref: POSIX Floating Point Problems-Footnote-1900087
+Node: Floating point summary900125
+Node: Dynamic Extensions902329
+Node: Extension Intro903881
+Node: Plugin License905146
+Node: Extension Mechanism Outline905831
+Ref: figure-load-extension906255
+Ref: figure-load-new-function907740
+Ref: figure-call-new-function908742
+Node: Extension API Description910726
+Node: Extension API Functions Introduction912176
+Node: General Data Types917043
+Ref: General Data Types-Footnote-1922736
+Node: Requesting Values923035
+Ref: table-value-types-returned923772
+Node: Memory Allocation Functions924730
+Ref: Memory Allocation Functions-Footnote-1927477
+Node: Constructor Functions927573
+Node: Registration Functions929331
+Node: Extension Functions930016
+Node: Exit Callback Functions932318
+Node: Extension Version String933566
+Node: Input Parsers934216
+Node: Output Wrappers944030
+Node: Two-way processors948546
+Node: Printing Messages950750
+Ref: Printing Messages-Footnote-1951827
+Node: Updating `ERRNO'951979
+Node: Accessing Parameters952718
+Node: Symbol Table Access953948
+Node: Symbol table by name954462
+Node: Symbol table by cookie956438
+Ref: Symbol table by cookie-Footnote-1960571
+Node: Cached values960634
+Ref: Cached values-Footnote-1964138
+Node: Array Manipulation964229
+Ref: Array Manipulation-Footnote-1965327
+Node: Array Data Types965366
+Ref: Array Data Types-Footnote-1968069
+Node: Array Functions968161
+Node: Flattening Arrays972035
+Node: Creating Arrays978887
+Node: Extension API Variables983618
+Node: Extension Versioning984254
+Node: Extension API Informational Variables986155
+Node: Extension API Boilerplate987241
+Node: Finding Extensions991045
+Node: Extension Example991605
+Node: Internal File Description992335
+Node: Internal File Ops996426
+Ref: Internal File Ops-Footnote-11007858
+Node: Using Internal File Ops1007998
+Ref: Using Internal File Ops-Footnote-11010345
+Node: Extension Samples1010613
+Node: Extension Sample File Functions1012137
+Node: Extension Sample Fnmatch1019705
+Node: Extension Sample Fork1021187
+Node: Extension Sample Inplace1022400
+Node: Extension Sample Ord1024075
+Node: Extension Sample Readdir1024911
+Ref: table-readdir-file-types1025767
+Node: Extension Sample Revout1026566
+Node: Extension Sample Rev2way1027157
+Node: Extension Sample Read write array1027898
+Node: Extension Sample Readfile1029777
+Node: Extension Sample API Tests1030877
+Node: Extension Sample Time1031402
+Node: gawkextlib1032717
+Node: Extension summary1035530
+Node: Extension Exercises1039223
+Node: Language History1039945
+Node: V7/SVR3.11041588
+Node: SVR41043908
+Node: POSIX1045350
+Node: BTL1046736
+Node: POSIX/GNU1047470
+Node: Feature History1053246
+Node: Common Extensions1066337
+Node: Ranges and Locales1067649
+Ref: Ranges and Locales-Footnote-11072266
+Ref: Ranges and Locales-Footnote-21072293
+Ref: Ranges and Locales-Footnote-31072527
+Node: Contributors1072748
+Node: History summary1078173
+Node: Installation1079542
+Node: Gawk Distribution1080493
+Node: Getting1080977
+Node: Extracting1081801
+Node: Distribution contents1083443
+Node: Unix Installation1089213
+Node: Quick Installation1089830
+Node: Additional Configuration Options1092272
+Node: Configuration Philosophy1094010
+Node: Non-Unix Installation1096361
+Node: PC Installation1096819
+Node: PC Binary Installation1098130
+Node: PC Compiling1099978
+Ref: PC Compiling-Footnote-11102977
+Node: PC Testing1103082
+Node: PC Using1104258
+Node: Cygwin1108410
+Node: MSYS1109219
+Node: VMS Installation1109733
+Node: VMS Compilation1110529
+Ref: VMS Compilation-Footnote-11111751
+Node: VMS Dynamic Extensions1111809
+Node: VMS Installation Details1113182
+Node: VMS Running1115434
+Node: VMS GNV1118268
+Node: VMS Old Gawk1118991
+Node: Bugs1119461
+Node: Other Versions1123465
+Node: Installation summary1129692
+Node: Notes1130748
+Node: Compatibility Mode1131613
+Node: Additions1132395
+Node: Accessing The Source1133320
+Node: Adding Code1134756
+Node: New Ports1140934
+Node: Derived Files1145415
+Ref: Derived Files-Footnote-11150496
+Ref: Derived Files-Footnote-21150530
+Ref: Derived Files-Footnote-31151126
+Node: Future Extensions1151240
+Node: Implementation Limitations1151846
+Node: Extension Design1153094
+Node: Old Extension Problems1154248
+Ref: Old Extension Problems-Footnote-11155765
+Node: Extension New Mechanism Goals1155822
+Ref: Extension New Mechanism Goals-Footnote-11159182
+Node: Extension Other Design Decisions1159371
+Node: Extension Future Growth1161477
+Node: Old Extension Mechanism1162313
+Node: Notes summary1164075
+Node: Basic Concepts1165261
+Node: Basic High Level1165942
+Ref: figure-general-flow1166214
+Ref: figure-process-flow1166813
+Ref: Basic High Level-Footnote-11170042
+Node: Basic Data Typing1170227
+Node: Glossary1173555
+Node: Copying1198707
+Node: GNU Free Documentation License1236263
+Node: Index1261399

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index b418d4cf..74dd35f8 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -526,10 +526,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1774,6 +1774,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -2341,24 +2342,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming (@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is an octal escape sequence (@pxref{Escape Sequences}) that puts
+a single quote into the program, without having to engage in ugly shell quoting tricks.
@quotation NOTE
As a side note, if you use Bash as your shell, you should execute the
@@ -3046,6 +3042,9 @@ awk '@{ if (length($0) > max) max = length($0) @}
END @{ print max @}' data
@end example
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
+
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@@ -4130,6 +4129,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4765,10 +4769,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4979,8 +4983,11 @@ However, using more than two hexadecimal digits produces
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4988,8 +4995,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -5550,6 +5559,204 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@ifdocbook
+@docbook
+<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
+@end docbook
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
+
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end cartouche
+@end ifnotdocbook
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5825,204 +6032,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@ifdocbook
-@docbook
-<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
-@end docbook
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
-
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end cartouche
-@end ifnotdocbook
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -7965,32 +7974,48 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -8681,7 +8706,7 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
@c EXCLUDE END
@@ -10511,7 +10536,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be a more complex expression).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -12302,7 +12328,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -12322,6 +12348,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -14114,7 +14150,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -14124,13 +14161,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -16152,7 +16186,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -16176,7 +16210,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -20455,8 +20489,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -20468,6 +20503,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -21070,7 +21107,12 @@ function readfile(file, tmp, contents)
This function reads from @code{file} one record at a time, building
up the full contents of the file in the local variable @code{contents}.
-It works, but is not necessarily efficient.
+It works, but is not necessarily
+@c 8/2014. Thanks to BWK for pointing this out:
+efficient.@footnote{Execution time grows quadratically in the size of
+the input; for each record, @command{awk} has to allocate a bigger
+internal buffer for @code{contents}, copy the old contents into it,
+and then append the contents of the new record.}
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -21743,8 +21785,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index da427d9b..8091e16c 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -521,10 +521,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1741,6 +1741,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -2308,24 +2309,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming (@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is the octal escape sequence for a single quote; it puts one
+into the program without having to engage in ugly shell quoting tricks.
@quotation NOTE
As a side note, if you use Bash as your shell, you should execute the
@@ -2957,6 +2953,9 @@ awk '@{ if (length($0) > max) max = length($0) @}
END @{ print max @}' data
@end example
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
+
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@@ -4041,6 +4040,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4676,10 +4680,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4890,8 +4894,11 @@ However, using more than two hexadecimal digits produces
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4899,8 +4906,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -5378,6 +5387,160 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end sidebar
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5653,160 +5816,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end sidebar
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -7567,32 +7576,48 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -8283,7 +8308,7 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
@c EXCLUDE END
@@ -9984,7 +10009,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be more complex expressions).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -11636,7 +11662,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -11656,6 +11682,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -13448,7 +13484,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -13458,13 +13495,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -15440,7 +15474,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -15464,7 +15498,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -19582,8 +19616,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -19595,6 +19630,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -20197,7 +20234,12 @@ function readfile(file, tmp, contents)
This function reads from @code{file} one record at a time, building
up the full contents of the file in the local variable @code{contents}.
-It works, but is not necessarily efficient.
+It works, but is not necessarily
+@c 8/2014. Thanks to BWK for pointing this out:
+efficient.@footnote{Execution time grows quadratically in the size of
+the input; for each record, @command{awk} has to allocate a bigger
+internal buffer for @code{contents}, copy the old contents into it,
+and then append the contents of the new record.}
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -20841,8 +20883,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0