summaryrefslogtreecommitdiff
path: root/pod
diff options
context:
space:
mode:
Diffstat (limited to 'pod')
-rw-r--r--pod/Makefile45
-rw-r--r--pod/perl.pod45
-rw-r--r--pod/perlbook.pod10
-rw-r--r--pod/perlbot.pod16
-rw-r--r--pod/perldata.pod116
-rw-r--r--pod/perldiag.pod58
-rw-r--r--pod/perldsc.pod348
-rw-r--r--pod/perlform.pod3
-rw-r--r--pod/perlfunc.pod477
-rw-r--r--pod/perlipc.pod842
-rw-r--r--pod/perllol.pod353
-rw-r--r--pod/perlmod.pod280
-rw-r--r--pod/perlop.pod62
-rw-r--r--pod/perlpod.pod8
-rw-r--r--pod/perlre.pod15
-rw-r--r--pod/perlref.pod3
-rw-r--r--pod/perlsyn.pod250
-rw-r--r--pod/perltrap.pod25
-rw-r--r--pod/perlvar.pod13
-rw-r--r--pod/perlxs.pod19
-rw-r--r--pod/perlxstut.pod529
-rw-r--r--pod/pod2html.PL550
-rwxr-xr-xpod/pod2html.SH490
-rw-r--r--[-rwxr-xr-x]pod/pod2latex.PL (renamed from pod/pod2latex.SH)63
-rw-r--r--[-rwxr-xr-x]pod/pod2man.PL (renamed from pod/pod2man.SH)63
25 files changed, 3600 insertions, 1083 deletions
diff --git a/pod/Makefile b/pod/Makefile
index 6ef971db45..38d5b0fd11 100644
--- a/pod/Makefile
+++ b/pod/Makefile
@@ -1,21 +1,24 @@
-all: man
+CONVERTERS = pod2html pod2latex pod2man
+
+all: $(CONVERTERS) man
PERL = ../miniperl
POD = \
perl.pod \
- perlxs.pod \
perlbook.pod \
perlbot.pod \
perlcall.pod \
perldata.pod \
perldebug.pod \
perldiag.pod \
+ perldsc.pod \
perlembed.pod \
perlform.pod \
perlfunc.pod \
perlguts.pod \
perlipc.pod \
+ perllol.pod \
perlmod.pod \
perlobj.pod \
perlop.pod \
@@ -29,22 +32,25 @@ POD = \
perlsub.pod \
perlsyn.pod \
perltrap.pod \
- perlvar.pod
+ perlvar.pod \
+ perlxs.pod \
+ perlxstut.pod
MAN = \
perl.man \
- perlxs.man \
perlbook.man \
perlbot.man \
perlcall.man \
perldata.man \
perldebug.man \
perldiag.man \
+ perldsc.man \
perlembed.man \
perlform.man \
perlfunc.man \
perlguts.man \
perlipc.man \
+ perllol.man \
perlmod.man \
perlobj.man \
perlop.man \
@@ -58,22 +64,25 @@ MAN = \
perlsub.man \
perlsyn.man \
perltrap.man \
- perlvar.man
+ perlvar.man \
+ perlxs.man \
+ perlxstut.man
HTML = \
perl.html \
- perlxs.html \
perlbook.html \
perlbot.html \
perlcall.html \
perldata.html \
perldebug.html \
perldiag.html \
+ perldsc.html \
perlembed.html \
perlform.html \
perlfunc.html \
perlguts.html \
perlipc.html \
+ perllol.html \
perlmod.html \
perlobj.html \
perlop.html \
@@ -87,22 +96,25 @@ HTML = \
perlsub.html \
perlsyn.html \
perltrap.html \
- perlvar.html
+ perlvar.html \
+ perlxs.html \
+ perlxstut.html
TEX = \
perl.tex \
- perlxs.tex \
perlbook.tex \
perlbot.tex \
perlcall.tex \
perldata.tex \
perldebug.tex \
perldiag.tex \
+ perldsc.tex \
perlembed.tex \
perlform.tex \
perlfunc.tex \
perlguts.tex \
perlipc.tex \
+ perllol.tex \
perlmod.tex \
perlobj.tex \
perlop.tex \
@@ -116,8 +128,9 @@ TEX = \
perlsub.tex \
perlsyn.tex \
perltrap.tex \
- perlvar.tex
-
+ perlvar.tex \
+ perlxs.tex \
+ perlxstut.tex
man: pod2man $(MAN)
@@ -152,11 +165,11 @@ realclean: clean
distclean: realclean
# Dependencies.
-pod2latex: pod2latex.SH ../config.sh
- sh pod2latex.SH
+pod2latex: pod2latex.PL ../lib/Config.pm
+ $(PERL) -I../lib pod2latex.PL
-pod2html: pod2html.SH ../config.sh
- sh pod2html.SH
+pod2html: pod2html.PL ../lib/Config.pm
+ $(PERL) -I ../lib pod2html.PL
-pod2man: pod2man.SH ../config.sh
- sh pod2man.SH
+pod2man: pod2man.PL ../lib/Config.pm
+ $(PERL) -I ../lib pod2man.PL
diff --git a/pod/perl.pod b/pod/perl.pod
index 3664ab6402..f0504c4271 100644
--- a/pod/perl.pod
+++ b/pod/perl.pod
@@ -17,7 +17,9 @@ of sections:
perlvar Perl predefined variables
perlsub Perl subroutines
perlmod Perl modules
- perlref Perl references and nested data structures
+ perlref Perl references
+ perldsc Perl data structures intro
+ perllol Perl data structures: lists of lists
perlobj Perl objects
perlbot Perl OO tricks and examples
perldebug Perl debugging
@@ -28,6 +30,7 @@ of sections:
perltrap Perl traps for the unwary
perlstyle Perl style guide
perlxs Perl XS application programming interface
+ perlxstut Perl XS tutorial
perlguts Perl internal functions for those doing extensions
perlcall Perl calling conventions from C
perlovl Perl overloading semantics
@@ -38,21 +41,21 @@ of sections:
(If you're intending to read these straight through for the first time,
the suggested order will tend to reduce the number of forward references.)
-Additional documentation for perl modules is available in
-the F</usr/local/lib/perl5/man/man3> directory. You can view this
-with a man(1) program by including the following in the
-appropriate start-up files. (You may have to adjust the path to
-match $Config{'man3dir'}.)
+Additional documentation for Perl modules is available in the
+F</usr/local/man/> directory. Some of this is distributed standard with
+Perl, but you'll also find third-party modules there. You should be able
+to view this with your man(1) program by including the proper directories
+in the appropriate start-up files. To find out where these are, type:
- .profile (for sh, bash or ksh users):
- MANPATH=$MANPATH:/usr/local/lib/perl5/man
- export MANPATH
+ perl -le 'use Config; print "@Config{man1dir,man3dir}"'
- .login (for csh or tcsh users):
- setenv MANPATH $MANPATH:/usr/local/lib/perl5/man
+If the directories were F</usr/local/man/man1> and F</usr/local/man/man3>,
+you would only need to add F</usr/local/man> to your MANPATH. If
+they are different, you'll have to add both stems.
If that doesn't work for some reason, you can still use the
-supplied perldoc script to view module information.
+supplied F<perldoc> script to view module information. You might
+also look into getting a replacement man program.
If something strange has gone wrong with your program and you're not
sure where you should look for help, try the B<-w> switch first. It
@@ -202,7 +205,12 @@ used.
A colon-separated list of directories in which to look for Perl library
files before looking in the standard library and the current
-directory. If PERL5LIB is not defined, PERLLIB is used.
+directory. If PERL5LIB is not defined, PERLLIB is used. When running
+taint checks (because the script was running setuid or setgid, or the
+B<-T> switch was used), neither variable is used. The script should
+instead say
+
+ use lib "/my/directory";
=item PERL5DB
@@ -216,7 +224,6 @@ A colon-separated list of directories in which to look for Perl library
files before looking in the standard library and the current
directory. If PERL5LIB is defined, PERLLIB is not used.
-
=back
Apart from these, Perl uses no other environment variables, except
@@ -231,7 +238,7 @@ honest:
=head1 AUTHOR
-Larry Wall <F<lwall@netlabs.com.>, with the help of oodles of other folks.
+Larry Wall E<lt>F<lwall@netlabs.com>E<gt>, with the help of oodles of other folks.
=head1 FILES
@@ -241,6 +248,7 @@ Larry Wall <F<lwall@netlabs.com.>, with the help of oodles of other folks.
=head1 SEE ALSO
a2p awk to perl translator
+
s2p sed to perl translator
=head1 DIAGNOSTICS
@@ -265,7 +273,8 @@ switch?
The B<-w> switch is not mandatory.
Perl is at the mercy of your machine's definitions of various
-operations such as type casting, atof() and sprintf().
+operations such as type casting, atof() and sprintf(). The latter
+can even trigger a coredump when passed ludicrous input values.
If your stdio requires a seek or eof between reads and writes on a
particular stream, so does Perl. (This doesn't apply to sysread()
@@ -277,6 +286,8 @@ given identifier may not be longer than 255 characters, and no
component of your PATH may be longer than 255 if you use B<-S>. A regular
expression may not compile to more than 32767 bytes internally.
+See the perl bugs database at L<http://perl.com/perl/bugs/>.
+
Perl actually stands for Pathologically Eclectic Rubbish Lister, but
don't tell anyone I said that.
@@ -285,6 +296,6 @@ don't tell anyone I said that.
The Perl motto is "There's more than one way to do it." Divining
how many more is left as an exercise to the reader.
-The three principle virtues of a programmer are Laziness,
+The three principal virtues of a programmer are Laziness,
Impatience, and Hubris. See the Camel Book for why.
diff --git a/pod/perlbook.pod b/pod/perlbook.pod
index 16f74df403..5bb4bfb0b5 100644
--- a/pod/perlbook.pod
+++ b/pod/perlbook.pod
@@ -12,9 +12,11 @@ I<Learning Perl> is a tutorial that covers the most frequently used subset
of the language.
Programming Perl (the Camel Book):
- ISBN 0-937175-64-1 (English)
- ISBN 4-89052-384-7 (Japanese)
+ ISBN 0-937175-64-1 (English)
+ ISBN 4-89052-384-7 (Japanese)
Learning Perl (the Llama Book):
- ISBN 1-56592-042-2 (English)
-
+ ISBN 1-56592-042-2 (English)
+ ISBN 4-89502-678-1 (Japanese)
+ ISBN 2-84177-005-2 (French)
+ ISBN 3-930673-08-8 (German)
diff --git a/pod/perlbot.pod b/pod/perlbot.pod
index de2207a961..61a37266a2 100644
--- a/pod/perlbot.pod
+++ b/pod/perlbot.pod
@@ -199,11 +199,10 @@ relationships between objects.
=head1 OVERRIDING SUPERCLASS METHODS
-The following example demonstrates how one might override a superclass
-method and then call the method after it has been overridden. The
-Foo::Inherit class allows the programmer to call an overridden superclass
-method without actually knowing where that method is defined.
-
+The following example demonstrates how to override a superclass method and
+then call the overridden method. The B<SUPER> pseudo-class allows the
+programmer to call an overridden superclass method without actually knowing
+where that method is defined.
package Buz;
sub goo { print "here's the goo\n" }
@@ -216,7 +215,6 @@ method without actually knowing where that method is defined.
package Foo;
@ISA = qw( Bar Baz );
- @Foo::Inherit::ISA = @ISA; # Access to overridden methods.
sub new {
my $type = shift;
@@ -225,15 +223,15 @@ method without actually knowing where that method is defined.
sub grr { print "grumble\n" }
sub goo {
my $self = shift;
- $self->Foo::Inherit::goo();
+ $self->SUPER::goo();
}
sub mumble {
my $self = shift;
- $self->Foo::Inherit::mumble();
+ $self->SUPER::mumble();
}
sub google {
my $self = shift;
- $self->Foo::Inherit::google();
+ $self->SUPER::google();
}
package main;
diff --git a/pod/perldata.pod b/pod/perldata.pod
index 4042ecf74e..648f0922e1 100644
--- a/pod/perldata.pod
+++ b/pod/perldata.pod
@@ -108,17 +108,27 @@ lists. See L<perlfunc/wantarray>.
=head2 Scalar values
+All data in Perl is a scalar or an array of scalars or a hash of scalars.
Scalar variables may contain various kinds of singular data, such as
-numbers, strings and references. In general, conversion from one form
-to another is transparent. (A scalar may not contain multiple values,
-but may contain a reference to an array or hash containing multiple
-values.) Because of the automatic conversion of scalars, operations and
-functions that return scalars don't need to care (and, in fact, can't
-care) whether the context is looking for a string or a number.
+numbers, strings, and references. In general, conversion from one form to
+another is transparent. (A scalar may not contain multiple values, but
+may contain a reference to an array or hash containing multiple values.)
+Because of the automatic conversion of scalars, operations and functions
+that return scalars don't need to care (and, in fact, can't care) whether
+the context is looking for a string or a number.
+
+Scalars aren't necessarily one thing or another. There's no place to
+declare a scalar variable to be of type "string", or of type "number", or
+type "filehandle", or anything else. Perl is a contextually polymorphic
+language whose scalars can be strings, numbers, or references (which
+includes objects). While strings and numbers are considered pretty much
+the same thing for nearly all purposes, references are strongly-typed
+uncastable pointers with built-in reference-counting and destructor
+invocation.
A scalar value is interpreted as TRUE in the Boolean sense if it is not
the null string or the number 0 (or its string equivalent, "0"). The
-Boolean context is just a special kind of scalar context.
+Boolean context is just a special kind of scalar context.
There are actually two varieties of null scalars: defined and
undefined. Undefined null scalars are returned when there is no real
@@ -128,6 +138,15 @@ array. An undefined null scalar may become defined the first time you
use it as if it were defined, but prior to that you can use the
defined() operator to determine whether the value is defined or not.
+To find out whether a given string is a valid non-zero number, it's usually
+enough to test it against both numeric 0 and also lexical "0" (although
+this will cause B<-w> noises). That's because strings that aren't
+numbers count as 0, just as they do in I<awk>:
+
+ if ($str == 0 && $str ne "0") {
+ warn "That doesn't look like a number";
+ }
+
The length of an array is a scalar value. You may find the length of
array @days by evaluating C<$#days>, as in B<csh>. (Actually, it's not
the length of the array, it's the subscript of the last element, since
@@ -158,6 +177,11 @@ So in general you can just assume that
scalar(@whatever) == $#whatever + 1;
+Some programmers choose to use an explicit conversion so nothing's
+left to doubt:
+
+ $element_count = scalar(@whatever);
+
If you evaluate a hash in a scalar context, it returns a value which is
true if and only if the hash contains any key/value pairs. (If there
are any key/value pairs, the value returned is a string consisting of
@@ -174,7 +198,6 @@ isn't supposed to happen.)
Numeric literals are specified in any of the customary floating point or
integer formats:
-
12345
12345.67
.23E-10
@@ -182,7 +205,7 @@ integer formats:
0377 # octal
4_294_967_296 # underline for legibility
-String literals are delimited by either single or double quotes. They
+String literals are usually delimited by either single or double quotes. They
work much like shell quotes: double-quoted string literals are subject
to backslash and variable substitution; single-quoted strings are not
(except for "C<\'>" and "C<\\>"). The usual Unix backslash rules apply for making
@@ -229,7 +252,7 @@ logical end of the script before the actual end of file. Any following
text is ignored, but may be read via the DATA filehandle. (The DATA
filehandle may read data only from the main script, but not from any
required file or evaluated string.) The two control characters ^D and
-^Z are synonyms for __END__.
+^Z are synonyms for __END__ (or __DATA__ in a module).
A word that has no other interpretation in the grammar will
be treated as if it were a quoted string. These are known as
@@ -248,7 +271,8 @@ by saying C<no strict 'subs'>.
Array variables are interpolated into double-quoted strings by joining all
the elements of the array with the delimiter specified in the C<$">
-variable, space by default. The following are equivalent:
+variable ($LIST_SEPARATOR in English), space by default. The following
+are equivalent:
$temp = join($",@ARGV);
system "echo $temp";
@@ -286,9 +310,6 @@ whitespace) on the terminating line.
The price is $Price.
EOF
- print << x 10; # Legal but discouraged. Use <<"".
- Merry Christmas!
-
print <<`EOC`; # execute commands
echo hi there
echo lo there
@@ -359,7 +380,8 @@ identity in a LIST--the list
(@foo,@bar,&SomeSub)
contains all the elements of @foo followed by all the elements of @bar,
-followed by all the elements returned by the subroutine named SomeSub.
+followed by all the elements returned by the subroutine named SomeSub when
+it's called in a list context.
To make a list reference that does I<NOT> interpolate, see L<perlref>.
The null list is represented by (). Interpolating it in a list
@@ -373,6 +395,9 @@ put the list in parentheses to avoid ambiguity. Examples:
# Stat returns list value.
$time = (stat($file))[8];
+ # SYNTAX ERROR HERE.
+ $time = stat($file)[8]; # OOPS, FORGOT PARENS
+
# Find a hex digit.
$hexdigit = ('a','b','c','d','e','f')[$digit-10];
@@ -386,12 +411,22 @@ is legal to assign to:
($map{'red'}, $map{'blue'}, $map{'green'}) = (0x00f, 0x0f0, 0xf00);
+Array assignment in a scalar context returns the number of elements
+produced by the expression on the right side of the assignment:
+
+ $x = (($foo,$bar) = (3,2,1)); # set $x to 3, not 2
+ $x = (($foo,$bar) = f()); # set $x to f()'s return count
+
+This is very handy when you want to do a list assignment in a Boolean
+context, since most list functions return a null list when finished,
+which when assigned produces a 0, which is interpreted as FALSE.
+
The final element may be an array or a hash:
($a, $b, @rest) = split;
local($a, $b, %rest) = @_;
-You can actually put an array anywhere in the list, but the first array
+You can actually put an array or hash anywhere in the list, but the first one
in the list will soak up all the values, and anything after it will get
a null value. This may be useful in a local() or my().
@@ -401,21 +436,38 @@ as a key and a value:
# same as map assignment above
%map = ('red',0x00f,'blue',0x0f0,'green',0xf00);
-It is often more readable to use the C<=E<gt>> operator between key/value pairs
-(the C<=E<gt>> operator is actually nothing more than a more visually
-distinctive synonym for a comma):
+While literal lists and named arrays are usually interchangeable, that's
+not the case for hashes. Just because you can subscript a list value like
+a normal array does not mean that you can subscript a list value as a
+hash. Likewise, hashes included as parts of other lists (including
+parameter lists and return lists from functions) always flatten out into
+key/value pairs. That's why it's good to use references sometimes.
- %map = (
- 'red' => 0x00f,
- 'blue' => 0x0f0,
- 'green' => 0xf00,
- );
-
-Array assignment in a scalar context returns the number of elements
-produced by the expression on the right side of the assignment:
+It is often more readable to use the C<=E<gt>> operator between key/value
+pairs. The C<=E<gt>> operator is mostly just a more visually distinctive
+synonym for a comma, but it also quotes its left-hand operand, which makes
+it nice for initializing hashes:
- $x = (($foo,$bar) = (3,2,1)); # set $x to 3, not 2
-
-This is very handy when you want to do a list assignment in a Boolean
-context, since most list functions return a null list when finished,
-which when assigned produces a 0, which is interpreted as FALSE.
+ %map = (
+ red => 0x00f,
+ blue => 0x0f0,
+ green => 0xf00,
+ );
+
+or for initializing hash references to be used as records:
+
+ $rec = {
+ witch => 'Mable the Merciless',
+ cat => 'Fluffy the Ferocious',
+ date => '10/31/1776',
+ };
+
+or for using call-by-named-parameter to complicated functions:
+
+ $field = $query->radio_group(
+ name => 'group_name',
+ values => ['eenie','meenie','minie'],
+ default => 'meenie',
+ linebreak => 'true',
+ labels => \%labels
+ );
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index e41c29939a..ad4a532aaf 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -194,13 +194,6 @@ could indicate that SvREFCNT_dec() was called too many times, or that
SvREFCNT_inc() was called too few times, or that the SV was mortalized
when it shouldn't have been, or that memory has been corrupted.
-=item Attempt to use reference as hash key
-
-(W) References as not very meaningful as hash keys. You probably forgot to
-dereference the reference before using it in a hash list, or got mixed up
-and used C<{}> or C<[]> instead of C<()>. Or perhaps a missing key in the
-hash list is causing values to be treated as keys.
-
=item Bad arg length for %s, is %d, should be %d
(F) You passed a buffer of the wrong size to one of msgctl(), semctl() or
@@ -262,6 +255,10 @@ Compilation stops immediately and the interpreter is exited.
(W) You tried to do a bind on a closed socket. Did you forget to check
the return value of your socket() call? See L<perlfunc/bind>.
+=item Bizarre copy of %s in %s
+
+(P) Perl detected an attempt to copy an internal value that is not copiable.
+
=item Callback called exit
(F) A subroutine invoked from an external package via perl_call_sv()
@@ -490,6 +487,13 @@ call for another. It can't manufacture one out of whole cloth. In general
you should only be calling it out of an AUTOLOAD routine anyway. See
L<perlfunc/goto>.
+=item Can't localize a reference
+
+(F) You said something like C<local $$ref>, which is not allowed because
+the compiler can't determine whether $ref will end up pointing to anything
+with a symbol table entry, and a symbol table entry is necessary to
+do a local.
+
=item Can't localize lexical variable %s
(F) You used local on a variable name that was previous declared as a
@@ -740,6 +744,10 @@ times than it has returned. This probably indicates an infinite
recursion, unless you're writing strange benchmark programs, in which
case it indicates something else.
+=item Did you mean &%s instead?
+
+(W) You probably referred to an imported subroutine &FOO as $FOO or some such.
+
=item Did you mean $ or @ instead of %?
(W) You probably said %hash{$key} when you meant $hash{$key} or @hash{@keys}.
@@ -770,6 +778,13 @@ declaration.
(S) An internal routine called free() on something that had already
been freed.
+=item elseif should be elsif
+
+(S) There is no keyword "elseif" in Perl because Larry thinks it's
+ugly. Your code will be interpreted as an attempt to call a method
+named "elseif" for the class returned by the following block. This is
+unlikely to be what you want.
+
=item END failed--cleanup aborted
(F) An untrapped exception was raised while executing an END subroutine.
@@ -1570,6 +1585,11 @@ last argument of the previous construct, for example:
open FOO || die;
+=item Prototype mismatch: (%s) vs (%s)
+
+(S) The subroutine being defined had a predeclared (forward) declaration
+with a different function prototype.
+
=item Read on closed filehandle <%s>
(W) The filehandle you're reading from got itself closed sometime before now.
@@ -1742,7 +1762,7 @@ But before sort was a keyword, people sometimes used it as a filehandle.
=item Sort subroutine didn't return a numeric value
(F) A sort comparison routine must return a number. You probably blew
-it by not using C<E<lt>=E<gt> or C<cmp>, or by not using them correctly.
+it by not using C<E<lt>=E<gt>> or C<cmp>, or by not using them correctly.
See L<perlfunc/sort>.
=item Sort subroutine didn't return single value
@@ -1931,6 +1951,10 @@ certain type. Arrays must be @NAME or @{EXPR}. Hashes must be
(W) A umask of 222 is incorrect. It should be 0222, since octal literals
always start with 0 in Perl, as in C.
+=item Unable to create sub named "%s"
+
+(F) You attempted to create or access a subroutine with an illegal name.
+
=item Unbalanced context: %d more PUSHes than POPs
(W) The exit code detected an internal inconsistency in how many execution
@@ -1976,6 +2000,11 @@ or if it was, it has since been undefined.
(F) The sort comparison routine specified is declared but doesn't seem to
have been defined yet. See L<perlfunc/sort>.
+=item Undefined top format "%s" called
+
+(F) The format indicated doesn't seem to exist. Perhaps it's really in
+another package? See L<perlform>.
+
=item unexec of %s into %s failed!
(F) The unexec() routine failed for some reason. See your local FSF
@@ -2076,6 +2105,11 @@ from C. This usually means there's a better way to do it in Perl.
because there's a better way to do it, and also because the old way has
bad side effects.
+=item Use of bare << to mean <<"" is deprecated
+
+(D) You are now encouraged to use the explicitly quoted form if you
+wish to use a blank line as the terminator of the here-document.
+
=item Use of implicit split to @_ is deprecated
(D) It makes a lot of work for the compiler when you clobber a
@@ -2119,6 +2153,14 @@ a scalar context, the comma is treated like C's comma operator, which
throws away the left argument, which is not what you want. See
L<perlref> for more on this.
+=item Variable "%s" is not exported
+
+(F) While "use strict" is in effect, you referred to a global variable
+that you apparently thought was imported from another module, because
+something else of the same name (usually a subroutine) is exported
+by that module. It usually means you put the wrong funny character
+on the front of your variable.
+
=item Warning: unable to close filehandle %s properly.
(S) The implicit close() done by an open() got an error indication on the
diff --git a/pod/perldsc.pod b/pod/perldsc.pod
new file mode 100644
index 0000000000..1d51af8ab3
--- /dev/null
+++ b/pod/perldsc.pod
@@ -0,0 +1,348 @@
+=head1 TITLE
+
+perldsc - Manipulating Complex Data Structures in Perl
+
+=head1 INTRODUCTION
+
+The single feature most sorely lacking in the Perl programming language
+prior to its 5.0 release was complex data structures. Even without direct
+language support, some valiant programmers did manage to emulate them, but
+it was hard work and not for the faint of heart. You could occasionally
+get away with the C<$m{$LoL,$b}> notation borrowed from I<awk> in which the
+keys are actually more like a single concatenated string C<"$LoL$b">, but
+traversal and sorting were difficult. More desperate programmers even
+hacked Perl's internal symbol table directly, a strategy that proved hard
+to develop and maintain--to put it mildly.
+
+The 5.0 release of Perl let us have complex data structures. You
+may now write something like this and all of a sudden, you'd have an array
+with three dimensions!
+
+ for $x (1 .. 10) {
+ for $y (1 .. 10) {
+ for $z (1 .. 10) {
+ $LoL[$x][$y][$z] =
+ $x ** $y + $z;
+ }
+ }
+ }
+
+Alas, however simple this may appear, underneath it's a much more
+elaborate construct than meets the eye!
+
+How do you print it out? Why can't you just say C<print @LoL>? How do
+you sort it? How can you pass it to a function or get one of these back
+from a function? Is it an object? Can you save it to disk to read
+back later? How do you access whole rows or columns of that matrix? Do
+all the values have to be numeric?
+
+As you see, it's quite easy to become confused. While some small portion
+of the blame for this can be attributed to the reference-based
+implementation, it's really more due to a lack of existing documentation with
+examples designed for the beginner.
+
+This document is meant to be a detailed but understandable treatment of
+the many different sorts of data structures you might want to develop. It should
+also serve as a cookbook of examples. That way, when you need to create one of these
+complex data structures, you can just pinch, pilfer, or purloin
+a drop-in example from here.
+
+Let's look at each of these possible constructs in detail. There are separate
+documents on each of the following:
+
+=over 5
+
+=item * arrays of arrays
+
+=item * hashes of arrays
+
+=item * arrays of hashes
+
+=item * hashes of hashes
+
+=item * more elaborate constructs
+
+=item * recursive and self-referential data structures
+
+=item * objects
+
+=back
+
+But for now, let's look at some of the general issues common to all
+of these types of data structures.
+
+=head1 REFERENCES
+
+The most important thing to understand about all data structures in Perl
+--including multidimensional arrays--is that even though they might
+appear otherwise, Perl C<@ARRAY>s and C<%HASH>es are all internally
+one-dimensional. They can only hold scalar values (meaning a string,
+number, or a reference). They cannot directly contain other arrays or
+hashes, but instead contain I<references> to other arrays or hashes.
+
+You can't use a reference to an array or hash in quite the same way that
+you would a real array or hash. For C or C++ programmers unused to distinguishing
+between arrays and pointers to the same, this can be confusing. If so,
+just think of it as the difference between a structure and a pointer to a
+structure.
+
+You can (and should) read more about references in the perlref(1) man
+page. Briefly, references are rather like pointers that know what they
+point to. (Objects are also a kind of reference, but we won't be needing
+them right away--if ever.) That means that when you have something that
+looks to you like an access to a two-or-more-dimensional array and/or hash,
+what's really going on is that in all these cases, the base type is
+merely a one-dimensional entity that contains references to the next
+level. It's just that you can I<use> it as though it were a
+two-dimensional one. This is actually the way almost all C
+multidimensional arrays work as well.
+
+ $list[7][12] # array of arrays
+ $list[7]{string} # array of hashes
+ $hash{string}[7] # hash of arrays
+ $hash{string}{'another string'} # hash of hashes
+
+Now, because the top level only contains references, if you try to print
+out your array with a simple print() function, you'll get something
+that doesn't look very nice, like this:
+
+ @LoL = ( [2, 3], [4, 5, 7], [0] );
+ print $LoL[1][2];
+ 7
+ print @LoL;
+ ARRAY(0x83c38)ARRAY(0x8b194)ARRAY(0x8b1d0)
+
+
+That's because Perl doesn't (ever) implicitly dereference your variables.
+If you want to get at the thing a reference is referring to, then you have
+to do this yourself using either prefix typing indicators, like
+C<${$blah}>, C<@{$blah}>, C<@{$blah[$i]}>, or else postfix pointer arrows,
+like C<$a-E<gt>[3]>, C<$h-E<gt>{fred}>, or even C<$ob-E<gt>method()-E<gt>[3]>.
+
+=head1 COMMON MISTAKES
+
+The two most common mistakes made in constructing something like
+an array of arrays are either accidentally counting the number of
+elements or else taking a reference to the same memory location
+repeatedly. Here's the case where you just get the count instead
+of a nested array:
+
+ for $i (1..10) {
+ @list = somefunc($i);
+ $LoL[$i] = @list; # WRONG!
+ }
+
+That's just the simple case of assigning a list to a scalar and getting
+its element count. If that's what you really and truly want, then you
+might do well to consider being a tad more explicit about it, like this:
+
+ for $i (1..10) {
+ @list = somefunc($i);
+ $counts[$i] = scalar @list;
+ }
+
+Here's the case of taking a reference to the same memory location
+again and again:
+
+ for $i (1..10) {
+ @list = somefunc($i);
+ $LoL[$i] = \@list; # WRONG!
+ }
+
+So, just what's the big problem with that? It looks right, doesn't it?
+After all, I just told you that you need an array of references, so by
+golly, you've made me one!
+
+Unfortunately, while this is true, it's still broken. All the references
+in @LoL refer to the I<very same place>, and they will therefore all hold
+whatever was last in @list! It's similar to the problem demonstrated in
+the following C program:
+
+ #include <pwd.h>
+ main() {
+ struct passwd *getpwnam(), *rp, *dp;
+ rp = getpwnam("root");
+ dp = getpwnam("daemon");
+
+ printf("daemon name is %s\nroot name is %s\n",
+ dp->pw_name, rp->pw_name);
+ }
+
+Which will print
+
+ daemon name is daemon
+ root name is daemon
+
+The problem is that both C<rp> and C<dp> are pointers to the same location
+in memory! In C, you'd have to remember to malloc() yourself some new
+memory. In Perl, you'll want to use the array constructor C<[]> or the
+hash constructor C<{}> instead. Here's the right way to do the preceding
+broken code fragments:
+
+ for $i (1..10) {
+ @list = somefunc($i);
+ $LoL[$i] = [ @list ];
+ }
+
+The square brackets make a reference to a new array with a I<copy>
+of what's in @list at the time of the assignment. This is what
+you want.
+
+Note that this will produce something similar, but it's
+much harder to read:
+
+ for $i (1..10) {
+ @list = 0 .. $i;
+ @{$LoL[$i]} = @list;
+ }
+
+Is it the same? Well, maybe so--and maybe not. The subtle difference
+is that when you assign something in square brackets, you know for sure
+it's always a brand new reference with a new I<copy> of the data.
+Something else could be going on in this new case with the C<@{$LoL[$i]}>
+dereference on the left-hand-side of the assignment. It all depends on
+whether C<$LoL[$i]> had been undefined to start with, or whether it
+already contained a reference. If you had already populated @LoL with
+references, as in
+
+ $LoL[3] = \@another_list;
+
+Then the assignment with the indirection on the left-hand-side would
+use the existing reference that was already there:
+
+ @{$LoL[3]} = @list;
+
+Of course, this I<would> have the "interesting" effect of clobbering
+@another_list. (Have you ever noticed how when a programmer says
+something is "interesting", that rather than meaning "intriguing",
+they're disturbingly more apt to mean that it's "annoying",
+"difficult", or both? :-)
+
+So just remember to always use the array or hash constructors with C<[]>
+or C<{}>, and you'll be fine, although it's not always optimally
+efficient.
+
+Surprisingly, the following dangerous-looking construct will
+actually work out fine:
+
+ for $i (1..10) {
+ my @list = somefunc($i);
+ $LoL[$i] = \@list;
+ }
+
+That's because my() is more of a run-time statement than it is a
+compile-time declaration I<per se>. This means that the my() variable is
+remade afresh each time through the loop. So even though it I<looks> as
+though you stored the same variable reference each time, you actually did
+not! This is a subtle distinction that can produce more efficient code at
+the risk of misleading all but the most experienced of programmers. So I
+usually advise against teaching it to beginners. In fact, except for
+passing arguments to functions, I seldom like to see the gimme-a-reference
+operator (backslash) used much at all in code. Instead, I advise
+beginners that they (and most of the rest of us) should try to use the
+much more easily understood constructors C<[]> and C<{}> instead of
+relying upon lexical (or dynamic) scoping and hidden reference-counting to
+do the right thing behind the scenes.
+
+In summary:
+
+ $LoL[$i] = [ @list ]; # usually best
+ $LoL[$i] = \@list; # perilous; just how my() was that list?
+ @{ $LoL[$i] } = @list; # way too tricky for most programmers
+
+
+=head1 CAVEAT ON PRECEDENCE
+
+Speaking of things like C<@{$LoL[$i]}>, the following are actually the
+same thing:
+
+ $listref->[2][2] # clear
+ $$listref[2][2] # confusing
+
+That's because Perl's precedence rules on its five prefix dereferencers
+(which look like someone swearing: C<$ @ * % &>) make them bind more
+tightly than the postfix subscripting brackets or braces! This will no
+doubt come as a great shock to the C or C++ programmer, who is quite
+accustomed to using C<*a[i]> to mean what's pointed to by the I<i'th>
+element of C<a>. That is, they first take the subscript, and only then
+dereference the thing at that subscript. That's fine in C, but this isn't C.
+
+The seemingly equivalent construct in Perl, C<$$listref[$i]> first does
+the deref of C<$listref>, making it take $listref as a reference to an
+array, and then dereference that, and finally tell you the I<i'th> value
+of the array pointed to by $listref. If you wanted the C notion, you'd have to
+write C<${$LoL[$i]}> to force the C<$LoL[$i]> to get evaluated first
+before the leading C<$> dereferencer.
+
+=head1 WHY YOU SHOULD ALWAYS C<use strict>
+
+If this is starting to sound scarier than it's worth, relax. Perl has
+some features to help you avoid its most common pitfalls. The best
+way to avoid getting confused is to start every program like this:
+
+ #!/usr/bin/perl -w
+ use strict;
+
+This way, you'll be forced to declare all your variables with my() and
+also disallow accidental "symbolic dereferencing". Therefore if you'd done
+this:
+
+ my $listref = [
+ [ "fred", "barney", "pebbles", "bambam", "dino", ],
+ [ "homer", "bart", "marge", "maggie", ],
+ [ "george", "jane", "alroy", "judy", ],
+ ];
+
+ print $listref[2][2];
+
+The compiler would immediately flag that as an error I<at compile time>,
+because you were accidentally accessing C<@listref>, an undeclared
+variable, and it would thereby remind you to instead write:
+
+ print $listref->[2][2]
+
+=head1 DEBUGGING
+
+The standard Perl debugger in 5.001 doesn't do a very nice job of
+printing out complex data structures. However, the perl5db that
+Ilya Zakharevich E<lt>F<ilya@math.ohio-state.edu>E<gt>
+wrote, which is accessible at
+
+ ftp://ftp.perl.com/pub/perl/ext/perl5db-kit-0.9.tar.gz
+
+has several new features, including command line editing as well
+as the C<X> command to dump out complex data structures. For example,
+given the assignment to $LoL above, here's the debugger output:
+
+ DB<1> X $LoL
+ $LoL = ARRAY(0x13b5a0)
+ 0 ARRAY(0x1f0a24)
+ 0 'fred'
+ 1 'barney'
+ 2 'pebbles'
+ 3 'bambam'
+ 4 'dino'
+ 1 ARRAY(0x13b558)
+ 0 'homer'
+ 1 'bart'
+ 2 'marge'
+ 3 'maggie'
+ 2 ARRAY(0x13b540)
+ 0 'george'
+ 1 'jane'
+ 2 'alroy'
+ 3 'judy'
+
+There's also a lower-case B<x> command which is nearly the same.
+
+=head1 SEE ALSO
+
+perlref(1), perldata(1)
+
+=head1 AUTHOR
+
+Tom Christiansen E<lt>F<tchrist@perl.com>E<gt>
+
+Last update:
+Sat Oct 7 22:41:09 MDT 1995
+
diff --git a/pod/perlform.pod b/pod/perlform.pod
index 99e0746c1a..c4bb78cfcd 100644
--- a/pod/perlform.pod
+++ b/pod/perlform.pod
@@ -310,4 +310,5 @@ is to printf(), do this:
Lexical variables (declared with "my") are not visible within a
format unless the format is declared within the scope of the lexical
-variable. (They weren't visiblie at all before version 5.001.)
+variable. (They weren't visible at all before version 5.001.) See
+L<perlfunc/my> for other issues.
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index 42ec30fb55..2cc480cfe8 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -150,7 +150,9 @@ are found, it's a C<-B> file, otherwise it's a C<-T> file. Also, any file
containing null in the first block is considered a binary file. If C<-T>
or C<-B> is used on a filehandle, the current stdio buffer is examined
rather than the first block. Both C<-T> and C<-B> return TRUE on a null
-file, or a file at EOF when testing a filehandle.
+file, or a file at EOF when testing a filehandle. Because you have to
+read a file to do the C<-T> test, on most occasions you want to use a C<-f>
+against the file first, as in C<next unless -f $file && -T $file>.
If any of the file tests (or either the stat() or lstat() operators) are given the
special filehandle consisting of a solitary underline, then the stat
@@ -179,7 +181,7 @@ Returns the absolute value of its argument.
Accepts an incoming socket connect, just as the accept(2) system call
does. Returns the packed address if it succeeded, FALSE otherwise.
-See example in L<perlipc>.
+See example in L<perlipc/"Sockets: Client/Server Communication">.
=item alarm SECONDS
@@ -192,9 +194,10 @@ argument of 0 may be supplied to cancel the previous timer without
starting a new one. The returned value is the amount of time remaining
on the previous timer.
-For sleeps of finer granularity than one second, you may use Perl's
+For delays of finer granularity than one second, you may use Perl's
syscall() interface to access setitimer(2) if your system supports it,
-or else see L</select()> below.
+or else see L</select()> below. It is not advised to intermix alarm()
+and sleep() calls.
=item atan2 Y,X
@@ -204,8 +207,8 @@ Returns the arctangent of Y/X in the range -PI to PI.
Binds a network address to a socket, just as the bind system call
does. Returns TRUE if it succeeded, FALSE otherwise. NAME should be a
-packed address of the appropriate type for the socket. See example in
-L<perlipc>.
+packed address of the appropriate type for the socket. See the examples in
+L<perlipc/"Sockets: Client/Server Communication">.
=item binmode FILEHANDLE
@@ -213,18 +216,21 @@ Arranges for the file to be read or written in "binary" mode in
operating systems that distinguish between binary and text files.
Files that are not in binary mode have CR LF sequences translated to LF
on input and LF translated to CR LF on output. Binmode has no effect
-under Unix; in DOS, it may be imperative. If FILEHANDLE is an expression,
+under Unix; in DOS, it may be imperative--otherwise your DOS C library
+may mangle your file. If FILEHANDLE is an expression,
the value is taken as the name of the filehandle.
-=item bless REF,PACKAGE
+=item bless REF,CLASSNAME
=item bless REF
This function tells the referenced object (passed as REF) that it is now
-an object in PACKAGE--or the current package if no PACKAGE is specified,
-which is the usual case. It returns the reference for convenience, since
-a bless() is often the last thing in a constructor. See L<perlobj> for
-more about the blessing (and blessings) of objects.
+an object in the CLASSNAME package--or the current package if no CLASSNAME
+is specified, which is often the case. It returns the reference for
+convenience, since a bless() is often the last thing in a constructor.
+Always use the two-argument version if the function doing the blessing
+might be inherited by a derived class. See L<perlobj> for more about the
+blessing (and blessings) of objects.
=item caller EXPR
@@ -244,7 +250,7 @@ to go back before the current one.
$subroutine, $hasargs, $wantargs) = caller($i);
Furthermore, when called from within the DB package, caller returns more
-detailed information: it sets sets the list variable @DB:args to be the
+detailed information: it sets the list variable @DB::args to be the
arguments with which that subroutine was invoked.
=item chdir EXPR
@@ -256,8 +262,8 @@ otherwise. See example under die().
=item chmod LIST
Changes the permissions of a list of files. The first element of the
-list must be the numerical mode. Returns the number of files
-successfully changed.
+list must be the numerical mode, which should probably be an octal
+number. Returns the number of files successfully changed.
$cnt = chmod 0755, 'foo', 'bar';
chmod 0755, @executables;
@@ -342,6 +348,11 @@ Here's an example that looks up non-numeric uids in the passwd file:
@ary = <${pattern}>; # expand filenames
chown $uid, $gid, @ary;
+On most systems, you are not allowed to change the ownership of the
+file unless you're the superuser, although you should be able to change
+the group to any of your secondary groups. On insecure systems, these
+restrictions may be relaxed, but this is not a portable assumption.
+
=item chr NUMBER
Returns the character represented by that NUMBER in the character set.
@@ -349,16 +360,19 @@ For example, C<chr(65)> is "A" in ASCII.
=item chroot FILENAME
-Does the same as the system call of that name. If you don't know what
-it does, don't worry about it. If FILENAME is omitted, does chroot to
-$_.
+This function works as the system call by the same name: it makes the
+named directory the new root directory for all further pathnames that
+begin with a "/" by your process and all of its children. (It doesn't
+change your current working directory, which is unaffected.) For security
+reasons, this call is restricted to the superuser. If FILENAME is
+omitted, does chroot to $_.
=item close FILEHANDLE
Closes the file or pipe associated with the file handle, returning TRUE
only if stdio successfully flushes buffers and closes the system file
descriptor. You don't have to close FILEHANDLE if you are immediately
-going to do another open on it, since open will close it for you. (See
+going to do another open() on it, since open() will close it for you. (See
open().) However, an explicit close on an input file resets the line
counter ($.), while the implicit close done by open() does not. Also,
closing a pipe will wait for the process executing on the pipe to
@@ -381,8 +395,8 @@ Closes a directory opened by opendir().
Attempts to connect to a remote socket, just as the connect system call
does. Returns TRUE if it succeeded, FALSE otherwise. NAME should be a
-packed address of the appropriate type for the socket. See example in
-L<perlipc>.
+packed address of the appropriate type for the socket. See the examples in
+L<perlipc/"Sockets: Client/Server Communication">.
=item cos EXPR
@@ -391,9 +405,11 @@ takes cosine of $_.
=item crypt PLAINTEXT,SALT
-Encrypts a string exactly like the crypt(3) function in the C library.
-Useful for checking the password file for lousy passwords, amongst
-other things. Only the guys wearing white hats should do this.
+Encrypts a string exactly like the crypt(3) function in the C library
+(assuming that you actually have a version there that has not been
+extirpated as a potential munition). This can prove useful for checking
+the password file for lousy passwords, amongst other things. Only the
+guys wearing white hats should do this.
Here's an example that makes sure that whoever runs this program knows
their own password:
@@ -426,15 +442,16 @@ Breaks the binding between a DBM file and an associative array.
[This function has been superseded by the tie() function.]
-This binds a dbm(3) or ndbm(3) file to an associative array. ASSOC is the
+This binds a dbm(3), ndbm(3), sdbm(3), gdbm(3), or Berkeley DB file to an associative array. ASSOC is the
name of the associative array. (Unlike normal open, the first argument
is I<NOT> a filehandle, even though it looks like one). DBNAME is the
-name of the database (without the F<.dir> or F<.pag> extension). If the
+name of the database (without the F<.dir> or F<.pag> extension if any). If the
database does not exist, it is created with protection specified by
MODE (as modified by the umask()). If your system only supports the
older DBM functions, you may perform only one dbmopen() in your program.
-If your system has neither DBM nor ndbm, calling dbmopen() produces a
-fatal error.
+In older versions of Perl,
+if your system had neither DBM nor ndbm, calling dbmopen() produced a
+fatal error; it now falls back to sdbm(3).
If you don't have write access to the DBM file, you can only read
associative array variables, not set them. If you want to test whether
@@ -452,6 +469,8 @@ function to iterate over large DBM files. Example:
}
dbmclose(%HIST);
+See also L<DB_File> for many other interesting possibilities.
+
=item defined EXPR
Returns a boolean value saying whether the lvalue EXPR has a real value
@@ -501,10 +520,11 @@ a hash key lookup:
=item die LIST
Outside of an eval(), prints the value of LIST to C<STDERR> and exits with
-the current value of $! (errno). If $! is 0, exits with the value of
+the current value of $! (errno). If $! is 0, exits with the value of
C<($? E<gt>E<gt> 8)> (backtick `command` status). If C<($? E<gt>E<gt> 8)> is 0,
exits with 255. Inside an eval(), the error message is stuffed into C<$@>,
-and the eval() is terminated with the undefined value.
+and the eval() is terminated with the undefined value; this makes die()
+the way to raise an exception.
Equivalent examples:
@@ -558,7 +578,8 @@ reparse the file every time you call it, so you probably don't want to
do this inside a loop.
Note that inclusion of library modules is better done with the
-use() and require() operators.
+use() and require() operators, which also do error checking
+and raise an exception if there's a problem.
=item dump LABEL
@@ -595,7 +616,7 @@ Example:
=item each ASSOC_ARRAY
-Returns a 2 element array consisting of the key and value for the next
+Returns a 2-element array consisting of the key and value for the next
value of an associative array, so that you can iterate over it.
Entries are returned in an apparently random order. When the array is
entirely read, a null array is returned (which when assigned produces a
@@ -615,6 +636,8 @@ See also keys() and values().
=item eof FILEHANDLE
+=item eof ()
+
=item eof
Returns 1 if the next read on FILEHANDLE will return end of file, or if
@@ -627,7 +650,7 @@ as terminals may lose the end-of-file condition if you do.
An C<eof> without an argument uses the last file read as argument.
Empty parentheses () may be used to indicate
-the pseudo file formed of the files listed on the command line, i.e.
+the pseudofile formed of the files listed on the command line, i.e.
C<eof()> is reasonable to use inside a while (<>) loop to detect the end
of only the last file. Use C<eof(ARGV)> or eof without the parentheses to
test I<EACH> file in a while (<>) loop. Examples:
@@ -649,7 +672,7 @@ test I<EACH> file in a while (<>) loop. Examples:
}
Practical hint: you almost never need to use C<eof> in Perl, because the
-input operators return undef when they run out of data.
+input operators return undef when they run out of data, so testing C<eof> explicitly is rarely necessary.
=item eval EXPR
@@ -668,7 +691,7 @@ string. If EXPR is omitted, evaluates $_. The final semicolon, if
any, may be omitted from the expression.
Note that, since eval() traps otherwise-fatal errors, it is useful for
-determining whether a particular feature (such as dbmopen() or symlink())
+determining whether a particular feature (such as socket() or symlink())
is implemented. It is also Perl's exception trapping mechanism, where
the die operator is used to raise exceptions.
@@ -797,10 +820,12 @@ value is taken as the name of the filehandle.
=item flock FILEHANDLE,OPERATION
-Calls flock(2) on FILEHANDLE. See L<flock(2)> for
-definition of OPERATION. Returns TRUE for success, FALSE on failure.
-Will produce a fatal error if used on a machine that doesn't implement
-flock(2). Here's a mailbox appender for BSD systems.
+Calls flock(2) on FILEHANDLE. See L<flock(2)> for definition of
+OPERATION. Returns TRUE for success, FALSE on failure. Will produce a
+fatal error if used on a machine that doesn't implement either flock(2) or
+fcntl(2). (fcntl(2) will be automatically used if flock(2) is missing.)
+
+Here's a mailbox appender for BSD systems.
$LOCK_SH = 1;
$LOCK_EX = 2;
@@ -825,13 +850,13 @@ flock(2). Here's a mailbox appender for BSD systems.
print MBOX $msg,"\n\n";
unlock();
-Note that flock() can't lock things over the network. You need to do
-locking with fcntl() for that.
+Note that many versions of flock() cannot lock things over the network.
+You need to do locking with fcntl() for that.
=item fork
Does a fork(2) system call. Returns the child pid to the parent process
-and 0 to the child process, or undef if the fork is unsuccessful.
+and 0 to the child process, or C<undef> if the fork is unsuccessful.
Note: unflushed buffers remain unflushed in both processes, which means
you may need to set C<$|> ($AUTOFLUSH in English) or call the
autoflush() FileHandle method to avoid duplicate output.
@@ -839,7 +864,7 @@ autoflush() FileHandle method to avoid duplicate output.
If you fork() without ever waiting on your children, you will accumulate
zombies:
- $SIG{'CHLD'} = sub { wait };
+ $SIG{CHLD} = sub { wait };
There's also the double-fork trick (error checking on
fork() returns omitted);
@@ -849,7 +874,7 @@ fork() returns omitted);
exec "what you really wanna do";
die "no exec";
# ... or ...
- some_perl_code_here;
+ ## (some_perl_code_here)
exit 0;
}
exit 0;
@@ -859,21 +884,22 @@ fork() returns omitted);
=item formline PICTURE, LIST
-This is an internal function used by formats, though you may call it
+This is an internal function used by C<format>s, though you may call it
too. It formats (see L<perlform>) a list of values according to the
contents of PICTURE, placing the output into the format output
-accumulator, C<$^A>. Eventually, when a write() is done, the contents of
+accumulator, C<$^A> (or $ACCUMULATOR in English).
+Eventually, when a write() is done, the contents of
C<$^A> are written to some filehandle, but you could also read C<$^A>
yourself and then set C<$^A> back to "". Note that a format typically
does one formline() per line of form, but the formline() function itself
doesn't care how many newlines are embedded in the PICTURE. This means
-that the ~ and ~~ tokens will treat the entire PICTURE as a single line.
+that the C<~> and C<~~> tokens will treat the entire PICTURE as a single line.
You may therefore need to use multiple formlines to implement a single
record format, just like the format compiler.
Be careful if you put double quotes around the picture, since an "C<@>"
character may be taken to mean the beginning of an array name.
-formline() always returns TRUE.
+formline() always returns TRUE. See L<perlform> for other examples.
=item getc FILEHANDLE
@@ -881,27 +907,55 @@ formline() always returns TRUE.
Returns the next character from the input file attached to FILEHANDLE,
or a null string at end of file. If FILEHANDLE is omitted, reads from STDIN.
+This is not particularly efficient. It cannot be used to get unbuffered
+single-character input, however. For that, try something more like:
+
+ if ($BSD_STYLE) {
+ system "stty cbreak </dev/tty >/dev/tty 2>&1";
+ }
+ else {
+ system "stty", '-icanon';
+ system "stty", 'eol', "\001";
+ }
+
+ $key = getc(STDIN);
+
+ if ($BSD_STYLE) {
+ system "stty -cbreak </dev/tty >/dev/tty 2>&1";
+ }
+ else {
+ system "stty", 'icanon';
+ system "stty", 'eol', '^@'; # ascii null
+ }
+ print "\n";
+
+Determination of whether $BSD_STYLE should be set
+is left as an exercise to the reader.
=item getlogin
Returns the current login from F</etc/utmp>, if any. If null, use
-getpwuid().
+getpwuid().
$login = getlogin || (getpwuid($<))[0] || "Kilroy";
+Do not consider getlogin() for authentication: it is not as
+secure as getpwuid().
+
=item getpeername SOCKET
Returns the packed sockaddr address of other end of the SOCKET connection.
- # An internet sockaddr
- $sockaddr = 'S n a4 x8';
- $hersockaddr = getpeername(S);
- ($family, $port, $heraddr) = unpack($sockaddr,$hersockaddr);
+ use Socket;
+ $hersockaddr = getpeername(SOCK);
+ ($port, $iaddr) = unpack_sockaddr_in($hersockaddr);
+ $herhostname = gethostbyaddr($iaddr, AF_INET);
+ $herstraddr = inet_ntoa($iaddr);
=item getpgrp PID
Returns the current process group for the specified PID, 0 for the
-current process. Will produce a fatal error if used on a machine that
+current process. Will raise an exception if used on a machine that
doesn't implement getpgrp(2). If PID is omitted, returns process
group of current process.
@@ -911,8 +965,8 @@ Returns the process id of the parent process.
=item getpriority WHICH,WHO
-Returns the current priority for a process, a process group, or a
-user. (See L<getpriority(2)>.) Will produce a fatal error if used on a
+Returns the current priority for a process, a process group, or a user.
+(See L<getpriority(2)>.) Will raise a fatal exception if used on a
machine that doesn't implement getpriority(2).
=item getpwnam NAME
@@ -1017,11 +1071,9 @@ by saying something like:
Returns the packed sockaddr address of this end of the SOCKET connection.
- # An internet sockaddr
- $sockaddr = 'S n a4 x8';
- $mysockaddr = getsockname(S);
- ($family, $port, $myaddr) =
- unpack($sockaddr,$mysockaddr);
+ use Socket;
+ $mysockaddr = getsockname(SOCK);
+ ($port, $myaddr) = unpack_sockaddr_in($mysockaddr);
=item getsockopt SOCKET,LEVEL,OPTNAME
@@ -1031,13 +1083,13 @@ Returns the socket option requested, or undefined if there is an error.
Returns the value of EXPR with filename expansions such as a shell
would do. This is the internal function implementing the <*.*>
-operator.
+operator, except it's easier to use.
=item gmtime EXPR
Converts a time as returned by the time function to a 9-element array
-with the time localized for the Greenwich timezone. Typically used as
-follows:
+with the time localized for the standard Greenwich timezone.
+Typically used as follows:
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
@@ -1098,25 +1150,25 @@ array.
=item hex EXPR
-Returns the decimal value of EXPR interpreted as an hex string. (To
-interpret strings that might start with 0 or 0x see oct().) If EXPR is
-omitted, uses $_.
+Interprets EXPR as a hex string and returns the corresponding decimal
+value. (To convert strings that might start with 0 or 0x see
+oct().) If EXPR is omitted, uses $_.
=item import
There is no built-in import() function. It is merely an ordinary
-method subroutine defined (or inherited) by modules that wish to export
+method (subroutine) defined (or inherited) by modules that wish to export
names to another module. The use() function calls the import() method
-for the package used. See also L</use> and L<perlmod>.
+for the package used. See also L</use>, L<perlmod>, and L<Exporter>.
=item index STR,SUBSTR,POSITION
=item index STR,SUBSTR
-Returns the position of the first occurrence of SUBSTR in STR at or
-after POSITION. If POSITION is omitted, starts searching from the
-beginning of the string. The return value is based at 0, or whatever
-you've set the $[ variable to. If the substring is not found, returns
+Returns the position of the first occurrence of SUBSTR in STR at or after
+POSITION. If POSITION is omitted, starts searching from the beginning of
+the string. The return value is based at 0 (or whatever you've set the $[
+variable to--but don't do that). If the substring is not found, returns
one less than the base, ordinarily -1.
=item int EXPR
@@ -1127,28 +1179,30 @@ Returns the integer portion of EXPR. If EXPR is omitted, uses $_.
Implements the ioctl(2) function. You'll probably have to say
- require "ioctl.ph"; # probably /usr/local/lib/perl/ioctl.ph
+ require "ioctl.ph"; # probably in /usr/local/lib/perl/ioctl.ph
-first to get the correct function definitions. If ioctl.ph doesn't
+first to get the correct function definitions. If F<ioctl.ph> doesn't
exist or doesn't have the correct definitions you'll have to roll your
-own, based on your C header files such as <sys/ioctl.h>. (There is a
-Perl script called B<h2ph> that comes with the Perl kit which may help you
-in this.) SCALAR will be read and/or written depending on the
-FUNCTION--a pointer to the string value of SCALAR will be passed as the
-third argument of the actual ioctl call. (If SCALAR has no string
-value but does have a numeric value, that value will be passed rather
-than a pointer to the string value. To guarantee this to be TRUE, add
-a 0 to the scalar before using it.) The pack() and unpack() functions
-are useful for manipulating the values of structures used by ioctl().
-The following example sets the erase character to DEL.
+own, based on your C header files such as F<E<lt>sys/ioctl.hE<gt>>.
+(There is a Perl script called B<h2ph> that comes with the Perl kit which
+may help you in this, but it's non-trivial.) SCALAR will be read and/or
+written depending on the FUNCTION--a pointer to the string value of SCALAR
+will be passed as the third argument of the actual ioctl call. (If SCALAR
+has no string value but does have a numeric value, that value will be
+passed rather than a pointer to the string value. To guarantee this to be
+TRUE, add a 0 to the scalar before using it.) The pack() and unpack()
+functions are useful for manipulating the values of structures used by
+ioctl(). The following example sets the erase character to DEL.
require 'ioctl.ph';
+ $getp = &TIOCGETP;
+ die "NO TIOCGETP" if $@ || !$getp;
$sgttyb_t = "ccccs"; # 4 chars and a short
- if (ioctl(STDIN,$TIOCGETP,$sgttyb)) {
+ if (ioctl(STDIN,$getp,$sgttyb)) {
@ary = unpack($sgttyb_t,$sgttyb);
$ary[2] = 127;
$sgttyb = pack($sgttyb_t,@ary);
- ioctl(STDIN,$TIOCSETP,$sgttyb)
+ ioctl(STDIN,&TIOCSETP,$sgttyb)
|| die "Can't ioctl: $!";
}
@@ -1197,20 +1251,27 @@ or how about sorted by key:
print $key, '=', $ENV{$key}, "\n";
}
+To sort an array by value, you'll need to use a C<sort{}>
+function. Here's a descending numeric sort by value:
+
+ foreach $key (sort { $hash{$b} <=> $hash{$a} } keys %hash) {
+ printf "%4d %s\n", $hash{$key}, $key;
+ }
+
=item kill LIST
-Sends a signal to a list of processes. The first element of the list
-must be the signal to send. Returns the number of processes
-successfully signaled.
+Sends a signal to a list of processes. The first element of
+the list must be the signal to send. Returns the number of
+processes successfully signaled.
$cnt = kill 1, $child1, $child2;
kill 9, @goners;
-Unlike in the shell, in Perl
-if the I<SIGNAL> is negative, it kills process groups instead of processes.
-(On System V, a negative I<PROCESS> number will also kill process
-groups, but that's not portable.) That means you usually want to use
-positive not negative signals. You may also use a signal name in quotes.
+Unlike in the shell, in Perl if the I<SIGNAL> is negative, it kills
+process groups instead of processes. (On System V, a negative I<PROCESS>
+number will also kill process groups, but that's not portable.) That
+means you usually want to use positive not negative signals. You may also
+use a signal name in quotes. See the L<perlipc/"Signals"> man page for details.
=item last LABEL
@@ -1221,20 +1282,22 @@ loops); it immediately exits the loop in question. If the LABEL is
omitted, the command refers to the innermost enclosing loop. The
C<continue> block, if any, is not executed:
- line: while (<STDIN>) {
- last line if /^$/; # exit when done with header
+ LINE: while (<STDIN>) {
+ last LINE if /^$/; # exit when done with header
...
}
=item lc EXPR
Returns an lowercased version of EXPR. This is the internal function
-implementing the \L escape in double-quoted strings.
+implementing the \L escape in double-quoted strings.
+Should respect any POSIX setlocale() settings.
=item lcfirst EXPR
Returns the value of EXPR with the first character lowercased. This is
the internal function implementing the \l escape in double-quoted strings.
+Should respect any POSIX setlocale() settings.
=item length EXPR
@@ -1249,7 +1312,7 @@ success, 0 otherwise.
=item listen SOCKET,QUEUESIZE
Does the same thing that the listen system call does. Returns TRUE if
-it succeeded, FALSE otherwise. See example in L<perlipc>.
+it succeeded, FALSE otherwise. See example in L<perlipc/"Sockets: Client/Server Communication">.
=item local EXPR
@@ -1371,13 +1434,13 @@ may produce zero, one, or more elements in the returned value.
translates a list of numbers to the corresponding characters. And
- %hash = map {&key($_), $_} @array;
+ %hash = map { getkey($_) => $_ } @array;
is just a funny way to write
%hash = ();
foreach $_ (@array) {
- $hash{&key($_)} = $_;
+ $hash{getkey($_)} = $_;
}
=item mkdir FILENAME,MODE
@@ -1388,14 +1451,14 @@ it returns 0 and sets $! (errno).
=item msgctl ID,CMD,ARG
-Calls the System V IPC function msgctl. If CMD is &IPC_STAT, then ARG
+Calls the System V IPC function msgctl(2). If CMD is &IPC_STAT, then ARG
must be a variable which will hold the returned msqid_ds structure.
Returns like ioctl: the undefined value for error, "0 but true" for
zero, or the actual return value otherwise.
=item msgget KEY,FLAGS
-Calls the System V IPC function msgget. Returns the message queue id,
+Calls the System V IPC function msgget(2). Returns the message queue id,
or the undefined value if there is an error.
=item msgsnd ID,MSG,FLAGS
@@ -1417,18 +1480,20 @@ an error.
=item my EXPR
A "my" declares the listed variables to be local (lexically) to the
-enclosing block, subroutine, eval or "do". If more than one value is
+enclosing block, subroutine, C<eval>, or C<do/require/use>'d file. If more than one value is
listed, the list must be placed in parens. All the listed elements
must be legal lvalues. Only alphanumeric identifiers may be lexically
scoped--magical builtins like $/ must be localized with "local"
-instead. In particular, you're not allowed to say
+instead. You also cannot use my() on a package variable.
+In particular, you're not allowed to say
- my $_; # Illegal.
+ my $_; # Illegal!
+ my $pack::var; # Illegal!
Unlike the "local" declaration, variables declared with "my"
are totally hidden from the outside world, including any called
subroutines (even if it's the same subroutine--every call gets its own
-copy).
+copy).
(An eval(), however, can see the lexical variables of the scope it is
being evaluated in so long as the names aren't hidden by declarations within
@@ -1505,6 +1570,40 @@ block must either refer to a lexical variable, or must be fully
qualified with the package name. A compilation error results
otherwise. An inner block may countermand this with S<"no strict 'vars'">.
+Variables declared with "my" are not part of any package and
+are therefore never fully qualified with the package name.
+However, you may declare a "my" variable at the outermost
+scope of a file to totally hide any such identifiers from the
+outside world. This is similar to C's static variables
+at the file level. To do this with a subroutine requires the
+use of a closure (anonymous function):
+
+ my $secret_version = '1.001-beta';
+ my $secret_sub = sub { print $secret_version };
+ &$secret_sub();
+
+This does not work with object methods, however;
+all object methods have to be in the symbol table of some
+package to be found.
+
+Just because the "my" variable is lexically scoped doesn't mean that
+within a function it works like a C static. Here's a mechanism for giving
+a function private variables with both lexical scoping and a static
+lifetime.
+
+ #!/usr/bin/perl -l
+ $var = "global";
+ { my $count = 0;
+ my $var = "static";
+ sub foo {
+ $count++;
+ print "$var (call # $count)";
+ }
+ }
+ print $var; foo();
+ print $var; foo();
+ print $var; foo();
+
=item next LABEL
=item next
@@ -1512,8 +1611,8 @@ otherwise. An inner block may countermand this with S<"no strict 'vars'">.
The C<next> command is like the C<continue> statement in C; it starts
the next iteration of the loop:
- line: while (<STDIN>) {
- next line if /^#/; # discard comments
+ LINE: while (<STDIN>) {
+ next LINE if /^#/; # discard comments
...
}
@@ -1527,10 +1626,10 @@ See the "use" function, which "no" is the opposite of.
=item oct EXPR
-Returns the decimal value of EXPR interpreted as an octal string. (If
-EXPR happens to start off with 0x, interprets it as a hex string
-instead.) The following will handle decimal, octal, and hex in the
-standard Perl or C notation:
+Interprets EXPR as an octal string and returns the corresponding
+decimal value. (If EXPR happens to start off with 0x, interprets it as
+a hex string instead.) The following will handle decimal, octal, and
+hex in the standard Perl or C notation:
$val = oct($val) if $val =~ /^0/;
@@ -1541,21 +1640,23 @@ If EXPR is omitted, uses $_.
=item open FILEHANDLE
Opens the file whose filename is given by EXPR, and associates it with
-FILEHANDLE. If FILEHANDLE is an expression, its value is used as the
-name of the real filehandle wanted. If EXPR is omitted, the scalar
-variable of the same name as the FILEHANDLE contains the filename. If
-the filename begins with "<" or nothing, the file is opened for input.
-If the filename begins with ">", the file is opened for output. If the
-filename begins with ">>", the file is opened for appending. (You can
-put a '+' in front of the '>' or '<' to indicate that you want both
-read and write access to the file.) If the filename begins with "|",
-the filename is interpreted as a command to which output is to be
-piped, and if the filename ends with a "|", the filename is interpreted
-as command which pipes input to us. (You may not have a command that
-pipes both in and out.) Opening '-' opens STDIN and opening '>-'
-opens STDOUT. Open returns non-zero upon success, the undefined
-value otherwise. If the open involved a pipe, the return value happens
-to be the pid of the subprocess. Examples:
+FILEHANDLE. If FILEHANDLE is an expression, its value is used as the name
+of the real filehandle wanted. If EXPR is omitted, the scalar variable of
+the same name as the FILEHANDLE contains the filename. If the filename
+begins with "<" or nothing, the file is opened for input. If the filename
+begins with ">", the file is opened for output. If the filename begins
+with ">>", the file is opened for appending. (You can put a '+' in front
+of the '>' or '<' to indicate that you want both read and write access to
+the file.) If the filename begins with "|", the filename is interpreted
+as a command to which output is to be piped, and if the filename ends with
+a "|", the filename is interpreted as a command which pipes input to us.
+See L<perlipc/"Using open() for IPC"> for more examples of this. (You may
+not have a command that pipes both in and out, but see L<open2>,
+L<open3>, and L<perlipc/"Bidirectional Communication"> for alternatives.)
+Opening '-' opens STDIN and opening '>-' opens STDOUT. Open returns
+non-zero upon success, the undefined value otherwise. If the open
+involved a pipe, the return value happens to be the pid of the
+subprocess. Examples:
$ARTICLE = 100;
open ARTICLE or die "Can't find article $ARTICLE: $!\n";
@@ -1563,9 +1664,9 @@ to be the pid of the subprocess. Examples:
open(LOG, '>>/usr/spool/news/twitlog'); # (log is reserved)
- open(article, "caesar <$article |"); # decrypt article
+ open(ARTICLE, "caesar <$article |"); # decrypt article
- open(extract, "|sort >/tmp/Tmp$$"); # $$ is our process id
+ open(EXTRACT, "|sort >/tmp/Tmp$$"); # $$ is our process id
# process argument list of files along with any includes
@@ -1622,7 +1723,8 @@ STDERR:
If you specify "<&=N", where N is a number, then Perl will do an
-equivalent of C's fdopen() of that file descriptor. For example:
+equivalent of C's fdopen() of that file descriptor; this is more
+parsimonious of file descriptors. For example:
open(FILEHANDLE, "<&=$fd")
@@ -1636,8 +1738,8 @@ In the child process the filehandle isn't opened--i/o happens from/to
the new STDOUT or STDIN. Typically this is used like the normal
piped open when you want to exercise more control over just how the
pipe command gets executed, such as when you are running setuid, and
-don't want to have to scan shell commands for metacharacters. The
-following pairs are more or less equivalent:
+don't want to have to scan shell commands for metacharacters.
+The following pairs are more or less equivalent:
open(FOO, "|tr '[a-z]' '[A-Z]'");
open(FOO, "|-") || exec 'tr', '[a-z]', '[A-Z]';
@@ -1645,6 +1747,8 @@ following pairs are more or less equivalent:
open(FOO, "cat -n '$file'|");
open(FOO, "-|") || exec 'cat', '-n', $file;
+See L<perlipc/"Safe Pipe Opens"> for more examples of this.
+
Explicitly closing any piped filehandle causes the parent process to
wait for the child to finish, and returns the status value in $?.
Note: on any operation which may do a fork, unflushed buffers remain
@@ -1770,6 +1874,9 @@ unless you are very careful. In addition, note that Perl's pipes use
stdio buffering, so you may need to set $| to flush your WRITEHANDLE
after each command, depending on the application.
+See L<open2>, L<open3>, and L<perlipc/"Bidirectional Communication">
+for examples of such things.
+
=item pop ARRAY
Pops and returns the last value of the array, shortening the array by
@@ -1781,7 +1888,7 @@ If there are no elements in the array, returns the undefined value.
=item pos SCALAR
-Returns the offset of where the last m//g search left off for the variable
+Returns the offset of where the last C<m//g> search left off for the variable
in question. May be modified to change that offset.
=item print FILEHANDLE LIST
@@ -1807,6 +1914,12 @@ keyword with a left parenthesis unless you want the corresponding right
parenthesis to terminate the arguments to the print--interpose a + or
put parens around all the arguments.
+Note that if you're storing FILEHANDLES in an array or other expression,
+you will have to use a block returning its value instead:
+
+ print { $files[$i] } "stuff\n";
+ print { $OK ? STDOUT : STDERR } "stuff\n";
+
=item printf FILEHANDLE LIST
=item printf LIST
@@ -1891,7 +2004,8 @@ data into variable SCALAR from the specified SOCKET filehandle.
Actually does a C recvfrom(), so that it can returns the address of the
sender. Returns the undefined value if there's an error. SCALAR will
be grown or shrunk to the length actually read. Takes the same flags
-as the system call of the same name.
+as the system call of the same name.
+See L<perlipc/"UDP: Message Passing"> for examples.
=item redo LABEL
@@ -1905,7 +2019,7 @@ themselves about what was just input:
# a simpleminded Pascal comment stripper
# (warning: assumes no { or } in strings)
- line: while (<STDIN>) {
+ LINE: while (<STDIN>) {
while (s|({.*}.*){.*}|$1 |) {}
s|{.*}| |;
if (s|{.*| |) {
@@ -1913,7 +2027,7 @@ themselves about what was just input:
while (<STDIN>) {
if (/}/) { # end of comment?
s|^|$front{|;
- redo line;
+ redo LINE;
}
}
}
@@ -2022,7 +2136,7 @@ so anymore you probably want to use them instead. See L</my>.
=item return LIST
Returns from a subroutine or eval with the value specified. (Note that
-in the absence of a return a subroutine or eval will automatically
+in the absence of a return a subroutine or eval() will automatically
return the value of the last expression evaluated.)
=item reverse LIST
@@ -2030,7 +2144,12 @@ return the value of the last expression evaluated.)
In a list context, returns a list value consisting of the elements
of LIST in the opposite order. In a scalar context, returns a string
value consisting of the bytes of the first element of LIST in the
-opposite order.
+opposite order.
+
+ print reverse <>; # line tac
+
+ undef $/;
+ print scalar reverse scalar <>; # byte tac
=item rewinddir DIRHANDLE
@@ -2067,7 +2186,7 @@ call of stdio. FILEHANDLE may be an expression whose value gives the name
of the filehandle. The values for WHENCE are 0 to set the file pointer to
POSITION, 1 to set the it to current plus POSITION, and 2 to set it to EOF
plus offset. You may use the values SEEK_SET, SEEK_CUR, and SEEK_END for
-this is using the POSIX module. Returns 1 upon success, 0 otherwise.
+this from the POSIX module. Returns 1 upon success, 0 otherwise.
=item seekdir DIRHANDLE,POS
@@ -2098,15 +2217,15 @@ actual filehandle. Thus:
$oldfh = select(STDERR); $| = 1; select($oldfh);
-With Perl 5, filehandles are objects with methods, and the last example
-is preferably written
+Some programmers may prefer to think of filehandles as objects with
+methods, preferring to write the last example as:
use FileHandle;
STDERR->autoflush(1);
=item select RBITS,WBITS,EBITS,TIMEOUT
-This calls the select system(2) call with the bitmasks specified, which
+This calls the select(2) system call with the bitmasks specified, which
can be constructed using fileno() and vec(), along these lines:
$rin = $win = $ein = '';
@@ -2125,7 +2244,7 @@ subroutine:
}
$bits;
}
- $rin = &fhbits('STDIN TTY SOCK');
+ $rin = fhbits('STDIN TTY SOCK');
The usual idiom is:
@@ -2141,7 +2260,7 @@ in seconds, which may be fractional. Note: not all implementations are
capable of returning the $timeleft. If not, they always return
$timeleft equal to the supplied $timeout.
-You can effect a 250 microsecond sleep this way:
+You can effect a 250-millisecond sleep this way:
select(undef, undef, undef, 0.25);
@@ -2183,6 +2302,7 @@ of the same name. On unconnected sockets you must specify a
destination to send TO, in which case it does a C sendto(). Returns
the number of characters sent, or the undefined value if there is an
error.
+See L<perlipc/"UDP: Message Passing"> for examples.
=item setpgrp PID,PGRP
@@ -2265,7 +2385,7 @@ always sleep the full amount.
Opens a socket of the specified kind and attaches it to filehandle
SOCKET. DOMAIN, TYPE and PROTOCOL are specified the same as for the
system call of the same name. You should "use Socket;" first to get
-the proper definitions imported. See the example in L<perlipc>.
+the proper definitions imported. See the example in L<perlipc/"Sockets: Client/Server Communication">.
=item socketpair SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL
@@ -2377,15 +2497,14 @@ using C<??> as the pattern delimiters, but it still returns the array
value.) The use of implicit split to @_ is deprecated, however.
If EXPR is omitted, splits the $_ string. If PATTERN is also omitted,
-splits on whitespace (after skipping any leading whitespace).
-Anything matching PATTERN is taken
-to be a delimiter separating the fields. (Note that the delimiter may
-be longer than one character.) If LIMIT is specified and is not
-negative, splits into no more than that many fields (though it may
-split into fewer). If LIMIT is unspecified, trailing null fields are
-stripped (which potential users of pop() would do well to remember).
-If LIMIT is negative, it is treated as if an arbitrarily large LIMIT
-had been specified.
+splits on whitespace (after skipping any leading whitespace). Anything
+matching PATTERN is taken to be a delimiter separating the fields. (Note
+that the delimiter may be longer than one character.) If LIMIT is
+specified and is not negative, splits into no more than that many fields
+(though it may split into fewer). If LIMIT is unspecified, trailing null
+fields are stripped (which potential users of pop() would do well to
+remember). If LIMIT is negative, it is treated as if an arbitrarily large
+LIMIT had been specified.
A pattern matching the null string (not to be confused with
a null pattern C<//>, which is just one member of the set of patterns
@@ -2415,6 +2534,12 @@ produces the list value
(1, '-', 10, ',', 20)
+If you had the entire header of a normal Unix email message in $header,
+you could split it up into fields and their values this way:
+
+ $header =~ s/\n\s+/ /g; # fix continuation lines
+ %hdrs = (UNIX_FROM => split /^(.*?):\s*/m, $header);
+
The pattern C</PATTERN/> may be replaced with an expression to specify
patterns that vary at runtime. (To do runtime compilation only once,
use C</$variable/o>.)
@@ -2444,7 +2569,8 @@ L</chomp>, and L</join>.)
Returns a string formatted by the usual printf conventions of the C
language. (The * character for an indirectly specified length is not
supported, but you can get the same effect by interpolating a variable
-into the pattern.)
+into the pattern.) Some C libraries' implementations of sprintf() can dump core
+when fed ludicrous arguments.
=item sqrt EXPR
@@ -2633,30 +2759,34 @@ Value may be given to seekdir() to access a particular location in a
directory. Has the same caveats about possible directory compaction as
the corresponding system library routine.
-=item tie VARIABLE,PACKAGENAME,LIST
+=item tie VARIABLE,CLASSNAME,LIST
-This function binds a variable to a package that will provide the
-implementation for the variable. VARIABLE is the name of the variable to
-be enchanted. PACKAGENAME is the name of a package implementing objects
-of correct type. Any additional arguments are passed to the "new" method
-of the package (meaning TIESCALAR, TIEARRAY, or TIEHASH). Typically these
-are arguments such as might be passed to the dbm_open() function of C.
+This function binds a variable to a package class that will provide the
+implementation for the variable. VARIABLE is the name of the variable
+to be enchanted. CLASSNAME is the name of a class implementing objects
+of correct type. Any additional arguments are passed to the "new"
+method of the class (meaning TIESCALAR, TIEARRAY, or TIEHASH).
+Typically these are arguments such as might be passed to the dbm_open()
+function of C. The object returned by the "new" method is also
+returned by the tie() function, which would be useful if you want to
+access other methods in CLASSNAME.
Note that functions such as keys() and values() may return huge array
values when used on large objects, like DBM files. You may prefer to
use the each() function to iterate over such. Example:
# print out history file offsets
+ use NDBM_File;
tie(%HIST, NDBM_File, '/usr/lib/news/history', 1, 0);
while (($key,$val) = each %HIST) {
print $key, ' = ', unpack('L',$val), "\n";
}
untie(%HIST);
-A package implementing an associative array should have the following
+A class implementing an associative array should have the following
methods:
- TIEHASH objectname, LIST
+ TIEHASH classname, LIST
DESTROY this
FETCH this, key
STORE this, key, value
@@ -2665,21 +2795,25 @@ methods:
FIRSTKEY this
NEXTKEY this, lastkey
-A package implementing an ordinary array should have the following methods:
+A class implementing an ordinary array should have the following methods:
- TIEARRAY objectname, LIST
+ TIEARRAY classname, LIST
DESTROY this
FETCH this, key
STORE this, key, value
[others TBD]
-A package implementing a scalar should have the following methods:
+A class implementing a scalar should have the following methods:
- TIESCALAR objectname, LIST
+ TIESCALAR classname, LIST
DESTROY this
FETCH this,
STORE this, value
+Unlike dbmopen(), the tie() function will not use or require a module
+for you--you need to do that explicitly yourself. See L<DB_File>
+or the F<Config> module for interesting tie() implementations.
+
=item time
Returns the number of non-leap seconds since 00:00:00 UTC, January 1,
@@ -2708,11 +2842,13 @@ on your system.
Returns an uppercased version of EXPR. This is the internal function
implementing the \U escape in double-quoted strings.
+Should respect any POSIX setlocale() settings.
=item ucfirst EXPR
Returns the value of EXPR with the first character uppercased. This is
the internal function implementing the \u escape in double-quoted strings.
+Should respect any POSIX setlocale() settings.
=item umask EXPR
@@ -2826,6 +2962,7 @@ Because this is a wide-open interface, pragmas (compiler directives)
are also implemented this way. Currently implemented pragmas are:
use integer;
+ use diagnostics;
use sigtrap qw(SEGV BUS);
use strict qw(subs vars refs);
use subs qw(afunc blurfl);
@@ -2913,7 +3050,7 @@ for a scalar.
=item warn LIST
Produces a message on STDERR just like die(), but doesn't exit or
-throw an exception.
+throw an exception.
=item write FILEHANDLE
diff --git a/pod/perlipc.pod b/pod/perlipc.pod
index 5a43660fb2..3166f1a75e 100644
--- a/pod/perlipc.pod
+++ b/pod/perlipc.pod
@@ -1,108 +1,751 @@
=head1 NAME
-perlipc - Perl interprocess communication
+perlipc - Perl interprocess communication (signals, fifos, pipes, safe
+subprocesses, sockets, and semaphores)
=head1 DESCRIPTION
-The IPC facilities of Perl are built on the Berkeley socket mechanism.
-If you don't have sockets, you can ignore this section. The calls have
-the same names as the corresponding system calls, but the arguments
-tend to differ, for two reasons. First, Perl file handles work
-differently than C file descriptors. Second, Perl already knows the
-length of its strings, so you don't need to pass that information.
+The basic IPC facilities of Perl are built out of the good old Unix
+signals, named pipes, pipe opens, the Berkeley socket routines, and SysV
+IPC calls. Each is used in slightly different situations.
+
+=head1 Signals
+
+Perl uses a simple signal handling model: the %SIG hash contains names or
+references of user-installed signal handlers. These handlers will be called
+with an argument which is the name of the signal that triggered it. A
+signal may be generated intentionally from a particular keyboard sequence like
+control-C or control-Z, sent to you from another process, or
+triggered automatically by the kernel when special events transpire, like
+a child process exiting, your process running out of stack space, or
+hitting a file size limit.
+
+For example, to trap an interrupt signal, set up a handler like this.
+Notice how all we do is set a global variable and then raise an
+exception. That's because on most systems libraries are not
+re-entrant, so calling any print() functions (or even anything that needs to
+malloc(3) more memory) could in theory trigger a memory fault
+and subsequent core dump.
+
+ sub catch_zap {
+ my $signame = shift;
+ $shucks++;
+ die "Somebody sent me a SIG$signame";
+ }
+ $SIG{INT} = 'catch_zap'; # could fail in modules
+ $SIG{INT} = \&catch_zap; # best strategy
+
+The names of the signals are the ones listed out by C<kill -l> on your
+system, or you can retrieve them from the Config module. Set up an
+@signame list indexed by number to get the name and a %signo table
+indexed by name to get the number:
+
+ use Config;
+ defined $Config{sig_name} || die "No sigs?";
+ foreach $name (split(' ', $Config{sig_name})) {
+ $signo{$name} = $i;
+ $signame[$i] = $name;
+ $i++;
+ }
+
+So to check whether signal 17 and SIGALRM were the same, just do this:
+
+ print "signal #17 = $signame[17]\n";
+ if ($signo{ALRM}) {
+ print "SIGALRM is $signo{ALRM}\n";
+ }
+
+You may also choose to assign the strings C<'IGNORE'> or C<'DEFAULT'> as
+the handler, in which case Perl will try to discard the signal or do the
+default thing. Some signals can be neither trapped nor ignored, such as
+the KILL and STOP (but not the TSTP) signals. One strategy for
+temporarily ignoring signals is to use a local() statement, which will be
+automatically restored once your block is exited. (Remember that local()
+values are "inherited" by functions called from within that block.)
+
+ sub precious {
+ local $SIG{INT} = 'IGNORE';
+ &more_functions;
+ }
+ sub more_functions {
+ # interrupts still ignored, for now...
+ }
+
+Sending a signal to a negative process ID means that you send the signal
+to the entire Unix process-group. This code sends a hang-up signal to all
+processes in the current process group I<except for> the current process
+itself:
+
+ {
+ local $SIG{HUP} = 'IGNORE';
+ kill HUP => -$$;
+ # snazzy writing of: kill('HUP', -$$)
+ }
-=head2 Client/Server Communication
+Another interesting signal to send is signal number zero. This doesn't
+actually affect another process, but instead checks whether it's alive
+or has changed its UID.
-Here's a sample TCP client.
+ unless (kill 0 => $kid_pid) {
+ warn "something wicked happened to $kid_pid";
+ }
- ($them,$port) = @ARGV;
- $port = 2345 unless $port;
- $them = 'localhost' unless $them;
+You might also want to employ anonymous functions for simple signal
+handlers:
- $SIG{'INT'} = 'dokill';
- sub dokill { kill 9,$child if $child; }
+ $SIG{INT} = sub { die "\nOutta here!\n" };
- use Socket;
+But that will be problematic for the more complicated handlers that need
+to re-install themselves. Because Perl's signal mechanism is currently
+based on the signal(3) function from the C library, you may sometimes be so
+misfortunate as to run on systems where that function is "broken", that
+is, it behaves in the old unreliable SysV way rather than the newer, more
+reasonable BSD and POSIX fashion. So you'll see defensive people writing
+signal handlers like this:
- $sockaddr = 'S n a4 x8';
- chop($hostname = `hostname`);
+ sub REAPER {
+ $SIG{CHLD} = \&REAPER; # loathe sysV
+ $waitedpid = wait;
+ }
+ $SIG{CHLD} = \&REAPER;
+ # now do something that forks...
+
+or even the more elaborate:
+
+ use POSIX "wait_h";
+ sub REAPER {
+ my $child;
+ $SIG{CHLD} = \&REAPER; # loathe sysV
+ while (($child = waitpid(-1,WNOHANG)) > 0) {
+ $Kid_Status{$child} = $?;
+ }
+ }
+ $SIG{CHLD} = \&REAPER;
+ # do something that forks...
+
+Signal handling is also used for timeouts in Unix. While safely
+protected within an C<eval{}> block, you set a signal handler to trap
+alarm signals and then schedule to have one delivered to you in some
+number of seconds. Then try your blocking operation, clearing the alarm
+when it's done but not before you've exited your C<eval{}> block. If it
+goes off, you'll use die() to jump out of the block, much as you might
+using longjmp() or throw() in other languages.
+
+Here's an example:
+
+ eval {
+ local $SIG{ALRM} = sub { die "alarm clock restart" };
+ alarm 10;
+ flock(FH, 2); # blocking write lock
+ alarm 0;
+ };
+ if ($@ and $@ !~ /alarm clock restart/) { die }
+
+For more complex signal handling, you might see the standard POSIX
+module. Lamentably, this is almost entirely undocumented, but
+the F<t/lib/posix.t> file from the Perl source distribution has some
+examples in it.
+
+=head1 Named Pipes
+
+A named pipe (often referred to as a FIFO) is an old Unix IPC
+mechanism for processes communicating on the same machine. It works
+just like regular, connected anonymous pipes, except that the
+processes rendezvous using a filename and don't have to be related.
+
+To create a named pipe, use the Unix command mknod(1) or on some
+systems, mkfifo(1). These may not be in your normal path.
+
+ # system return val is backwards, so && not ||
+ #
+ $ENV{PATH} .= ":/etc:/usr/etc";
+ if ( system('mknod', $path, 'p')
+ && system('mkfifo', $path) )
+ {
+ die "mk{nod,fifo} $path failed";
+ }
+
+
+A fifo is convenient when you want to connect a process to an unrelated
+one. When you open a fifo, the program will block until there's something
+on the other end.
+
+For example, let's say you'd like to have your F<.signature> file be a
+named pipe that has a Perl program on the other end. Now every time any
+program (like a mailer, newsreader, finger program, etc.) tries to read
+from that file, the reading program will block and your program will
+supply the new signature. We'll use the pipe-checking file test B<-p>
+to find out whether anyone (or anything) has accidentally removed our fifo.
+
+ chdir; # go home
+ $FIFO = '.signature';
+ $ENV{PATH} .= ":/etc:/usr/games";
+
+ while (1) {
+ unless (-p $FIFO) {
+ unlink $FIFO;
+ system('mknod', $FIFO, 'p')
+ && die "can't mknod $FIFO: $!";
+ }
+
+ # next line blocks until there's a reader
+ open (FIFO, "> $FIFO") || die "can't write $FIFO: $!";
+ print FIFO "John Smith (smith\@host.org)\n", `fortune -s`;
+ close FIFO;
+ sleep 2; # to avoid dup sigs
+ }
- ($name, $aliases, $proto) = getprotobyname('tcp');
- ($name, $aliases, $port) = getservbyname($port, 'tcp')
- unless $port =~ /^\d+$/;
- ($name, $aliases, $type, $len, $thisaddr) =
- gethostbyname($hostname);
- ($name, $aliases, $type, $len, $thataddr) = gethostbyname($them);
- $this = pack($sockaddr, AF_INET, 0, $thisaddr);
- $that = pack($sockaddr, AF_INET, $port, $thataddr);
+=head1 Using open() for IPC
+
+Perl's basic open() statement can also be used for unidirectional interprocess
+communication by either appending or prepending a pipe symbol to the second
+argument to open(). Here's how to start up a child process you
+intend to write to:
+
+ open(SPOOLER, "| cat -v | lpr -h 2>/dev/null")
+ || die "can't fork: $!";
+ local $SIG{PIPE} = sub { die "spooler pipe broke" };
+ print SPOOLER "stuff\n";
+ close SPOOLER || die "bad spool: $! $?";
+
+And here's how to start up a child process you intend to read from:
+
+ open(STATUS, "netstat -an 2>&1 |")
+ || die "can't fork: $!";
+ while (<STATUS>) {
+ next if /^(tcp|udp)/;
+ print;
+ }
+ close STATUS || die "bad netstat: $! $?";
+
+If one can be sure that a particular program is a Perl script that is
+expecting filenames in @ARGV, the clever programmer can write something
+like this:
+
+ $ program f1 "cmd1|" - f2 "cmd2|" f3 < tmpfile
+
+and irrespective of which shell it's called from, the Perl program will
+read from the file F<f1>, the process F<cmd1>, standard input (F<tmpfile>
+in this case), the F<f2> file, the F<cmd2> command, and finally the F<f3>
+file. Pretty nifty, eh?
+
+You might notice that you could use backticks for much the
+same effect as opening a pipe for reading:
+
+ print grep { !/^(tcp|udp)/ } `netstat -an 2>&1`;
+ die "bad netstat" if $?;
+
+While this is true on the surface, it's much more efficient to process the
+file one line or record at a time because then you don't have to read the
+whole thing into memory at once. It also gives you finer control of the
+whole process, letting you kill off the child process early if you'd
+like.
+
+Be careful to check both the open() and the close() return values. If
+you're I<writing> to a pipe, you should also trap SIGPIPE. Otherwise,
+think of what happens when you start up a pipe to a command that doesn't
+exist: the open() will in all likelihood succeed (it only reflects the
+fork()'s success), but then your output will fail--spectacularly. Perl
+can't know whether the command worked because your command is actually
+running in a separate process whose exec() might have failed. Therefore,
+while readers of bogus commands just return a quick end of file, writers
+to bogus commands will trigger a signal they'd better be prepared to
+handle. Consider:
+
+ open(FH, "|bogus");
+ print FH "bang\n";
+ close FH;
+
+=head2 Safe Pipe Opens
+
+Another interesting approach to IPC is making your single program go
+multiprocess and communicate between (or even amongst) yourselves. The
+open() function will accept a file argument of either C<"-|"> or C<"|-">
+to do a very interesting thing: it forks a child connected to the
+filehandle you've opened. The child is running the same program as the
+parent. This is useful for safely opening a file when running under an
+assumed UID or GID, for example. If you open a pipe I<to> minus, you can
+write to the filehandle you opened and your kid will find it in his
+STDIN. If you open a pipe I<from> minus, you can read from the filehandle
+you opened whatever your kid writes to his STDOUT.
+
+ use English;
+ my $sleep_count = 0;
+
+ do {
+ $pid = open(KID, "|-");
+ unless (defined $pid) {
+ warn "cannot fork: $!";
+ die "bailing out" if $sleep_count++ > 6;
+ sleep 10;
+ }
+ } until defined $pid;
+
+ if ($pid) { # parent
+ print KID @some_data;
+ close(KID) || warn "kid exited $?";
+ } else { # child
+ ($EUID, $EGID) = ($UID, $GID); # suid progs only
+ open (FILE, "> /safe/file")
+ || die "can't open /safe/file: $!";
+ while (<STDIN>) {
+ print FILE; # child's STDIN is parent's KID
+ }
+ exit; # don't forget this
+ }
+
+Another common use for this construct is when you need to execute
+something without the shell's interference. With system(), it's
+straightforward, but you can't use a pipe open or backticks safely.
+That's because there's no way to stop the shell from getting its hands on
+your arguments. Instead, use lower-level control to call exec() directly.
+
+Here's a safe backtick or pipe open for read:
+
+ # add error processing as above
+ $pid = open(KID, "-|");
+
+ if ($pid) { # parent
+ while (<KID>) {
+ # do something interesting
+ }
+ close(KID) || warn "kid exited $?";
+
+ } else { # child
+ ($EUID, $EGID) = ($UID, $GID); # suid only
+ exec($program, @options, @args)
+ || die "can't exec program: $!";
+ # NOTREACHED
+ }
+
+
+And here's a safe pipe open for writing:
+
+ # add error processing as above
+ $pid = open(KID, "|-");
+ $SIG{PIPE} = sub { die "whoops, $program pipe broke" };
+
+ if ($pid) { # parent
+ for (@data) {
+ print KID;
+ }
+ close(KID) || warn "kid exited $?";
+
+ } else { # child
+ ($EUID, $EGID) = ($UID, $GID);
+ exec($program, @options, @args)
+ || die "can't exec program: $!";
+ # NOTREACHED
+ }
+
+Note that these operations are full Unix forks, which means they may not be
+correctly implemented on alien systems. Additionally, this is not true
+multithreading. If you'd like to learn more about threading, see the
+F<modules> file mentioned below in the L<SEE ALSO> section.
+
+=head2 Bidirectional Communication
+
+While this works reasonably well for unidirectional communication, what
+about bidirectional communication? The obvious thing you'd like to do
+doesn't actually work:
+
+ open(KID, "| some program |")
+
+and if you forgot to use the B<-w> flag, then you'll miss out
+entirely on the diagnostic message:
+
+ Can't do bidirectional pipe at -e line 1.
+
+If you really want to, you can use the standard open2() library function
+to catch both ends. There's also an open3() for tridirectional I/O so you
+can also catch your child's STDERR, but doing so would then require an
+awkward select() loop and wouldn't allow you to use normal Perl input
+operations.
+
+If you look at its source, you'll see that open2() uses low-level
+primitives like Unix pipe() and exec() to create all the connections.
+While it might have been slightly more efficient by using socketpair(), it
+would have then been even less portable than it already is. The open2()
+and open3() functions are unlikely to work anywhere except on a Unix
+system or some other one purporting to be POSIX compliant.
+
+Here's an example of using open2():
+
+ use FileHandle;
+ use IPC::Open2;
+ $pid = open2( \*Reader, \*Writer, "cat -u -n" );
+ Writer->autoflush(); # default here, actually
+ print Writer "stuff\n";
+ $got = <Reader>;
+
+The problem with this is that Unix buffering is going to really
+ruin your day. Even though your C<Writer> filehandle is autoflushed,
+and the process on the other end will get your data in a timely manner,
+you can't usually do anything to force it to actually give it back to you
+in a similarly quick fashion. In this case, we could, because we
+gave I<cat> a B<-u> flag to make it unbuffered. But very few Unix
+commands are designed to operate over pipes, so this seldom works
+unless you yourself wrote the program on the other end of the
+double-ended pipe.
+
+A solution to this is the non-standard F<Comm.pl> library. It uses
+pseudo-ttys to make your program behave more reasonably:
+
+ require 'Comm.pl';
+ $ph = open_proc('cat -n');
+ for (1..10) {
+ print $ph "a line\n";
+ print "got back ", scalar <$ph>;
+ }
- socket(S, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
- bind(S, $this) || die "bind: $!";
- connect(S, $that) || die "connect: $!";
+This way you don't have to have control over the source code of the
+program you're using. The F<Comm> library also has expect()
+and interact() functions. Find the library (and hopefully its
+successor F<IPC::Chat>) at your nearest CPAN archive as detailed
+in the L<SEE ALSO> section below.
- select(S); $| = 1; select(stdout);
+=head1 Sockets: Client/Server Communication
- if ($child = fork) {
- while (<>) {
- print S;
- }
- sleep 3;
- do dokill();
- }
- else {
- while (<S>) {
- print;
- }
- }
+While not limited to Unix-derived operating systems (e.g. WinSock on PCs
+provides socket support, as do some VMS libraries), you may not have
+sockets on your system, in which case this section probably isn't going to do
+you much good. With sockets, you can do both virtual circuits (i.e. TCP
+streams) and datagrams (i.e. UDP packets). You may be able to do even more
+depending on your system.
+
+The Perl function calls for dealing with sockets have the same names as
+the corresponding system calls in C, but their arguments tend to differ
+for two reasons: first, Perl filehandles work differently than C file
+descriptors. Second, Perl already knows the length of its strings, so you
+don't need to pass that information.
-And here's a server:
+One of the major problems with old socket code in Perl was that it used
+hard-coded values for some of the constants, which severely hurt
+portability. If you ever see code that does anything like explicitly
+setting C<$AF_INET = 2>, you know you're in for big trouble: An
+immeasurably superior approach is to use the C<Socket> module, which more
+reliably grants access to various constants and functions you'll need.
- ($port) = @ARGV;
- $port = 2345 unless $port;
+=head2 Internet TCP Clients and Servers
+Use Internet-domain sockets when you want to do client-server
+communication that might extend to machines outside of your own system.
+
+Here's a sample TCP client using Internet-domain sockets:
+
+ #!/usr/bin/perl -w
+ require 5.002;
+ use strict;
+ use Socket;
+ my ($remote,$port, $iaddr, $paddr, $proto, $line);
+
+ $remote = shift || 'localhost';
+ $port = shift || 2345; # random port
+ if ($port =~ /\D/) { $port = getservbyname($port, 'tcp') }
+ die "No port" unless $port;
+ $iaddr = inet_aton($remote) || die "no host: $remote";
+ $paddr = sockaddr_in($port, $iaddr);
+
+ $proto = getprotobyname('tcp');
+ socket(SOCK, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
+ connect(SOCK, $paddr) || die "connect: $!";
+ while ($line = <SOCK>) {
+ print $line;
+ }
+
+ close (SOCK) || die "close: $!";
+ exit;
+
+And here's a corresponding server to go along with it. We'll
+leave the address as INADDR_ANY so that the kernel can choose
+the appropriate interface on multihomed hosts:
+
+ #!/usr/bin/perl -Tw
+ require 5.002;
+ use strict;
+ BEGIN { $ENV{PATH} = '/usr/ucb:/bin' }
use Socket;
+ use Carp;
- $sockaddr = 'S n a4 x8';
+ sub spawn; # forward declaration
+ sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" }
- ($name, $aliases, $proto) = getprotobyname('tcp');
- ($name, $aliases, $port) = getservbyname($port, 'tcp')
- unless $port =~ /^\d+$/;
+ my $port = shift || 2345;
+ my $proto = getprotobyname('tcp');
+ socket(SERVER, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
+ setsockopt(SERVER, SOL_SOCKET, SO_REUSEADDR, 1) || die "setsockopt: $!";
+ bind(SERVER, sockaddr_in($port, INADDR_ANY)) || die "bind: $!";
+ listen(SERVER,5) || die "listen: $!";
- $this = pack($sockaddr, AF_INET, $port, "\0\0\0\0");
+ logmsg "server started on port $port";
- select(NS); $| = 1; select(stdout);
+ my $waitedpid = 0;
+ my $paddr;
- socket(S, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
- bind(S, $this) || die "bind: $!";
- listen(S, 5) || die "connect: $!";
+ sub REAPER {
+ $SIG{CHLD} = \&REAPER; # loathe sysV
+ $waitedpid = wait;
+ logmsg "reaped $waitedpid" . ($? ? " with exit $?" : '');
+ }
+
+ $SIG{CHLD} = \&REAPER;
+
+ for ( $waitedpid = 0;
+ ($paddr = accept(CLIENT,SERVER)) || $waitedpid;
+ $waitedpid = 0, close CLIENT)
+ {
+ next if $waitedpid;
+ my($port,$iaddr) = sockaddr_in($paddr);
+ my $name = gethostbyaddr($iaddr,AF_INET);
+
+ logmsg "connection from $name [",
+ inet_ntoa($iaddr), "]
+ at port $port";
- select(S); $| = 1; select(stdout);
+ spawn sub {
+ print "Hello there, $name, it's now ", scalar localtime, "\n";
+ exec '/usr/games/fortune'
+ or confess "can't exec fortune: $!";
+ };
- for (;;) {
- print "Listening again\n";
- ($addr = accept(NS,S)) || die $!;
- print "accept ok\n";
+ }
- ($af,$port,$inetaddr) = unpack($sockaddr,$addr);
- @inetaddr = unpack('C4',$inetaddr);
- print "$af $port @inetaddr\n";
+ sub spawn {
+ my $coderef = shift;
- while (<NS>) {
- print;
- print NS;
+ unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
+ confess "usage: spawn CODEREF";
}
+
+ my $pid;
+ if (!defined($pid = fork)) {
+ logmsg "cannot fork: $!";
+ return;
+ } elsif ($pid) {
+ logmsg "begat $pid";
+ return; # i'm the parent
+ }
+ # else i'm the child -- go spawn
+
+ open(STDIN, "<&CLIENT") || die "can't dup client to stdin";
+ open(STDOUT, ">&CLIENT") || die "can't dup client to stdout";
+ ## open(STDERR, ">&STDOUT") || die "can't dup stdout to stderr";
+ exit &$coderef();
+ }
+
+This server takes the trouble to clone off a child version via fork() for
+each incoming request. That way it can handle many requests at once,
+which you might not always want. Even if you don't fork(), the listen()
+will allow that many pending connections. Forking servers have to be
+particularly careful about cleaning up their dead children (called
+"zombies" in Unix parlance), because otherwise you'll quickly fill up your
+process table.
+
+We suggest that you use the B<-T> flag to use taint checking (see L<perlsec>)
+even if you aren't running setuid or setgid. This is always a good idea
+for servers and other programs run on behalf of someone else (like CGI
+scripts), because it lessens the chances that people from the outside will
+be able to compromise your system.
+
+Let's look at another TCP client. This one connects to the TCP "time"
+service on a number of different machines and shows how far their clocks
+differ from the system on which it's being run:
+
+ #!/usr/bin/perl -w
+ require 5.002;
+ use strict;
+ use Socket;
+
+ my $SECS_of_70_YEARS = 2208988800;
+ sub ctime { scalar localtime(shift) }
+
+ my $iaddr = gethostbyname('localhost');
+ my $proto = getprotobyname('tcp');
+ my $port = getservbyname('time', 'tcp');
+ my $paddr = sockaddr_in(0, $iaddr);
+ my($host);
+
+ $| = 1;
+ printf "%-24s %8s %s\n", "localhost", 0, ctime(time());
+
+ foreach $host (@ARGV) {
+ printf "%-24s ", $host;
+ my $hisiaddr = inet_aton($host) || die "unknown host";
+ my $hispaddr = sockaddr_in($port, $hisiaddr);
+ socket(SOCKET, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
+ connect(SOCKET, $hispaddr) || die "connect: $!";
+ my $rtime = ' ';
+ read(SOCKET, $rtime, 4);
+ close(SOCKET);
+ my $histime = unpack("N", $rtime) - $SECS_of_70_YEARS ;
+ printf "%8d %s\n", $histime - time, ctime($histime);
}
-=head2 SysV IPC
+=head2 Unix-Domain TCP Clients and Servers
+
+That's fine for Internet-domain clients and servers, but what about local
+communications? While you can use the same setup, sometimes you don't
+want to. Unix-domain sockets are local to the current host, and are often
+used internally to implement pipes. Unlike Internet domain sockets, UNIX
+domain sockets can show up in the file system with an ls(1) listing.
+
+ $ ls -l /dev/log
+ srw-rw-rw- 1 root 0 Oct 31 07:23 /dev/log
-Here's a small example showing shared memory usage:
+You can test for these with Perl's B<-S> file test:
+
+ unless ( -S '/dev/log' ) {
+ die "something's wicked with the print system";
+ }
+
+Here's a sample Unix-domain client:
+
+ #!/usr/bin/perl -w
+ require 5.002;
+ use Socket;
+ use strict;
+ my ($rendezvous, $line);
+
+ $rendezvous = shift || '/tmp/catsock';
+ socket(SOCK, PF_UNIX, SOCK_STREAM, 0) || die "socket: $!";
+ connect(SOCK, sockaddr_un($rendezvous)) || die "connect: $!";
+ while ($line = <SOCK>) {
+ print $line;
+ }
+ exit;
+
+And here's a corresponding server.
+
+ #!/usr/bin/perl -Tw
+ require 5.002;
+ use strict;
+ use Socket;
+ use Carp;
+
+ BEGIN { $ENV{PATH} = '/usr/ucb:/bin' }
+
+ my $NAME = '/tmp/catsock';
+ my $uaddr = sockaddr_un($NAME);
+ my $proto = getprotobyname('tcp');
+
+ socket(SERVER,PF_UNIX,SOCK_STREAM,0) || die "socket: $!";
+ unlink($NAME);
+ bind (SERVER, $uaddr) || die "bind: $!";
+ listen(SERVER,5) || die "listen: $!";
+
+ logmsg "server started on $NAME";
+
+ $SIG{CHLD} = \&REAPER;
+
+ for ( $waitedpid = 0;
+ accept(CLIENT,SERVER) || $waitedpid;
+ $waitedpid = 0, close CLIENT)
+ {
+ next if $waitedpid;
+ logmsg "connection on $NAME";
+ spawn sub {
+ print "Hello there, it's now ", scalar localtime, "\n";
+ exec '/usr/games/fortune' or die "can't exec fortune: $!";
+ };
+ }
+
+As you see, it's remarkably similar to the Internet domain TCP server, so
+much so, in fact, that we've omitted several duplicate functions--spawn(),
+logmsg(), ctime(), and REAPER()--which are exactly the same as in the
+other server.
+
+So why would you ever want to use a Unix domain socket instead of a
+simpler named pipe? Because a named pipe doesn't give you sessions. You
+can't tell one process's data from another's. With socket programming,
+you get a separate session for each client: that's why accept() takes two
+arguments.
+
+For example, let's say that you have a long running database server daemon
+that you want folks from the World Wide Web to be able to access, but only
+if they go through a CGI interface. You'd have a small, simple CGI
+program that does whatever checks and logging you feel like, and then acts
+as a Unix-domain client and connects to your private server.
+
+=head2 UDP: Message Passing
+
+Another kind of client-server setup is one that uses not connections, but
+messages. UDP communications involve much lower overhead but also provide
+less reliability, as there are no promises that messages will arrive at
+all, let alone in order and unmangled. Still, UDP offers some advantages
+over TCP, including being able to "broadcast" or "multicast" to a whole
+bunch of destination hosts at once (usually on your local subnet). If you
+find yourself overly concerned about reliability and start building checks
+into your message system, then you probably should just use TCP to start
+with.
+
+Here's a UDP program similar to the sample Internet TCP client given
+above. However, instead of checking one host at a time, the UDP version
+will check many of them asynchronously by simulating a multicast and then
+using select() to do a timed-out wait for I/O. To do something similar
+with TCP, you'd have to use a different socket handle for each host.
+
+ #!/usr/bin/perl -w
+ use strict;
+ require 5.002;
+ use Socket;
+ use Sys::Hostname;
+
+ my ( $count, $hisiaddr, $hispaddr, $histime,
+ $host, $iaddr, $paddr, $port, $proto,
+ $rin, $rout, $rtime, $SECS_of_70_YEARS);
+
+ $SECS_of_70_YEARS = 2208988800;
+
+ $iaddr = gethostbyname(hostname());
+ $proto = getprotobyname('udp');
+ $port = getservbyname('time', 'udp');
+ $paddr = sockaddr_in(0, $iaddr); # 0 means let kernel pick
+
+ socket(SOCKET, PF_INET, SOCK_DGRAM, $proto) || die "socket: $!";
+ bind(SOCKET, $paddr) || die "bind: $!";
+
+ $| = 1;
+ printf "%-12s %8s %s\n", "localhost", 0, scalar localtime time;
+ $count = 0;
+ for $host (@ARGV) {
+ $count++;
+ $hisiaddr = inet_aton($host) || die "unknown host";
+ $hispaddr = sockaddr_in($port, $hisiaddr);
+ defined(send(SOCKET, 0, 0, $hispaddr)) || die "send $host: $!";
+ }
+
+ $rin = '';
+ vec($rin, fileno(SOCKET), 1) = 1;
+
+ # timeout after 10.0 seconds
+ while ($count && select($rout = $rin, undef, undef, 10.0)) {
+ $rtime = '';
+ ($hispaddr = recv(SOCKET, $rtime, 4, 0)) || die "recv: $!";
+ ($port, $hisiaddr) = sockaddr_in($hispaddr);
+ $host = gethostbyaddr($hisiaddr, AF_INET);
+ $histime = unpack("N", $rtime) - $SECS_of_70_YEARS ;
+ printf "%-12s ", $host;
+ printf "%8d %s\n", $histime - time, scalar localtime($histime);
+ $count--;
+ }
+
+=head1 SysV IPC
+
+While System V IPC isn't so widely used as sockets, it still has some
+interesting uses. You can't, however, effectively use SysV IPC or
+Berkeley mmap() to have shared memory so as to share a variable amongst
+several processes. That's because Perl would reallocate your string when
+you weren't wanting it to.
+
+
+Here's a small example showing shared memory usage.
$IPC_PRIVATE = 0;
$IPC_RMID = 0;
$size = 2000;
$key = shmget($IPC_PRIVATE, $size , 0777 );
- die if !defined($key);
+ die unless defined $key;
$message = "Message #1";
shmwrite($key, $message, 0, 60 ) || die "$!";
@@ -149,7 +792,7 @@ Call the file F<take>:
Put this code in a separate file to be run in more that one process
Call this file F<give>:
- #'give' the semaphore
+ # 'give' the semaphore
# run this in the original process and you will see
# that the second process continues
@@ -166,3 +809,66 @@ Call this file F<give>:
semop($key,$opstring) || die "$!";
+=head1 WARNING
+
+The SysV IPC code above was written long ago, and it's definitely clunky
+looking. It should at the very least be made to C<use strict> and
+C<require "sys/ipc.ph">. Better yet, perhaps someone should create an
+C<IPC::SysV> module the way we have the C<Socket> module for normal
+client-server communications.
+
+(... time passes)
+
+Voila! Check out the IPC::SysV modules written by Jack Shirazi. You can
+find them at a CPAN store near you.
+
+=head1 NOTES
+
+If you are running under version 5.000 (dubious) or 5.001, you can still
+use most of the examples in this document. You may have to remove the
+C<use strict> and some of the my() statements for 5.000, and for both
+you'll have to load in version 1.2 of the F<Socket.pm> module, which
+was/is/shall-be included in I<perl5.001o>.
+
+Most of these routines quietly but politely return C<undef> when they fail
+instead of causing your program to die right then and there due to an
+uncaught exception. (Actually, some of the new I<Socket> conversion
+functions croak() on bad arguments.) It is therefore essential
+that you should check the return values of these functions. Always begin
+your socket programs this way for optimal success, and don't forget to add
+the B<-T> taint checking flag to the pound-bang line for servers:
+
+ #!/usr/bin/perl -w
+ require 5.002;
+ use strict;
+ use sigtrap;
+ use Socket;
+
+=head1 BUGS
+
+All these routines create system-specific portability problems. As noted
+elsewhere, Perl is at the mercy of your C libraries for much of its system
+behaviour. It's probably safest to assume broken SysV semantics for
+signals and to stick with simple TCP and UDP socket operations; e.g. don't
+try to pass open filedescriptors over a local UDP datagram socket if you
+want your code to stand a chance of being portable.
+
+Because few vendors provide C libraries that are safely
+re-entrant, the prudent programmer will do little else within
+a handler beyond die() to raise an exception and longjmp(3) out.
+
+=head1 AUTHOR
+
+Tom Christiansen, with occasional vestiges of Larry Wall's original
+version.
+
+=head1 SEE ALSO
+
+Besides the obvious functions in L<perlfunc>, you should also check out
+the F<modules> file at your nearest CPAN site. (See L<perlmod> or best
+yet, the F<Perl FAQ> for a description of what CPAN is and where to get it.)
+Section 5 of the F<modules> file is devoted to "Networking, Device Control
+(modems) and Interprocess Communication", and contains numerous unbundled
+modules for networking, Chat and Expect operations, CGI
+programming, DCE, FTP, IPC, NNTP, Proxy, Ptty, RPC, SNMP, SMTP, Telnet,
+Threads, and ToolTalk--just to name a few.
diff --git a/pod/perllol.pod b/pod/perllol.pod
new file mode 100644
index 0000000000..4b58bee0b2
--- /dev/null
+++ b/pod/perllol.pod
@@ -0,0 +1,353 @@
+=head1 TITLE
+
+perlLoL - Manipulating Lists of Lists in Perl
+
+=head1 Declaration and Access
+
+The simplest thing to build is a list of lists (sometimes called an array
+of arrays). It's reasonably easy to understand, and almost everything
+that applies here will also be applicable later on with the fancier data
+structures.
+
+A list of lists, or an array of arrays if you will, is just a regular
+old array @LoL that you can get at with two subscripts, like $LoL[3][2]. Here's
+a declaration of the array:
+
+ # assign to our array a list of list references
+ @LoL = (
+ [ "fred", "barney" ],
+ [ "george", "jane", "elroy" ],
+ [ "homer", "marge", "bart" ],
+ );
+
+ print $LoL[2][2];
+ bart
+
+Now you should be very careful that the outer bracket type
+is a round one, that is, parentheses. That's because you're assigning to
+an @list, so you need parens. If you wanted there I<not> to be an @LoL,
+but rather just a reference to it, you could do something more like this:
+
+ # assign a reference to list of list references
+ $ref_to_LoL = [
+ [ "fred", "barney", "pebbles", "bambam", "dino", ],
+ [ "homer", "bart", "marge", "maggie", ],
+ [ "george", "jane", "alroy", "judy", ],
+ ];
+
+ print $ref_to_LoL->[2][2];
+
+Notice that the outer bracket type has changed, and so our access syntax
+has also changed. That's because unlike C, in perl you can't freely
+interchange arrays and references thereto. $ref_to_LoL is a reference to an
+array, whereas @LoL is an array proper. Likewise, $LoL[2] is not an
+array, but an array ref. So how come you can write these:
+
+ $LoL[2][2]
+ $ref_to_LoL->[2][2]
+
+instead of having to write these:
+
+ $LoL[2]->[2]
+ $ref_to_LoL->[2]->[2]
+
+Well, that's because the rule is that on adjacent brackets only (whether
+square or curly), you are free to omit the pointer dereferencing arrow.
+But you cannot do so for the very first one if it's a scalar containing
+a reference, which means that $ref_to_LoL always needs it.
+
+=head1 Growing Your Own
+
+That's all well and good for declaration of a fixed data structure,
+but what if you wanted to add new elements on the fly, or build
+it up entirely from scratch?
+
+First, let's look at reading it in from a file. This is something like
+adding a row at a time. We'll assume that there's a flat file in which
+each line is a row and each word an element. If you're trying to develop an
+@LoL list containing all these, here's the right way to do that:
+
+ while (<>) {
+ @tmp = split;
+ push @LoL, [ @tmp ];
+ }
+
+You might also have loaded that from a function:
+
+ for $i ( 1 .. 10 ) {
+ $LoL[$i] = [ somefunc($i) ];
+ }
+
+Or you might have had a temporary variable sitting around with the
+list in it.
+
+ for $i ( 1 .. 10 ) {
+ @tmp = somefunc($i);
+ $LoL[$i] = [ @tmp ];
+ }
+
+It's very important that you make sure to use the C<[]> list reference
+constructor. That's because this will be very wrong:
+
+ $LoL[$i] = @tmp;
+
+You see, assigning a named list like that to a scalar just counts the
+number of elements in @tmp, which probably isn't what you want.
+
+If you are running under C<use strict>, you'll have to add some
+declarations to make it happy:
+
+ use strict;
+ my(@LoL, @tmp);
+ while (<>) {
+ @tmp = split;
+ push @LoL, [ @tmp ];
+ }
+
+Of course, you don't need the temporary array to have a name at all:
+
+ while (<>) {
+ push @LoL, [ split ];
+ }
+
+You also don't have to use push(). You could just make a direct assignment
+if you knew where you wanted to put it:
+
+ my (@LoL, $i, $line);
+ for $i ( 0 .. 10 ) {
+ $line = <>;
+ $LoL[$i] = [ split ' ', $line ];
+ }
+
+or even just
+
+ my (@LoL, $i);
+ for $i ( 0 .. 10 ) {
+ $LoL[$i] = [ split ' ', <> ];
+ }
+
+You should in general be leery of using potential list functions
+in a scalar context without explicitly stating such.
+This would be clearer to the casual reader:
+
+ my (@LoL, $i);
+ for $i ( 0 .. 10 ) {
+ $LoL[$i] = [ split ' ', scalar(<>) ];
+ }
+
+If you wanted to have a $ref_to_LoL variable as a reference to an array,
+you'd have to do something like this:
+
+ while (<>) {
+ push @$ref_to_LoL, [ split ];
+ }
+
+Actually, if you were using strict, you'd not only have to declare $ref_to_LoL as
+you had to declare @LoL, but you'd I<also> have to initialize it to a
+reference to an empty list. (This was a bug in 5.001m that's been fixed
+for the 5.002 release.)
+
+ my $ref_to_LoL = [];
+ while (<>) {
+ push @$ref_to_LoL, [ split ];
+ }
+
+Ok, now you can add new rows. What about adding new columns? If you're
+just dealing with matrices, it's often easiest to use simple assignment:
+
+ for $x (1 .. 10) {
+ for $y (1 .. 10) {
+ $LoL[$x][$y] = func($x, $y);
+ }
+ }
+
+ for $x ( 3, 7, 9 ) {
+ $LoL[$x][20] += func2($x);
+ }
+
+It doesn't matter whether those elements are already
+there or not: it'll gladly create them for you, setting
+intervening elements to C<undef> as need be.
+
+If you just wanted to append to a row, you'd have
+to do something a bit funnier looking:
+
+ # add new columns to an existing row
+ push @{ $LoL[0] }, "wilma", "betty";
+
+Notice that I I<couldn't> just say:
+
+ push $LoL[0], "wilma", "betty"; # WRONG!
+
+In fact, that wouldn't even compile. How come? Because the argument
+to push() must be a real array, not just a reference to such.
+
+=head1 Access and Printing
+
+Now it's time to print your data structure out. How
+are you going to do that? Well, if you only want one
+of the elements, it's trivial:
+
+ print $LoL[0][0];
+
+If you want to print the whole thing, though, you can't
+just say
+
+ print @LoL; # WRONG
+
+because you'll just get references listed, and perl will never
+automatically dereference things for you. Instead, you have to
+roll yourself a loop or two. This prints the whole structure,
+using the shell-style for() construct to loop across the outer
+set of subscripts.
+
+ for $aref ( @LoL ) {
+ print "\t [ @$aref ],\n";
+ }
+
+If you wanted to keep track of subscripts, you might do this:
+
+ for $i ( 0 .. $#LoL ) {
+ print "\t elt $i is [ @{$LoL[$i]} ],\n";
+ }
+
+or maybe even this. Notice the inner loop.
+
+ for $i ( 0 .. $#LoL ) {
+ for $j ( 0 .. $#{$LoL[$i]} ) {
+ print "elt $i $j is $LoL[$i][$j]\n";
+ }
+ }
+
+As you can see, it's getting a bit complicated. That's why
+sometimes it's easier to take a temporary on your way through:
+
+ for $i ( 0 .. $#LoL ) {
+ $aref = $LoL[$i];
+ for $j ( 0 .. $#{$aref} ) {
+ print "elt $i $j is $LoL[$i][$j]\n";
+ }
+ }
+
+Hm... that's still a bit ugly. How about this:
+
+ for $i ( 0 .. $#LoL ) {
+ $aref = $LoL[$i];
+ $n = @$aref - 1;
+ for $j ( 0 .. $n ) {
+ print "elt $i $j is $LoL[$i][$j]\n";
+ }
+ }
+
+=head1 Slices
+
+If you want to get at a slice (part of a row) in a multidimensional
+array, you're going to have to do some fancy subscripting. That's
+because while we have a nice synonym for single elements via the
+pointer arrow for dereferencing, no such convenience exists for slices.
+(Remember, of course, that you can always write a loop to do a slice
+operation.)
+
+Here's how to do one operation using a loop. We'll assume an @LoL
+variable as before.
+
+ @part = ();
+ $x = 4;
+ for ($y = 7; $y < 13; $y++) {
+ push @part, $LoL[$x][$y];
+ }
+
+That same loop could be replaced with a slice operation:
+
+ @part = @{ $LoL[4] } [ 7..12 ];
+
+but as you might well imagine, this is pretty rough on the reader.
+
+Ah, but what if you wanted a I<two-dimensional slice>, such as having
+$x run from 4..8 and $y run from 7 to 12? Hm... here's the simple way:
+
+ @newLoL = ();
+ for ($startx = $x = 4; $x <= 8; $x++) {
+ for ($starty = $y = 7; $y <= 12; $y++) {
+ $newLoL[$x - $startx][$y - $starty] = $LoL[$x][$y];
+ }
+ }
+
+We can reduce some of the looping through slices
+
+ for ($x = 4; $x <= 8; $x++) {
+ push @newLoL, [ @{ $LoL[$x] } [ 7..12 ] ];
+ }
+
+If you were into Schwartzian Transforms, you would probably
+have selected map for that
+
+ @newLoL = map { [ @{ $LoL[$_] } [ 7..12 ] ] } 4 .. 8;
+
+Although if your manager accused you of seeking job security (or rapid
+insecurity) through inscrutable code, it would be hard to argue. :-)
+If I were you, I'd put that in a function:
+
+ @newLoL = splice_2D( \@LoL, 4 => 8, 7 => 12 );
+ sub splice_2D {
+ my $lrr = shift; # ref to list of list refs!
+ my ($x_lo, $x_hi,
+ $y_lo, $y_hi) = @_;
+
+ return map {
+ [ @{ $lrr->[$_] } [ $y_lo .. $y_hi ] ]
+ } $x_lo .. $x_hi;
+ }
+
+
+=head1 Passing Arguments
+
+One place where a list of lists crops up is when you pass
+in several list references to a function. Consider:
+
+ @tailings = popmany ( \@a, \@b, \@c, \@d );
+
+ sub popmany {
+ my $aref;
+ my @retlist = ();
+ foreach $aref ( @_ ) {
+ push @retlist, pop @$aref;
+ }
+ return @retlist;
+ }
+
+This function was designed to pop off the last element from each of
+its arguments and return those in a list. In this function,
+you can think of @_ as a list of lists.
+
+Just as a side note, what happens if the function is called with the
+"wrong" types of arguments? Normally nothing, but in the case of
+references, we can be a bit pickier. This isn't detectable at
+compile-time (yet--Larry does have a prototype prototype in the works for
+5.002), but you could check it at run time using the ref() function.
+
+ use Carp;
+ for $i ( 0 .. $#_) {
+ if (ref($_[$i]) ne 'ARRAY') {
+ confess "popmany: arg $i not an array reference\n";
+ }
+ }
+
+However, that's not usually necessary unless you want to trap it. It's
+also dubious in that it would fail on a real array reference blessed into
+its own class (an object). But since you're all going to be using
+C<strict refs>, it would raise an exception anyway even without the die.
+
+This will matter more to you later on when you start building up
+more complex data structures that all aren't woven of the same
+cloth, so to speak.
+
+=head1 SEE ALSO
+
+perldata(1), perlref(1), perldsc(1)
+
+=head1 AUTHOR
+
+Tom Christiansen <tchrist@perl.com>
+
+Last update: Sat Oct 7 19:35:26 MDT 1995
diff --git a/pod/perlmod.pod b/pod/perlmod.pod
index d557e68ff7..c5ab08a07c 100644
--- a/pod/perlmod.pod
+++ b/pod/perlmod.pod
@@ -66,7 +66,7 @@ name is thus C<%main::>, or C<%::> for short. Likewise the nested package
mentioned earlier is named C<%OUTER::INNER::>.
The value in each entry of the associative array is what you are
-referring to when you use the C<*name> notation. In fact, the following
+referring to when you use the C<*name> typeglob notation. In fact, the following
have the same effect, though the first is more efficient because it
does the symbol table lookups at compile time:
@@ -108,7 +108,7 @@ Note that even though the subroutine is compiled in package C<dumpvar>,
the name of the subroutine is qualified so that its name is inserted
into package C<main>.
-Assignment to a symbol table entry performs an aliasing operation,
+Assignment to a typeglob performs an aliasing operation,
i.e.,
*dick = *richard;
@@ -149,16 +149,16 @@ and C<END> work just as they do in B<awk>, as a degenerate case.
=head2 Perl Classes
-There is no special class syntax in Perl 5, but a package may function
+There is no special class syntax in Perl, but a package may function
as a class if it provides subroutines that function as methods. Such a
package may also derive some of its methods from another class package
-by listing the other package name in its @ISA array. For more on
-this, see L<perlobj>.
+by listing the other package name in its @ISA array.
+
+For more on this, see L<perlobj>.
=head2 Perl Modules
-In Perl 5, the notion of packages has been extended into the notion of
-modules. A module is a package that is defined in a library file of
+A module is just a package that is defined in a library file of
the same name, and is designed to be reusable. It may do this by
providing a mechanism for exporting some of its symbols into the symbol
table of any package using it. Or it may function as a class
@@ -166,7 +166,21 @@ definition and make its semantics available implicitly through method
calls on the class and its objects, without explicit exportation of any
symbols. Or it can do a little of both.
-Perl modules are included by saying
+For example, to start a normal module called Fred, create
+a file called Fred.pm and put this at the start of it:
+
+ package Fred;
+ require Exporter;
+ @ISA = qw(Exporter);
+ @EXPORT = qw(func1 func2);
+ @EXPORT_OK = qw($sally @listabob %harry func3);
+
+Then go on to declare and use your variables in functions
+without any qualifications.
+See L<Exporter> and the I<Perl Modules File> for details on
+mechanics and style issues in module creation.
+
+Perl modules are included into your program by saying
use Module;
@@ -269,206 +283,204 @@ The following programs are defined (and have their own documentation).
=over 12
+=item C<diagnostics>
+
+Pragma to produce enhanced diagnostics
+
=item C<integer>
-Perl pragma to compute arithmetic in integer instead of double
+Pragma to compute arithmetic in integer instead of double
=item C<less>
-Perl pragma to request less of something from the compiler
+Pragma to request less of something from the compiler
=item C<sigtrap>
-Perl pragma to enable stack backtrace on unexpected signals
+Pragma to enable stack backtrace on unexpected signals
=item C<strict>
-Perl pragma to restrict unsafe constructs
+Pragma to restrict unsafe constructs
=item C<subs>
-Perl pragma to predeclare sub names
+Pragma to predeclare sub names
=back
=head2 Standard Modules
-The following modules are all expected to behave in a well-defined
+Standard, bundled modules are all expected to behave in a well-defined
manner with respect to namespace pollution because they use the
-Exporter module.
-See their own documentation for details.
-
-=over 12
-
-=item C<Abbrev>
-
-create an abbreviation table from a list
-
-=item C<AnyDBM_File>
-
-provide framework for multiple DBMs
-
-=item C<AutoLoader>
-
-load functions only on demand
-
-=item C<AutoSplit>
-
-split a package for autoloading
-
-=item C<Basename>
-
-parse file name and path from a specification
-
-=item C<Benchmark>
-
-benchmark running times of code
-
-=item C<Carp>
-
-warn or die of errors (from perspective of caller)
+Exporter module. See their own documentation for details.
-=item C<CheckTree>
+To find out all the modules installed on your system, do this:
-run many filetest checks on a tree
+ find `perl -e 'print "@INC"'` -name '*.pm' -print
-=item C<Collate>
+They should all have their own documentation installed and accessible via
+your system man(1) command. If that fails, try the I<perldoc> program.
-compare 8-bit scalar data according to the current locale
-
-=item C<Config>
-
-access Perl configuration option
+=head2 Extension Modules
-=item C<Cwd>
+Extension modules are written in C (or a mix of Perl and C) and get
+dynamically loaded into Perl if and when you need them. Supported
+extension modules include the Socket, Fcntl, and POSIX modules.
-get pathname of current working directory
+Many popular C extension modules
+do not come bundled (at least, not completely)
+due to their size, volatility, or simply lack of time for adequate testing
+and configuration across the multitude of platforms on which Perl was
+beta-tested. You are encouraged to look for them in archie(1L), the Perl
+FAQ or Meta-FAQ, the WWW page, and even with their authors before randomly
+posting asking for their present condition and disposition.
-=item C<DynaLoader>
+=head2 CPAN
-Dynamically load C libraries into Perl code
+CPAN stands for the Comprehensive Perl Archive Network. This is a globally
+replicated collection of all known Perl materials, including hundreds
+of unbundled modules. Here are the major categories of modules:
-=item C<English>
+=over
-use nice English (or B<awk>) names for ugly punctuation variables
+=item *
+Language Extensions and Documentation Tools
-=item C<Env>
+=item *
+Development Support
-Perl module that imports environment variables
+=item *
+Operating System Interfaces
-=item C<Exporter>
+=item *
+Networking, Device Control (modems) and InterProcess Communication
-module to control namespace manipulations
+=item *
+Data Types and Data Type Utilities
-=item C<Fcntl>
+=item *
+Database Interfaces
-load the C Fcntl.h defines
+=item *
+User Interfaces
-=item C<FileHandle>
+=item *
+Interfaces to / Emulations of Other Programming Languages
-supply object methods for filehandles
+=item *
+File Names, File Systems and File Locking (see also File Handles)
-=item C<Find>
+=item *
+String Processing, Language Text Processing, Parsing and Searching
-traverse a file tree
+=item *
+Option, Argument, Parameter and Configuration File Processing
-=item C<Finddepth>
+=item *
+Internationalization and Locale
-traverse a directory structure depth-first
+=item *
+Authentication, Security and Encryption
-=item C<Getopt>
+=item *
+World Wide Web, HTML, HTTP, CGI, MIME
-basic and extended getopt(3) processing
+=item *
+Server and Daemon Utilities
-=item C<MakeMaker>
+=item *
+Archiving and Compression
-generate a Makefile for Perl extension
+=item *
+Images, Pixmap and Bitmap Manipulation, Drawing and Graphing
-=item C<Open2>
+=item *
+Mail and Usenet News
-open a process for both reading and writing
+=item *
+Control Flow Utilities (callbacks and exceptions etc)
-=item C<Open3>
+=item *
+File Handle and Input/Output Stream Utilities
-open a process for reading, writing, and error handling
+=item *
+Miscellaneous Modules
-=item C<POSIX>
+=back
-Perl interface to IEEE 1003.1 namespace
+Some of the registered CPAN sites as of this writing include the following.
+You should try to choose one close to you:
-=item C<Ping>
+=over
-check a host for upness
+=item *
+ftp://ftp.sterling.com/programming/languages/perl/
-=item C<Socket>
+=item *
+ftp://ftp.sedl.org/pub/mirrors/CPAN/
-load the C socket.h defines
+=item *
+ftp://ftp.uoknor.edu/mirrors/CPAN/
-=back
+=item *
+ftp://ftp.delphi.com/pub/mirrors/packages/perl/CPAN/
-=head2 Extension Modules
+=item *
+ftp://uiarchive.cso.uiuc.edu/pub/lang/perl/CPAN/
-Extension modules are written in C (or a mix of Perl and C) and get
-dynamically loaded into Perl if and when you need them. Supported
-extension modules include the Socket, Fcntl, and POSIX modules.
+=item *
+ftp://ftp.cis.ufl.edu/pub/perl/CPAN/
-The following are popular C extension modules, which while available at
-Perl 5.0 release time, do not come bundled (at least, not completely)
-due to their size, volatility, or simply lack of time for adequate testing
-and configuration across the multitude of platforms on which Perl was
-beta-tested. You are encouraged to look for them in archie(1L), the Perl
-FAQ or Meta-FAQ, the WWW page, and even with their authors before randomly
-posting asking for their present condition and disposition. There's no
-guarantee that the names or addresses below have not changed since printing,
-and in fact, they probably have!
+=item *
+ftp://ftp.switch.ch/mirror/CPAN/
-=over 12
+=item *
+ftp://ftp.sunet.se/pub/lang/perl/CPAN/
-=item C<Curses>
+=item *
+ftp://ftp.ci.uminho.pt/pub/lang/perl/
-Written by William Setzer <F<William_Setzer@ncsu.edu>>, while not
-included with the standard distribution, this extension module ports to
-most systems. FTP from your nearest Perl archive site, or try
+=item *
+ftp://ftp.cs.ruu.nl/pub/PERL/CPAN/
- ftp://ftp.ncsu.edu/pub/math/wsetzer/cursperl5??.tar.gz
+=item *
+ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/
-It is currently in alpha test, so the name and ftp location may
-change.
+=item *
+ftp://ftp.rz.ruhr-uni-bochum.de/pub/programming/languages/perl/CPAN/
+=item *
+ftp://ftp.leo.org/pub/comp/programming/languages/perl/CPAN/
-=item C<DBI>
+=item *
+ftp://ftp.pasteur.fr/pub/computing/unix/perl/CPAN/
-This is the portable database interface written by
-<F<Tim.Bunce@ig.co.uk>>. This supersedes the many perl4 ports for
-database extensions. The official archive for DBperl extensions is
-F<ftp.demon.co.uk:/pub/perl/db>. This archive contains copies of perl4
-ports for Ingres, Oracle, Sybase, Informix, Unify, Postgres, and
-Interbase, as well as rdb and shql and other non-SQL systems.
+=item *
+ftp://ftp.ibp.fr/pub/perl/CPAN/
-=item C<DB_File>
+=item *
+ftp://ftp.funet.fi/pub/languages/perl/CPAN/
-Fastest and most restriction-free of the DBM bindings, this extension module
-uses the popular Berkeley DB to tie() into your hashes. This has a
-standardly-distributed man page and dynamic loading extension module, but
-you'll have to fetch the Berkeley code yourself. See L<DB_File> for
-where.
+=item *
+ftp://ftp.tekotago.ac.nz/pub/perl/CPAN/
-=item C<Sx>
+=item *
+ftp://ftp.mame.mu.oz.au/pub/perl/CPAN/
-This extension module is a front to the Athena and Xlib libraries for Perl
-GUI programming, originally written by by Dominic Giampaolo
-<F<dbg@sgi.com>>, then and rewritten for Sx by FrE<eacute>dE<eacute>ric
-Chauveau <F<fmc@pasteur.fr>>. It's available for FTP from
+=item *
+ftp://coombs.anu.edu.au/pub/perl/
- ftp.pasteur.fr:/pub/Perl/Sx.tar.gz
+=item *
+ftp://dongpo.math.ncu.edu.tw/perl/CPAN/
-=item C<Tk>
+=item *
+ftp://ftp.lab.kdd.co.jp/lang/perl/CPAN/
-This extension module is an object-oriented Perl5 binding to the popular
-tcl/tk X11 package. However, you need know no TCL to use it!
-It was written by Malcolm Beattie <F<mbeattie@sable.ox.ac.uk>>.
-If you are unable to locate it using archie(1L) or a similar
-tool, you may try retrieving it from F</private/Tk-october.tar.gz>
-from Malcolm's machine listed above.
+=item *
+ftp://ftp.is.co.za/programming/perl/CPAN/
=back
+
+For an up-to-date listing of CPAN sites,
+see http://www.perl.com/perl/ or ftp://ftp.perl.com/perl/.
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 574e9238d8..9e1e3f14d0 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -414,7 +414,7 @@ can assign to them):
Note that this is not guaranteed to contribute to the readability of
your program.
-=head2 Assigment Operators
+=head2 Assignment Operators
"=" is the ordinary assignment operator.
@@ -463,8 +463,9 @@ argument and returns that value. This is just like C's comma operator.
In a list context, it's just the list argument separator, and inserts
both its arguments into the list.
-The => digraph is simply a synonym for the comma operator. It's useful
-for documenting arguments that come in pairs.
+The => digraph is mostly just a synonym for the comma operator. It's useful for
+documenting arguments that come in pairs. As of 5.001, it also forces
+any word to the left of it to be interpreted as a string.
=head2 List Operators (Rightward)
@@ -622,8 +623,8 @@ interpolating won't change over the life of the script. However, mentioning
C</o> constitutes a promise that you won't change the variables in the pattern.
If you change them, Perl won't even notice.
-If the PATTERN evaluates to a null string, the most recently executed
-(and successfully compiled) regular expression is used instead.
+If the PATTERN evaluates to a null string, the last
+successfully executed regular expression is used instead.
If used in a context that requires a list value, a pattern match returns a
list consisting of the subexpressions matched by the parentheses in the
@@ -745,7 +746,7 @@ PATTERN contains a $ that looks like a variable rather than an
end-of-string test, the variable will be interpolated into the pattern
at run-time. If you only want the pattern compiled once the first time
the variable is interpolated, use the C</o> option. If the pattern
-evaluates to a null string, the most recently executed (and successfully compiled) regular
+evaluates to a null string, the last successfully executed regular
expression is used instead. See L<perlre> for further explanation on these.
Options are:
@@ -797,9 +798,9 @@ Examples:
# Delete C comments.
$program =~ s {
- /\* (?# Match the opening delimiter.)
- .*? (?# Match a minimal number of characters.)
- \*/ (?# Match the closing delimiter.)
+ /\* # Match the opening delimiter.
+ .*? # Match a minimal number of characters.
+ \*/ # Match the closing delimiter.
} []gsx;
s/^\s*(.*?)\s*$/$1/; # trim white space
@@ -997,15 +998,16 @@ If the string inside the angle brackets is a reference to a scalar
variable (e.g. <$foo>), then that variable contains the name of the
filehandle to input from.
-If the string inside angle brackets is not a filehandle, it is
-interpreted as a filename pattern to be globbed, and either a list of
-filenames or the next filename in the list is returned, depending on
-context. One level of $ interpretation is done first, but you can't
-say C<E<lt>$fooE<gt>> because that's an indirect filehandle as explained in the
-previous paragraph. You could insert curly brackets to force
-interpretation as a filename glob: C<E<lt>${foo}E<gt>>. (Alternately, you can
-call the internal function directly as C<glob($foo)>, which is probably
-the right way to have done it in the first place.) Example:
+If the string inside angle brackets is not a filehandle, it is interpreted
+as a filename pattern to be globbed, and either a list of filenames or the
+next filename in the list is returned, depending on context. One level of
+$ interpretation is done first, but you can't say C<E<lt>$fooE<gt>>
+because that's an indirect filehandle as explained in the previous
+paragraph. In older versions of Perl, programmers would insert curly
+brackets to force interpretation as a filename glob: C<E<lt>${foo}E<gt>>.
+These days, it's considered cleaner to call the internal function directly
+as C<glob($foo)>, which is probably the right way to have done it in the
+first place. Example:
while (<*.c>) {
chmod 0644, $_;
@@ -1030,6 +1032,30 @@ and just do your own grep() on the filenames. Furthermore, due to its current
implementation of using a shell, the glob() routine may get "Arg list too
long" errors (unless you've installed tcsh(1L) as F</bin/csh>).
+A glob only evaluates its (embedded) argument when it is starting a new
+list. All values must be read before it will start over. In a list
+context this isn't important, because you automatically get them all
+anyway. In a scalar context, however, the operator returns the next value
+each time it is called, or a FALSE value if you've just run out. Again,
+FALSE is returned only once. So if you're expecting a single value from
+a glob, it is much better to say
+
+ ($file) = <blurch*>;
+
+than
+
+ $file = <blurch*>;
+
+because the latter will alternate between returning a filename and
+returning FALSE.
+
+If you're trying to do variable interpolation, it's definitely better
+to use the glob() function, because the older notation can cause people
+to become confused with the indirect filehandle notation.
+
+ @files = glob("$dir/*.[ch]");
+ @files = glob($files[$i]);
+
=head2 Constant Folding
Like C, Perl does a certain amount of expression evaluation at
diff --git a/pod/perlpod.pod b/pod/perlpod.pod
index 46693f1793..b51c9f8fc5 100644
--- a/pod/perlpod.pod
+++ b/pod/perlpod.pod
@@ -29,6 +29,7 @@ use however it pleases. Currently recognized commands are
=item text
=over N
=back
+ =cut
=item *
@@ -75,6 +76,13 @@ book. I'm just trying to make an idiot-proof common source for nroff,
TeX, and other markup languages, as used for online documentation.
Both B<pod2html> and B<pod2man> translators exist.
+=head1 Embedding Pods in Perl Modules
+
+You can embed pod documentation in your Perl scripts. Start your
+documentation with a =head1 command at the beginning, and end it with
+an =cut command. Perl will ignore the pod text. See any of the
+supplied library modules for examples.
+
=head1 Author
Larry Wall
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 295b6bd518..2f2d79b492 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -19,12 +19,13 @@ in question might not actually be a slash. In fact, any of these
modifiers may also be embedded within the regular expression itself using
the new C<(?...)> construct. See below.
-The C</x> modifier itself needs a little more explanation. It tells the
-regular expression parser to ignore whitespace that is not backslashed
-or within a character class. You can use this to break up your regular
-expression into (slightly) more readable parts. Together with the
-capability of embedding comments described later, this goes a long
-way towards making Perl 5 a readable language. See the C comment
+The C</x> modifier itself needs a little more explanation. It tells
+the regular expression parser to ignore whitespace that is not
+backslashed or within a character class. You can use this to break up
+your regular expression into (slightly) more readable parts. The C<#>
+character is also treated as a metacharacter introducing a comment,
+just as in ordinary Perl code. Taken together, these features go a
+long way towards making Perl 5 a readable language. See the C comment
deletion code in L<perlop>.
=head2 Regular Expressions
@@ -147,7 +148,7 @@ When the bracketing construct C<( ... )> is used, \<digit> matches the
digit'th substring. (Outside of the pattern, always use "$" instead of
"\" in front of the digit. The scope of $<digit> (and C<$`>, C<$&>, and C<$')>
extends to the end of the enclosing BLOCK or eval string, or to the
-next pattern match with subexpressions.
+next successful pattern match, whichever comes first.
If you want to
use parentheses to delimit subpattern (e.g. a set of alternatives) without
saving it as a subpattern, follow the ( with a ?.
diff --git a/pod/perlref.pod b/pod/perlref.pod
index f12cad4554..62d99a8a28 100644
--- a/pod/perlref.pod
+++ b/pod/perlref.pod
@@ -1,3 +1,6 @@
+(Don't
+convert references into strings though, or you'll break their referenceness.)
+
=head1 NAME
perlref - Perl references and nested data structures
diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod
index 252e679b72..4b1d607e7e 100644
--- a/pod/perlsyn.pod
+++ b/pod/perlsyn.pod
@@ -19,35 +19,43 @@ which lines you look at. (Actually, I'm lying--it is possible to do an
implicit loop with either the B<-n> or B<-p> switch. It's just not the
mandatory default like it is in B<sed> and B<awk>.)
+=head2 Declarations
+
Perl is, for the most part, a free-form language. (The only
exception to this is format declarations, for obvious reasons.) Comments
are indicated by the "#" character, and extend to the end of the line. If
you attempt to use C</* */> C-style comments, it will be interpreted
either as division or pattern matching, depending on the context, and C++
-C<//> comments just look like a null regular expression, So don't do
+C<//> comments just look like a null regular expression, so don't do
that.
A declaration can be put anywhere a statement can, but has no effect on
the execution of the primary sequence of statements--declarations all
take effect at compile time. Typically all the declarations are put at
-the beginning or the end of the script.
+the beginning or the end of the script. However, if you're using
+lexically-scoped private variables created with my(), you'll have to make sure
+your format or subroutine definition is within the same block scope
+as the my if you expect to be able to access those private variables.
-As of Perl 5, declaring a subroutine allows a subroutine name to be used
-as if it were a list operator from that point forward in the program. You
-can declare a subroutine without defining it by saying just
+Declaring a subroutine allows a subroutine name to be used as if it were a
+list operator from that point forward in the program. You can declare a
+subroutine without defining it by saying just
sub myname;
$me = myname $0 or die "can't get myname";
-Note that it functions as a list operator though, not a unary
+Note that it functions as a list operator though, not as a unary
operator, so be careful to use C<or> instead of C<||> there.
-Subroutines declarations can also be imported by a C<use> statement.
+Subroutine declarations can also be loaded up with the C<require> statement
+or both loaded and imported into your namespace with a C<use> statement.
+See L<perlmod> for details on this.
-Also as of Perl 5, a statement sequence may contain declarations of
-lexically scoped variables, but apart from declaring a variable name,
-the declaration acts like an ordinary statement, and is elaborated within
-the sequence of statements as if it were an ordinary statement.
+A statement sequence may contain declarations of lexically-scoped
+variables, but apart from declaring a variable name, the declaration acts
+like an ordinary statement, and is elaborated within the sequence of
+statements as if it were an ordinary statement. That means it actually
+has both compile-time and run-time effects.
=head2 Simple statements
@@ -58,8 +66,7 @@ the semicolon is optional. (A semicolon is still encouraged there if the
block takes up more than one line, since you may eventually add another line.)
Note that there are some operators like C<eval {}> and C<do {}> that look
like compound statements, but aren't (they're just TERMs in an expression),
-and thus need an explicit termination
-if used as the last item in a statement.
+and thus need an explicit termination if used as the last item in a statement.
Any simple statement may optionally be followed by a I<SINGLE> modifier,
just before the terminating semicolon (or block ending). The possible
@@ -79,14 +86,14 @@ executes once before the conditional is evaluated. This is so that you
can write loops like:
do {
- $_ = <STDIN>;
+ $line = <STDIN>;
...
- } until $_ eq ".\n";
+ } until $line eq ".\n";
See L<perlfunc/do>. Note also that the loop control
statements described later will I<NOT> work in this construct, since
modifiers don't take loop labels. Sorry. You can always wrap
-another block around it to do that sort of thing.)
+another block around it to do that sort of thing.
=head2 Compound statements
@@ -128,14 +135,86 @@ the sense of the test is reversed.
The C<while> statement executes the block as long as the expression is
true (does not evaluate to the null string or 0 or "0"). The LABEL is
-optional, and if present, consists of an identifier followed by a
-colon. The LABEL identifies the loop for the loop control statements
-C<next>, C<last>, and C<redo> (see below). If there is a C<continue>
-BLOCK, it is always executed just before the conditional is about to be
-evaluated again, just like the third part of a C<for> loop in C.
-Thus it can be used to increment a loop variable, even when the loop
-has been continued via the C<next> statement (which is similar to the C
-C<continue> statement).
+optional, and if present, consists of an identifier followed by a colon.
+The LABEL identifies the loop for the loop control statements C<next>,
+C<last>, and C<redo>. If the LABEL is omitted, the loop control statement
+refers to the innermost enclosing loop. This may include dynamically
+looking back your call-stack at run time to find the LABEL. Such
+desperate behavior triggers a warning if you use the B<-w> flag.
+
+If there is a C<continue> BLOCK, it is always executed just before the
+conditional is about to be evaluated again, just like the third part of a
+C<for> loop in C. Thus it can be used to increment a loop variable, even
+when the loop has been continued via the C<next> statement (which is
+similar to the C C<continue> statement).
+
+=head2 Loop Control
+
+The C<next> command is like the C<continue> statement in C; it starts
+the next iteration of the loop:
+
+ LINE: while (<STDIN>) {
+ next LINE if /^#/; # discard comments
+ ...
+ }
+
+The C<last> command is like the C<break> statement in C (as used in
+loops); it immediately exits the loop in question. The
+C<continue> block, if any, is not executed:
+
+ LINE: while (<STDIN>) {
+ last LINE if /^$/; # exit when done with header
+ ...
+ }
+
+The C<redo> command restarts the loop block without evaluating the
+conditional again. The C<continue> block, if any, is I<not> executed.
+This command is normally used by programs that want to lie to themselves
+about what was just input.
+
+For example, when processing a file like F</etc/termcap>,
+if your input lines might end in backslashes to indicate continuation, you
+want to skip ahead and get the next record.
+
+ while (<>) {
+ chomp;
+ if (s/\\$//) {
+ $_ .= <>;
+ redo unless eof();
+ }
+ # now process $_
+ }
+
+which is Perl short-hand for the more explicitly written version:
+
+ LINE: while ($line = <ARGV>) {
+ chomp($line);
+ if ($line =~ s/\\$//) {
+ $line .= <ARGV>;
+ redo LINE unless eof(); # not eof(ARGV)!
+ }
+ # now process $line
+ }
+
+Or here's a simpleminded Pascal comment stripper (warning: assumes no { or } in strings):
+
+ LINE: while (<STDIN>) {
+ while (s|({.*}.*){.*}|$1 |) {}
+ s|{.*}| |;
+ if (s|{.*| |) {
+ $front = $_;
+ while (<STDIN>) {
+ if (/}/) { # end of comment?
+ s|^|$front{|;
+ redo LINE;
+ }
+ }
+ }
+ print;
+ }
+
+Note that if there were a C<continue> block on the above code, it would get
+executed even on discarded lines.
If the word C<while> is replaced by the word C<until>, the sense of the
test is reversed, but the conditional is still tested before the first
@@ -143,11 +222,13 @@ iteration.
In either the C<if> or the C<while> statement, you may replace "(EXPR)"
with a BLOCK, and the conditional is true if the value of the last
-statement in that block is true. (This feature continues to work in Perl
-5 but is deprecated. Please change any occurrences of "if BLOCK" to
-"if (do BLOCK)".)
+statement in that block is true. While this "feature" continues to work in
+version 5, it has been deprecated, so please change any occurrences of "if BLOCK" to
+"if (do BLOCK)".
+
+=head2 For and Foreach
-The C-style C<for> loop works exactly like the corresponding C<while> loop:
+Perl's C-style C<for> loop works exactly like the corresponding C<while> loop:
for ($i = 1; $i < 10; $i++) {
...
@@ -162,38 +243,78 @@ is the same as
$i++;
}
-The foreach loop iterates over a normal list value and sets the
+The C<foreach> loop iterates over a normal list value and sets the
variable VAR to be each element of the list in turn. The variable is
-implicitly local to the loop and regains its former value upon exiting
-the loop. (If the variable was previously declared with C<my>, it uses
-that variable instead of the global one, but it's still localized to
-the loop.) The C<foreach> keyword is actually a synonym for the C<for>
-keyword, so you can use C<foreach> for readability or C<for> for
-brevity. If VAR is omitted, $_ is set to each value. If LIST is an
-actual array (as opposed to an expression returning a list value), you
-can modify each element of the array by modifying VAR inside the loop.
+implicitly local to the loop and regains its former value upon exiting the
+loop. If the variable was previously declared with C<my>, it uses that
+variable instead of the global one, but it's still localized to the loop.
+This can cause problems if you have subroutine or format declarations
+within that block's scope.
+
+The C<foreach> keyword is actually a synonym for the C<for> keyword, so
+you can use C<foreach> for readability or C<for> for brevity. If VAR is
+omitted, $_ is set to each value. If LIST is an actual array (as opposed
+to an expression returning a list value), you can modify each element of
+the array by modifying VAR inside the loop. That's because the C<foreach>
+loop index variable is an implicit alias for each item in the list that
+you're looping over.
+
Examples:
- for (@ary) { s/foo/bar/; }
+ for (@ary) { s/foo/bar/ }
foreach $elem (@elements) {
$elem *= 2;
}
- for ((10,9,8,7,6,5,4,3,2,1,'BOOM')) {
- print $_, "\n"; sleep(1);
+ for $count (10,9,8,7,6,5,4,3,2,1,'BOOM') {
+ print $count, "\n"; sleep(1);
}
for (1..15) { print "Merry Christmas\n"; }
- foreach $item (split(/:[\\\n:]*/, $ENV{'TERMCAP'})) {
+ foreach $item (split(/:[\\\n:]*/, $ENV{TERMCAP})) {
print "Item: $item\n";
}
+Here's how a C programmer might code up a particular algorithm in Perl:
+
+ for ($i = 0; $i < @ary1; $i++) {
+ for ($j = 0; $j < @ary2; $j++) {
+ if ($ary1[$i] > $ary2[$j]) {
+ last; # can't go to outer :-(
+ }
+ $ary1[$i] += $ary2[$j];
+ }
+ }
+
+Whereas here's how a Perl programmer more comfortable with the idiom might
+do it:
+
+ OUTER: foreach $i (@ary1) {
+ INNER: foreach $j (@ary2) {
+ next OUTER if $i > $j;
+ $i += $j;
+ }
+ }
+
+See how much easier this is? It's cleaner, safer, and faster.
+It's cleaner because it's less noisy.
+It's safer because if code gets added
+between the inner and outer loops later, you won't accidentally execute
+it because you've explicitly asked to iterate the other loop rather than
+merely terminating the inner one.
+And it's faster because Perl executes a C<foreach> statement more
+rapidly than it would the equivalent C<for> loop.
+
+=head2 Basic BLOCKs and Switch Statements
+
A BLOCK by itself (labeled or not) is semantically equivalent to a loop
that executes once. Thus you can use any of the loop control
statements in it to leave or restart the block. The C<continue> block
-is optional. This construct is particularly nice for doing case
+is optional.
+
+The BLOCK construct is particularly nice for doing case
structures.
SWITCH: {
@@ -267,3 +388,48 @@ or even, horrors,
else
{ $nothing = 1 }
+
+A common idiom for a switch statement is to use C<foreach>'s aliasing to make
+a temporary assignment to $_ for convenient matching:
+
+ SWITCH: for ($where) {
+ /In Card Names/ && do { push @flags, '-e'; last; };
+ /Anywhere/ && do { push @flags, '-h'; last; };
+ /In Rulings/ && do { last; };
+ die "unknown value for form variable where: `$where'";
+ }
+
+=head2 Goto
+
+Although not for the faint of heart, Perl does support a C<goto> statement.
+A loop's LABEL is not actually a valid target for a C<goto>;
+it's just the name of the loop. There are three forms: goto-LABEL,
+goto-EXPR, and goto-&NAME.
+
+The goto-LABEL form finds the statement labeled with LABEL and resumes
+execution there. It may not be used to go into any construct that
+requires initialization, such as a subroutine or a foreach loop. It
+also can't be used to go into a construct that is optimized away. It
+can be used to go almost anywhere else within the dynamic scope,
+including out of subroutines, but it's usually better to use some other
+construct such as last or die. The author of Perl has never felt the
+need to use this form of goto (in Perl, that is--C is another matter).
+
+The goto-EXPR form expects a label name, whose scope will be resolved
+dynamically. This allows for computed gotos per FORTRAN, but isn't
+necessarily recommended if you're optimizing for maintainability:
+
+ goto ("FOO", "BAR", "GLARCH")[$i];
+
+The goto-&NAME form is highly magical, and substitutes a call to the
+named subroutine for the currently running subroutine. This is used by
+AUTOLOAD() subroutines that wish to load another subroutine and then
+pretend that the other subroutine had been called in the first place
+(except that any modifications to @_ in the current subroutine are
+propagated to the other subroutine.) After the C<goto>, not even caller()
+will be able to tell that this routine was called first.
+
+In almost all cases like this, it's usually a far, far better idea to use the
+structured control flow mechanisms of C<next>, C<last>, or C<redo> instead of
+resorting to a C<goto>. For certain applications, the catch and throw pair of
+C<eval{}> and die() for exception processing can also be a prudent approach.
diff --git a/pod/perltrap.pod b/pod/perltrap.pod
index fa68a753c2..cfe964270c 100644
--- a/pod/perltrap.pod
+++ b/pod/perltrap.pod
@@ -191,7 +191,8 @@ in Perl 5 is the backslash, which creates a reference.
=item *
-C<ARGV> must be capitalized.
+C<ARGV> must be capitalized. C<$ARGV[0]> is C's C<argv[1]>, and C<argv[0]>
+ends up in C<$0>.
=item *
@@ -381,6 +382,28 @@ Because if that were to work, then this couldn't:
=item *
+The precedence of assignment operators is now the same as the precedence
+of assignment. Perl 4 mistakenly gave them the precedence of the associated
+operator. So you now must parenthesize them in expressions like
+
+ /foo/ ? ($a += 2) : ($a -= 2);
+
+Otherwise
+
+ /foo/ ? $a += 2 : $a -= 2;
+
+would be erroneously parsed as
+
+ (/foo/ ? $a += 2 : $a) -= 2;
+
+On the other hand,
+
+ $a += /foo/ ? 1 : 2;
+
+now works as a C programmer would expect.
+
+=item *
+
C<open FOO || die> is now incorrect. You need parens around the filehandle.
While temporarily supported, using such a construct will
generate a non-fatal (but non-suppressible) warning.
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index bfd04f74d4..e5d0091c85 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -139,12 +139,13 @@ Use of "C<$*>" is deprecated in Perl 5.
=item $.
-The current input line number of the last filehandle that was read.
-This variable should be considered read-only.
-Remember that only an explicit close on the filehandle
-resets the line number. Since "C<E<lt>E<gt>>" never does an explicit close, line
-numbers increase across ARGV files (but see examples under eof()).
-(Mnemonic: many programs use "." to mean the current line number.)
+The current input line number of the last filehandle that was read. An
+explicit close on the filehandle resets the line number. Since
+"C<E<lt>E<gt>>" never does an explicit close, line numbers increase
+across ARGV files (but see examples under eof()). Localizing C<$.> has
+the effect of also localizing Perl's notion of "the last read
+filehandle". (Mnemonic: many programs use "." to mean the current line
+number.)
=item input_record_separator HANDLE EXPR
diff --git a/pod/perlxs.pod b/pod/perlxs.pod
index ffbaa6b1c3..df2aefa0b6 100644
--- a/pod/perlxs.pod
+++ b/pod/perlxs.pod
@@ -151,16 +151,6 @@ the variable, as is demonstrated in the rpcb_gettime() function above. See
the section on typemaps for more about handling qualifiers and unary
operators in C types.
-The parameter list of a function must not have whitespace after the
-open-parenthesis or before the close-parenthesis. (This restriction will be
-relaxed in later versions of B<xsubpp>.)
-
- INCORRECT CORRECT
-
- double double
- sin( x ) sin(x)
- double x double x
-
The function name and the return type must be placed on
separate lines.
@@ -552,6 +542,15 @@ then not push return values on the stack.
}
}
+=head2 The REQUIRE: Keyword
+
+The REQUIRE: keyword is used to indicate the minimum version of the
+B<xsubpp> compiler needed to compile the XS module. An XS module which
+contains the following statement will only compile with B<xsubpp> version
+1.922 or greater:
+
+ REQUIRE: 1.922
+
=head2 The CLEANUP: Keyword
This keyword can be used when an XSUB requires special cleanup procedures
diff --git a/pod/perlxstut.pod b/pod/perlxstut.pod
new file mode 100644
index 0000000000..484f49dfc3
--- /dev/null
+++ b/pod/perlxstut.pod
@@ -0,0 +1,529 @@
+=head1 NAME
+
+perlXStut - Tutorial for XSUB's
+
+=head1 DESCRIPTION
+
+This tutorial will educate the reader on the steps involved in creating
+a Perl 5 extension. The reader is assumed to have access to L<perlguts> and
+L<perlxs>.
+
+This tutorial starts with very simple examples and becomes more complex,
+bringing in more features that are available. Thus, certain statements
+towards the beginning may be incomplete. The reader is encouraged to
+read the entire document before lambasting the author about apparent
+mistakes.
+
+This tutorial is still under construction. Constructive comments
+are welcome.
+
+=head1 EXAMPLE 1
+
+Our first extension will be very simple. When we call the routine in the
+extension, it will print out a well-known message and terminate.
+
+Run "h2xs -A -n Test1". This creates a directory named Test1, possibly under
+ext/ if it exists in the current working directory. Four files will be
+created in the Test1 dir: MANIFEST, Makefile.PL, Test1.pm, Test1.xs.
+
+The MANIFEST file should contain the names of the four files created.
+
+The file Makefile.PL should look something like this:
+
+ use ExtUtils::MakeMaker;
+ # See lib/ExtUtils/MakeMaker.pm for details of how to influence
+ # the contents of the Makefile that is written.
+ WriteMakefile(
+ 'NAME' => 'Test1',
+ 'VERSION' => '0.1',
+ 'LIBS' => [''], # e.g., '-lm'
+ 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING'
+ 'INC' => '', # e.g., '-I/usr/include/other'
+ );
+
+The file Test1.pm should look something like this:
+
+ package Test1;
+
+ require Exporter;
+ require DynaLoader;
+
+ @ISA = qw(Exporter DynaLoader);
+ # Items to export into callers namespace by default. Note: do not export
+ # names by default without a very good reason. Use EXPORT_OK instead.
+ # Do not simply export all your public functions/methods/constants.
+ @EXPORT = qw(
+
+ );
+ bootstrap Test1;
+
+ # Preloaded methods go here.
+
+ # Autoload methods go after __END__, and are processed by the autosplit program.
+
+ 1;
+ __END__
+
+And the Test1.xs file should look something like this:
+
+ #include "EXTERN.h"
+ #include "perl.h"
+ #include "XSUB.h"
+
+ MODULE = Test1 PACKAGE = Test1
+
+Let's edit the .xs file by adding this to the end of the file:
+
+ void
+ hello()
+
+ CODE:
+ printf("Hello, world!\n");
+
+Now we'll run "perl Makefile.PL". This will create a real Makefile,
+which make needs. Its output looks something like:
+
+ % perl Makefile.PL
+ Checking if your kit is complete...
+ Looks good
+ Writing Makefile for Test1
+ %
+
+Now, running make will produce output that looks something like this:
+
+ % make
+ mkdir ./blib
+ mkdir ./blib/auto
+ mkdir ./blib/auto/Test1
+ perl xsubpp -typemap typemap Test1.xs >Test1.tc && mv Test1.tc Test1.c
+ cc -c Test1.c
+ Running Mkbootstrap for Test1 ()
+ chmod 644 Test1.bs
+ LD_RUN_PATH="" ld -o ./blib/auto/Test1/Test1.sl -b Test1.o
+ chmod 755 ./blib/auto/Test1/Test1.sl
+ cp Test1.bs ./blib/auto/Test1/Test1.bs
+ chmod 644 ./blib/auto/Test1/Test1.bs
+ cp Test1.pm ./blib/Test1.pm
+ chmod 644 ./blib/Test1.pm
+
+Now we'll create a test script, test1.pl in the Test1 directory. It should
+look like this:
+
+ #! /usr/local/bin/perl
+
+ BEGIN { unshift(@INC, "./blib") }
+
+ use Test1;
+
+ Test1::hello();
+
+Now we run the script and we should see the following output:
+
+ % perl test1.pl
+ Hello, world!
+ %
+
+=head1 EXAMPLE 2
+
+Now let's create a simple extension that will take a single argument and
+return 0 if the argument is even, 1 if the argument is odd.
+
+Run "h2xs -A -n Test2". This will create a Test2 directory with a file
+Test2.xs underneath it. Add the following to the end of the XS file:
+
+ int
+ is_even(input)
+ int input
+
+ CODE:
+ RETVAL = input % 2;
+
+ OUTPUT:
+ RETVAL
+
+(Note that the line after the declaration of is_even is indented one tab
+stop. Although there is a tab between "int" and "input", this can be any
+amount of white space. Also notice that there is no semi-colon following
+the "declaration" of the variable input.)
+
+Now perform the same steps as before, generating a Makefile from the
+Makefile.PL file, and running make.
+
+Our test file test2.pl will now look like:
+
+ BEGIN { unshift(@INC, "./blib"); }
+
+ use Test2;
+
+ $a = &Test2::is_even(2);
+ $b = &Test2::is_even(3);
+
+ print "\$a is $a, \$b is $b\n";
+
+The output should look like:
+
+ % perl test2.pl
+ $a is 0, $b is 1
+ %
+
+=head1 WHAT HAS GONE ON?
+
+The program h2xs is the starting point for creating extensions. In later
+examples, we'll see how we can use h2xs to read header files and generate
+templates to connect to C routines.
+
+h2xs creates a number of files in the extension directory. The file
+Makefile.PL is a perl script which will generate a true Makefile to build
+the extension. We'll take a closer look at it later.
+
+The files <extension>.pm and <extension>.xs contain the meat of the extension.
+The .xs file holds the C routines that make up the extension. The .pm file
+contains routines that tell Perl how to load your extension.
+
+Generating and invoking the Makefile created a directory blib in the current
+working directory. This directory will contain the shared library that we
+will build. Once we have tested it, we can install it into its final location.
+
+Finally, our test scripts do two important things. First of all, they place
+the directory "blib" at the head of the @INC array. Placing this inside a
+BEGIN block assures us that Perl will look in the blib directory hierarchy
+before looking in the system directories. This could be important if you are
+upgrading an already-existing extension and do not want to disturb the system
+version until you are ready to install it.
+
+Second, the test scripts tell Perl to C<use extension;>. When Perl sees this,
+it searches for a .pm file of the same name in the various directories kept
+in the @INC array. If it cannot be found, perl will die with an error that
+will look something like:
+
+ Can't locate Test2.pm in @INC at ./test2.pl line 5.
+ BEGIN failed--compilation aborted at ./test2.pl line 5.
+
+The .pm file tells perl that it will need the Exporter and Dynamic Loader
+extensions. It then sets the @ISA array, which is used for looking up
+methods that might not exist in the current package, and finally tells perl
+to bootstrap the module. Perl will call its dynamic loader routine and load
+the shared library.
+
+The @EXPORT array in the .pm file tells Perl which of the extension's
+routines should be placed into the calling package's namespace. In our two
+examples so far, we have not modified the @EXPORT array, so our test
+scripts must call the routines by their complete name (e.g., Test1::hello).
+If we placed the name of the routine in the @EXPORT array, so that the
+.pm file looked like:
+
+ @EXPORT = qw( hello );
+
+Then the hello routine would also be callable from the "main" package.
+We could therefore change test1.pl to look like:
+
+ #! /usr/local/bin/perl
+
+ BEGIN { unshift(@INC, "./blib") }
+
+ use Test1;
+
+ hello();
+
+And we would get the same output, "Hello, world!".
+
+Most of the time you do not want to export the names of your extension's
+subroutines, because they might accidentally clash with other subroutines
+from other extensions or from the calling program itself.
+
+=head1 EXAMPLE 3
+
+Our third extension will take one argument as its input, round off that
+value, and set the argument to the rounded value.
+
+Run "h2xs -A -n Test3". This will create a Test3 directory with a file
+Test3.xs underneath it. Add the following to the end of the XS file:
+
+ void
+ round(arg)
+ double arg
+
+ CODE:
+ if (arg > 0.0) {
+ arg = floor(arg + 0.5);
+ } else if (arg < 0.0) {
+ arg = ceil(arg - 0.5);
+ } else {
+ arg = 0.0;
+ }
+ OUTPUT:
+ arg
+
+Edit the file Makefile.PL so that the corresponding line looks like this:
+
+ 'LIBS' => ['-lm'], # e.g., '-lm'
+
+Generate the Makefile and run make. The test script test3.pl looks like:
+
+ #! /usr/local/bin/perl
+
+ BEGIN { unshift(@INC, "./blib"); }
+
+ use Test3;
+
+ foreach $i (-1.4, -0.5, 0.0, 0.4, 0.5) {
+ $j = $i;
+ &Test3::round($j);
+ print "Rounding $i results in $j\n";
+ }
+
+ print STDERR "Trying to round a constant -- ";
+ &Test3::round(2.0);
+
+Notice the output from trying to send a constant in to the routine. Perl
+reports:
+
+ Modification of a read-only value attempted at ./test3.pl line 15.
+
+Perl won't let you change the value of two to, say, three, unlike a FORTRAN
+compiler from long, long ago!
+
+=head1 WHAT'S NEW HERE?
+
+Two things are new here. First, we've made some changes to Makefile.PL.
+Namely, we've specified an extra library to link in, in this case the
+math library, libm. We'll talk later about how to write XSUBs that can call
+every routine in a library.
+
+Second, the value of the function is being passed back not as the function's
+return value, but through the same variable that was passed into the function.
+
+=head1 INPUT AND OUTPUT PARAMETERS
+
+You specify the parameters that will be passed into the XSUB just after you
+declare the function return value and name. The list of parameters looks
+very C-like, but the lines must be indented by a tab stop, and each line
+may not have an ending semi-colon.
+
+The list of output parameters occurs after the OUTPUT: directive. The use
+of RETVAL tells Perl that you wish to send this value back as the return
+value of the XSUB function. Otherwise, you specify which variables used
+in the XSUB function should be placed into the respective Perl variables
+passed in.
+
+=head1 THE XSUBPP COMPILER
+
+The compiler xsubpp takes the XS code in the .xs file and converts it into
+C code, placing it in a file whose suffix is .c. The C code created makes
+heavy use of the C functions within Perl.
+
+=head1 THE TYPEMAP FILE
+
+The xsubpp compiler uses rules to convert from Perl's data types (scalar,
+array, etc.) to C's data types (int, char *, etc.). These rules are stored
+in the typemap file ($PERLLIB/ExtUtils/typemap). This file is split into
+three parts.
+
+The first part attempts to map various C data types to a coded flag, which
+has some correspondence with the various Perl types. The second part contains
+C code which xsubpp uses for input parameters. The third part contains C
+code which xsubpp uses for output parameters. We'll talk more about the
+C code later.
+
+Let's now take a look at the .c file created for the Test3 extension.
+
+ /*
+ * This file was generated automatically by xsubpp version 1.9 from the
+ * contents of Test3.xs. Don't edit this file, edit Test3.xs instead.
+ *
+ * ANY CHANGES MADE HERE WILL BE LOST!
+ *
+ */
+
+ #include "EXTERN.h"
+ #include "perl.h"
+ #include "XSUB.h"
+
+
+ XS(XS_Test3_round)
+ {
+ dXSARGS;
+ if (items != 1) {
+ croak("Usage: Test3::round(arg)");
+ }
+ {
+ double arg = (double)SvNV(ST(0)); /* XXXXX */
+
+ if (arg > 0.0) {
+ arg = floor(arg + 0.5);
+ } else if (arg < 0.0) {
+ arg = ceil(arg - 0.5);
+ }
+
+ sv_setnv(ST(0), (double)arg); /* XXXXX */
+ }
+ XSRETURN(1);
+ }
+
+ XS(boot_Test3)
+ {
+ dXSARGS;
+ char* file = __FILE__;
+
+ newXS("Test3::round", XS_Test3_round, file);
+ ST(0) = &sv_yes;
+ XSRETURN(1);
+ }
+
+Notice the two lines marked with "XXXXX". If you check the first section of
+the typemap file, you'll see that doubles are of type T_DOUBLE. In the
+INPUT section, an argument that is T_DOUBLE is assigned to the variable
+arg by calling the routine SvNV on something, then casting it to double,
+then assigned to the variable arg. Similarly, in the OUTPUT section,
+once arg has its final value, it is passed to the sv_setnv function to
+be passed back to the calling subroutine. These two functions are explained
+in perlguts; we'll talk more later about what that "ST(0)" means in the
+section on the argument stack.
+
+=head1 WARNING
+
+In general, it's not a good idea to write extensions that modify their input
+parameters, as in Example 3. However, in order to better accommodate calling
+pre-existing C routines, which often do modify their input parameters,
+this behavior is tolerated. The next example will show how to do this.
+
+=head1 EXAMPLE 4
+
+We'll now show how we can call routines in libraries, such as the curses
+screen handling package, or a DBM module like GDBM. Each of these libraries
+has a header file from which we will generate an XS template that we'll then
+fine-tune.
+
+Rather than attempt to find a library that exists on all systems, we'll
+first create our own C library, then create an XSUB to it.
+
+Let's create the files libtest4.h and libtest4.c as follows:
+
+ /* libtest4.h */
+
+ #define TESTVAL 4
+
+ extern int test4(int, long, const char*);
+
+ /* libtest4.c */
+
+ #include <stdlib.h>
+ #include "./libtest4.h"
+
+ int
+ test4(a, b, c)
+ int a;
+ long b;
+ const char * c;
+ {
+ return (a + b + atof(c) + TESTVAL);
+ }
+
+Now let's compile it into a library. Since we'll be eventually using this
+archive to create a shared library, be sure to use the correct flags to
+generate position-independent code. In HP-UX, that's:
+
+ % cc -Aa -D_HPUX_SOURCE -c +z libtest4.c
+ % ar cr libtest4.a libtest4.o
+
+Now let's move the libtest4.h and libtest4.a files into a sub-directory under
+/tmp, so we don't interfere with anything.
+
+ % mkdir /tmp/test4
+ % mkdir /tmp/test4/include
+ % mkdir /tmp/test4/lib
+ % cp libtest4.h /tmp/test4/include
+ % cp libtest4.a /tmp/test4/lib
+
+Okay, now that we have a header file and a library, let's begin actually
+writing the extension.
+
+Run "h2xs -n Test4 /tmp/test4/include/libtest4.h" (notice we are no longer
+specifying -A as an argument). This will create a Test4 directory with a file
+Test4.xs underneath it. If we look at it now, we'll see some interesting
+things have been added to the various files.
+
+=over 2
+
+=item *
+
+In the .xs file, there's now a #include declaration with the full path to
+the libtest4.h header file.
+
+=item *
+
+There's now some new C code that's been added to the .xs file. The purpose
+of the C<constant> routine is to make the values that are #define'd in the
+header file available to the Perl script by calling C<&main::TESTVAL>.
+There's also some XS code to allow calls to the C<constant> routine.
+
+=item *
+
+The .pm file has exported the name TESTVAL in the @EXPORT array. This
+could lead to name clashes. A good rule of thumb is that if the #define
+is only going to be used by the C routines themselves, and not by the user,
+they should be removed from the @EXPORT array. Alternately, if you don't
+mind using the "fully qualified name" of a variable, you could remove most
+or all of the items in the @EXPORT array.
+
+=back
+
+Let's now add a definition for the routine in our library. Add the following
+code to the end of the .xs file:
+
+ int
+ test4(a,b,c)
+ int a
+ long b
+ const char * c
+
+Now we also need to create a typemap file because the default Perl typemap doesn't
+currently support the const char * type. Create a file called typemap and
+place the following in it:
+
+ const char * T_PV
+
+Now we must tell our Makefile template where our new library is. Edit the
+Makefile.PL and change the following line:
+
+ 'LIBS' => ['-ltest4 -L/tmp/test4'], # e.g., '-lm'
+
+This specifies that we want the library test4 linked into our XSUB, and that
+it should also look in the directory /tmp/test4.
+
+Let's also change the following line in the Makefile.PL to this:
+
+ 'INC' => '-I/tmp/test4/include', # e.g., '-I/usr/include/other'
+
+and also change the #include in Test4.xs to be:
+
+ #include <libtest4.h>
+
+Now we don't have to specify the absolute path of the header file in the
+.xs file, relying on the Makefile to tell the compiler where to find the
+header files. This is generally considered a Good Thing.
+
+Okay, let's create the Makefile, and run make. You can ignore a message that
+may look like:
+
+ Warning (non-fatal): No library found for -ltest4
+
+If you forgot to create the typemap file, you might see output that looks
+like this:
+
+ Error: 'const char *' not in typemap in test4.xs, line 102
+
+This error means that you have used a C datatype that xsubpp doesn't know
+how to convert between Perl and C. You'll have to create a typemap file to
+tell xsubpp how to do the conversions.
+
+=head1 Author
+
+Jeff Okamoto
+
+=head1 Last Changed
+
+1995/11/20
+
+Jeff Okamoto
+F<E<lt>okamoto@hpcc123.corp.hp.comE<gt>>
diff --git a/pod/pod2html.PL b/pod/pod2html.PL
new file mode 100644
index 0000000000..aee400df34
--- /dev/null
+++ b/pod/pod2html.PL
@@ -0,0 +1,550 @@
+#!/usr/local/bin/perl
+
+use Config;
+use File::Basename qw(&basename &dirname);
+
+# List explicitly here the variables you want Configure to
+# generate. Metaconfig only looks for shell variables, so you
+# have to mention them as if they were shell variables, not
+# %Config entries. Thus you write
+# $startperl
+# to ensure Configure will look for $Config{startperl}.
+
+# This forces PL files to create target in same directory as PL file.
+# This is so that make depend always knows where to find PL derivatives.
+chdir(dirname($0)); # NOTE(review): the result of chdir is not checked
+($file = basename($0)) =~ s/\.PL$//; # output name is this script's own name without ".PL"
+$file =~ s/\.pl$//
+ if ($Config{'osname'} eq 'VMS' or
+ $Config{'osname'} eq 'OS2'); # "case-forgiving"
+
+open OUT,">$file" or die "Can't create $file: $!";
+
+print "Extracting $file (with variable substitutions)\n";
+
+# In this section, perl variables will be expanded during extraction.
+# You can use $Config{...} to use Configure variables.
+
+print OUT <<"!GROK!THIS!";
+$Config{'startperl'}
+!GROK!THIS!
+
+# In the following, perl variables are not expanded during extraction.
+
+print OUT <<'!NO!SUBS!';
+eval 'exec perl -S $0 ${1+"$@"}'
+ if $running_under_some_shell;
+#
+# pod2html - convert pod format to html
+# Version 1.15
+# usage: pod2html [podfiles]
+# Will read the cwd and parse all files with .pod extension
+# if no arguments are given on the command line.
+#
+# Many helps, suggestions, and fixes from the perl5 porters, and all over.
+# Bill Middleton - wjm@metronet.com
+#
+# Please send patches/fixes/features to me
+#
+#
+#
+*RS = */; # $RS is a readable alias for $/
+*ERRNO = *!; # $ERRNO is a readable alias for $!
+
+################################################################################
+# Invoke with various levels of debugging possible
+################################################################################
+while ($ARGV[0] =~ /^-d(.*)/) {
+ shift;
+ $Debug{ lc($1 || shift) }++; # accepts "-dNAME" or "-d NAME"; NAME is lowercased into %Debug
+}
+
+# ck for podnames on command line
+while ($ARGV[0]) { # NOTE(review): stops at the first false argument (e.g. a file named "0")
+ push(@Pods,shift);
+}
+
+################################################################################
+# CONFIGURE
+#
+# The beginning of the url for the anchors to the other sections.
+# Edit $type to suit. It's configured for relative url's now.
+# Other possibilities are:
+# $type = '<A HREF="file://localhost/usr/local/htmldir/'; # file url
+# $type = '<A HREF="http://www.bozo.com/perl/manual/html/' # server
+#
+################################################################################
+
+$type = '<A HREF="'; # prefix of every generated link; target and closing quote are appended later
+$dir = "."; # location of pods
+
+# look in these pods for things not found within the current pod
+# be careful tho, namespace collisions cause stupid links
+
+@inclusions = qw[
+ perlfunc perlvar perlrun perlop
+];
+################################################################################
+# END CONFIGURE
+################################################################################
+
+$A = {}; # The beginning of all things: $A->{pod}->{"Items"|"Headers"}->{name} = anchor name
+
+unless (@Pods) { # no pods named on the command line: take every *.pod found in $dir
+ opendir(DIR,$dir) or die "Can't opendir $dir: $ERRNO";
+ @Pods = grep(/\.pod$/,readdir(DIR));
+ closedir(DIR) or die "Can't closedir $dir: $ERRNO";
+}
+@Pods or die "aak, expected pods";
+
+# loop twice through the pods, first to learn the links, then to produce html
+for $count (0,1) { # pass 0 learns the link targets, pass 1 writes the html
+    print STDERR "Scanning pods...\n" unless $count;
+    foreach $podfh ( @Pods ) {
+        ($pod = $podfh) =~ s/\.pod$//;
+        Debug("files", "opening 2 $podfh" );
+        print "Creating $pod.html from $podfh\n" if $count;
+        $RS = "\n="; # grok pods by item (Nonstandard but efficient)
+        open($podfh,"<".$podfh) || die "can't open $podfh: $ERRNO";
+        @all = <$podfh>;
+        close($podfh);
+        $RS = "\n";
+
+        $all[0] =~ s/^=//; # the first chunk still has its own leading "="
+        for (@all) { s/=$// } # every chunk ends with the "=" that starts the next command
+        $Podnames{$pod} = 1;
+        $in_list = 0;
+        $html = $pod.".html";
+        if ($count) { # give us a html and rcs header
+            open(HTML,">$html") || die "can't create $html: $ERRNO";
+            print HTML '<!-- $Id$ -->',"\n",'<HTML><HEAD>',"\n";
+            print HTML "<CENTER>" unless $NO_NS;
+            print HTML "<TITLE>$pod</TITLE>\n</HEAD>\n<BODY>";
+            print HTML "</CENTER>" unless $NO_NS;
+        }
+        for ($i = 0; $i <= $#all; $i++) { # decide what to do with each chunk
+            $all[$i] =~ /^(\w+)\s*(.*)\n?([^\0]*)$/ ;
+            ($cmd, $title, $rest) = ($1,$2,$3); # command word, rest of its line, following text
+            if ($cmd eq "item") {
+                if ($count ) { # producing html
+                    do_list("over",$all[$i],\$in_list,\$depth) unless $depth;
+                    do_item($title,$rest,$in_list);
+                }
+                else {
+                    # scan item
+                    scan_thing("item",$title,$pod);
+                }
+            }
+            elsif ($cmd =~ /^head([12])/) {
+                $num = $1;
+                if ($count) { # producing html
+                    do_hdr($num,$title,$rest,$depth);
+                }
+                else {
+                    # header scan
+                    scan_thing($cmd,$title,$pod); # skip head1
+                }
+            }
+            elsif ($cmd =~ /^over/) {
+                $count and $depth and do_list("over",$all[$i+1],\$in_list,\$depth);
+            }
+            elsif ($cmd =~ /^back/) {
+                if ($count) { # producing html
+                    ($depth) or next; # just skip it
+                    do_list("back",$all[$i+1],\$in_list,\$depth);
+                    do_rest("$title.$rest");
+                }
+            }
+            elsif ($cmd =~ /^cut/) {
+                next;
+            }
+            elsif ($cmd =~ /^for/) { # experimental pragma html
+                if ($count) { # producing html
+                    if ($title =~ s/^html//) {
+                        $in_html =1;
+                        do_rest("$title.$rest");
+                    }
+                }
+            }
+            elsif ($cmd =~ /^begin/) { # experimental pragma html
+                if ($count) { # producing html
+                    if ($title =~ s/^html//) {
+                        print HTML $title,"\n",$rest;
+                    }
+                    elsif ($title =~ /^end/) {
+                        next;
+                    }
+                }
+            }
+            elsif ($Debug{"misc"}) {
+                warn("unrecognized header: $cmd");
+            }
+        }
+        # close open lists without '=back' stmts
+        if ($count) { # producing html
+            while ($depth) {
+                do_list("back",$all[$i+1],\$in_list,\$depth);
+            }
+            print HTML "\n</BODY>\n</HTML>\n";
+        }
+    }
+}
+
+sub do_list{ # open ("over") or close ("back") an HTML list; the list type is guessed from the first item
+ my($which,$next_one,$list_type,$depth) = @_; # $list_type and $depth are scalar refs, updated in place
+ my($key);
+ if ($which eq "over") {
+ unless ($next_one =~ /^item\s+(.*)/) {
+ warn "Bad list, $1\n" if $Debug{"misc"}; # NOTE(review): the match failed, so $1 is whatever an earlier match left
+ }
+ $key = $1;
+
+ if ($key =~ /^1\.?/) { # item text starts with "1": ordered list
+ $$list_type = "OL";
+ } elsif ($key =~ /\*\s*$/) { # item text ends with "*": bulleted list
+ $$list_type = "UL";
+ } elsif ($key =~ /\*?\s*\w/) { # item text contains a word character: definition list
+ $$list_type = "DL";
+ } else {
+ warn "unknown list type for item $key" if $Debug{"misc"};
+ }
+
+ print HTML qq{\n};
+ print HTML $$list_type eq 'DL' ? qq{<DL COMPACT>} : qq{<$$list_type>};
+ $$depth++;
+ }
+ elsif ($which eq "back") {
+ print HTML qq{\n</$$list_type>\n}; # closes with the most recently chosen list type
+ $$depth--;
+ }
+}
+
+sub do_hdr{ # write one =headN as <HN> (with a rule before level 1), then its body text
+    my($level,$heading,$body) = @_;
+    if ($level == 1) { print HTML qq{<p><hr>\n} }
+    process_thing(\$heading,"NAME"); # "NAME": the heading is rendered as an anchor target
+    print HTML qq{\n<H$level> };
+    print HTML $heading;
+    print HTML qq{</H$level>\n};
+    do_rest($body);
+}
+
+sub do_item{ # write one =item: <DT>/<DD> inside a DL, <LI> inside any other list
+    my($title,$rest,$list_type) = @_;
+    my $bullet_only = ($title eq '*' && $list_type eq 'UL'); # was "and", which binds looser than "="
+    process_thing(\$title,"NAME"); # "NAME": the item title is rendered as an anchor target
+    if ($list_type eq "DL") {
+        print HTML qq{\n<DT><STRONG>\n};
+        print HTML $title;
+        print HTML qq{\n</STRONG>\n};
+        print HTML qq{<DD>\n};
+    }
+    else {
+        print HTML qq{\n<LI>};
+        unless ($bullet_only or $list_type eq "OL") { # a bare bullet or a number is not repeated as text
+            print HTML $title,"\n";
+        }
+    }
+    do_rest($rest);
+}
+
+sub do_rest{ # the rest of the chunk handled here: written to HTML one paragraph at a time
+ my($rest) = @_;
+ my(@lines,$p,$q,$line,,@paras,$inpre);
+ @paras = split(/\n\n\n*/,$rest); # paragraphs are separated by one or more blank lines
+ for ($p = 0; $p <= $#paras; $p++) {
+ $paras[$p] =~ s/^\n//mg;
+ @lines = split(/\n/,$paras[$p]);
+ if ($in_html) { # handle =for html paragraphs
+ print HTML $paras[0]; # NOTE(review): $paras[0], not $paras[$p]; $in_html appears to be true only on entry, when $p is 0
+ $in_html = 0;
+ next;
+ }
+ elsif ($lines[0] =~ /^\s+\w*\t.*/) { # listing or unordered list
+ print HTML qq{<UL>};
+ foreach $line (@lines) {
+ ($line =~ /^\s+(\w*)\t(.*)/) && (($key,$rem) = ($1,$2));
+ print HTML defined($Podnames{$key})
+ ? "<LI>$type$key.html\">$key<\/A>\t$rem</LI>\n"
+ : "<LI>$line</LI>\n";
+ }
+ print HTML qq{</UL>\n};
+ }
+ elsif ($lines[0] =~ /^\s/) { # preformatted code
+ if ($paras[$p] =~/>>|<</) {
+ print HTML qq{\n<PRE>\n};
+ $inpre=1;
+ }
+ else { # Still cant beat XMP. Yes, I know
+ print HTML qq{\n<XMP>\n}; # it's been obsoleted... suggestions?
+ $inpre = 0;
+ }
+ while (defined($paras[$p])) { # also consumes the indented paragraphs that follow
+ @lines = split(/\n/,$paras[$p]);
+ foreach $q (@lines) { # mind your p's and q's here :-)
+ if ($paras[$p] =~ />>|<</) {
+ if ($inpre) {
+ process_thing(\$q,"HTML");
+ }
+ else {
+ print HTML qq{\n</XMP>\n};
+ print HTML qq{<PRE>\n};
+ $inpre=1;
+ process_thing(\$q,"HTML");
+ }
+ }
+ 1 while $q =~ s/\t+/' 'x (length($&) * 8 - length($`) % 8)/e; # expand tabs to 8-column stops
+ print HTML $q,"\n";
+ }
+ last if $paras[$p+1] !~ /^\s/; # stop once the next paragraph is not indented
+ $p++;
+ }
+ print HTML ($inpre==1) ? (qq{\n</PRE>\n}) : (qq{\n</XMP>\n});
+ }
+ else { # other text
+ @lines = split(/\n/,$paras[$p]);
+ foreach $line (@lines) {
+ process_thing(\$line,"HTML");
+ print HTML qq{$line\n};
+ }
+ }
+ print HTML qq{<p>}; # every paragraph handled above is followed by <p>
+ }
+}
+
+sub process_thing{ # run the escape / link / unescape stages over $$textref; the order matters
+    my($textref,$anchor_mode) = @_;
+    pre_escapes($textref);
+    find_refs($textref,$anchor_mode);
+    post_escapes($textref);
+}
+
+sub scan_thing{ # scan a chunk for later references: record an anchor name in $A for an item or header
+    my($cmd,$title,$pod) = @_;
+    $_ = $title;
+    s/\n$//;
+    s/E<(.*?)>/&$1;/g;
+    # remove any formatting information for the headers
+    s/[SFCBI]<(.*?)>/$1/g;
+    # the "don't format me" thing
+    s/Z<>//g;
+    if ($cmd eq "item") {
+        /^\*/ and return; # skip bullets
+        /^\d+\./ and return; # skip numbers
+        s/(-[a-z]).*/$1/i; # cut the title off right after a "-x" style switch
+        trim($_);
+        return if defined $A->{$pod}->{"Items"}->{$_};
+        $A->{$pod}->{"Items"}->{$_} = gensym($pod, $_);
+        $A->{$pod}->{"Items"}->{(split(' ',$_))[0]}=$A->{$pod}->{"Items"}->{$_}; # first word is an alias
+        Debug("items", "item $_");
+        if (!/^-\w$/ && /([%\$\@\w]+)/ && $1 ne $_
+            && !defined($A->{$pod}->{"Items"}->{$_}) && ($_ ne $1)) # NOTE(review): {$_} was set above, so this looks always false; {$1} intended?
+        {
+            $A->{$pod}->{"Items"}->{$1} = $A->{$pod}->{"Items"}->{$_};
+            Debug("items", "item $1 REF TO $_");
+        }
+        if ( m{^(tr|y|s|m|q[qwx])/.*[^/]} ) { # quote-like operators also get a short "op//" alias
+            my $pf = $1 . '//';
+            $pf .= "/" if $1 eq "tr" || $1 eq "y" || $1 eq "s";
+            if ($pf ne $_) {
+                $A->{$pod}->{"Items"}->{$pf} = $A->{$pod}->{"Items"}->{$_};
+                Debug("items", "item $pf REF TO $_");
+            }
+        }
+    }
+    elsif ($cmd =~ /^head[12]/) {
+        return if defined($A->{$pod}->{"Headers"}->{$_});
+        $A->{$pod}->{"Headers"}->{$_} = gensym($pod, $_);
+        Debug("headers", "header $_");
+    }
+    else {
+        warn "unrecognized header: $cmd" if $Debug{"misc"}; # was the never-set scalar $Debug
+    }
+}
+
+
+sub picrefs { # render one piece of marked-up text: a link or anchor if it names a known item/header, else plain emphasis
+ my($char, $bigkey, $lilkey,$htype) = @_; # markup letter, full text, shorter lookup key, "NAME" or "HTML"
+ my($key,$ref,$podname);
+ for $podname ($pod,@inclusions) { # the current pod first, then the fallback pods
+ for $ref ( "Items", "Headers" ) {
+ if (defined $A->{$podname}->{$ref}->{$bigkey}) {
+ $value = $A->{$podname}->{$ref}->{$key = $bigkey}; # $value is not declared here: it is a file-level variable
+ Debug("subs", "bigkey is $bigkey, value is $value\n");
+ }
+ elsif (defined $A->{$podname}->{$ref}->{$lilkey}) {
+ $value = $A->{$podname}->{$ref}->{$key = $lilkey};
+ return "" if $lilkey eq '';
+ Debug("subs", "lilkey is $lilkey, value is $value\n");
+ }
+ }
+ if (length($key)) { # a key was found in this pod
+ ($pod2,$num) = split(/_/,$value,2);
+ if ($htype eq "NAME") { # define the anchor here
+ return "\n<A NAME=\"".$value."\">\n$bigkey</A>\n"
+ }
+ else { # link to the anchor
+ return "\n$type$pod2.html\#".$value."\">$bigkey<\/A>\n";
+ }
+ }
+ }
+ if ($char =~ /[IF]/) { # no known target: plain markup chosen by the letter
+ return "<EM>$bigkey</EM>";
+ } elsif ($char =~ /C/) {
+ return "<CODE>$bigkey</CODE>";
+ } else {
+ return "<STRONG>$bigkey</STRONG>";
+ }
+}
+
+sub find_refs { # rewrite $$thing in place, expanding URLs, addresses and pod markup; $htype is passed on to the helpers
+ my($thing,$htype) = @_;
+ my($orig) = $$thing; # kept to detect whether any substitution changed the text
+ # LREF: a manpage(3f) we don't know about
+ for ($$thing) {
+ #s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))>:the I<$1>$2 manpage:g;
+ s@(\S+?://\S*[^.,;!?\s])@noremap(qq{<A HREF="$1">$1</A>})@ge; # bare URLs become links, masked by noremap
+ s,([a-z0-9_.-]+\@([a-z0-9_-]+\.)+([a-z0-9_-]+)),noremap(qq{<A HREF="MAILTO:$1">$1</A>}),gie; # mail addresses
+ s/L<([^>]*)>/lrefs($1,$htype)/ge; # L<...> cross references
+ s/([CIBF])<(\W*?(-?\w*).*?)>/picrefs($1, $2, $3, $htype)/ge; # C<> I<> B<> F<> markup
+ s/(S)<([^\/]\W*?(-?\w*).*?)>/picrefs($1, $2, $3, $htype)/ge; # S<> markup
+ s/((\w+)\(\))/picrefs("I", $1, $2,$htype)/ge; # name() is looked up under "name"
+ s/([\$\@%](?!&[gl]t)([\w:]+|\W\b))/varrefs($1,$htype)/ge; # $scalar, @array and %hash names
+ }
+ if ($$thing eq $orig && $htype eq "NAME") { # an untouched title is passed whole to picrefs
+ $$thing = picrefs("I", $$thing, "", $htype);
+ }
+
+}
+
+sub lrefs { # expand the inside of one L<...>: link to a page, an item or a header, or fall back to plain text
+    my($page, $item) = split(m#/#, $_[0], 2);
+    my($htype) = $_[1];
+    my($podname);
+    my($section) = $page =~ /\((.*)\)/; # text inside parentheses, as in name(3)
+    my $selfref;
+    if ($page =~ /^[A-Z]/ && $item) { # treated as a reference into the current pod
+        $selfref++;
+        $item = "$page/$item";
+        $page = $pod;
+    } elsif (!$item && $page =~ /[^a-z\-]/ && $page !~ /^\$.$/) {
+        $selfref++;
+        $item = $page;
+        $page = $pod;
+    }
+    $item =~ s/\(\)$//;
+    if (!$item) { # a whole-page reference
+        if (!defined $section && defined $Podnames{$page}) {
+            return "\n$type$page.html\">\nthe <EM>$page</EM> manpage<\/A>\n";
+        } else {
+            (warn "Bizarre entry $page/$item") if $Debug{"misc"}; # was the never-set scalar $Debug
+            return "the <EM>$_[0]</EM> manpage\n";
+        }
+    }
+
+    if ($item =~ s/"(.*)"/$1/ || ($item =~ /[^\w\/\-]/ && $item !~ /^\$.$/)) {
+        $text = "<EM>$item</EM>";
+        $ref = "Headers"; # quoted, or containing other than word chars, "/" and "-": look among headers
+    } else {
+        $text = "<EM>$item</EM>";
+        $ref = "Items";
+    }
+    for $podname ($pod, @inclusions) { # the current pod first, then the fallback pods
+        undef $value;
+        if ($ref eq "Items") {
+            if (defined($value = $A->{$podname}->{$ref}->{$item})) {
+                ($pod2,$num) = split(/_/,$value,2);
+                return (($pod eq $pod2) && ($htype eq "NAME"))
+                    ? "\n<A NAME=\"".$value."\">\n$text</A>\n"
+                    : "\n$type$pod2.html\#".$value."\">$text<\/A>\n";
+            }
+        }
+        elsif ($ref eq "Headers") {
+            if (defined($value = $A->{$podname}->{$ref}->{$item})) {
+                ($pod2,$num) = split(/_/,$value,2);
+                return (($pod eq $pod2) && ($htype eq "NAME"))
+                    ? "\n<A NAME=\"".$value."\">\n$text</A>\n"
+                    : "\n$type$pod2.html\#".$value."\">$text<\/A>\n";
+            }
+        }
+    }
+    warn "No $ref reference for $item (@_)" if $Debug{"misc"}; # was the never-set scalar $Debug
+    return $text;
+}
+
+sub varrefs { # link a variable name to its recorded =item anchor, or render it bold if none is known
+    my ($name,$htype) = @_;
+    for $podname ($pod,@inclusions) {
+        next unless $value = $A->{$podname}->{"Items"}->{$name};
+        ($pod2,$num) = split(/_/,$value,2);
+        Debug("vars", "way cool -- var ref on $name");
+        if (($pod eq $pod2) && ($htype eq "NAME")) { # $pod comes from the enclosing file scope
+            return "\n<A NAME=\"".$value."\">\n$name</A>\n";
+        }
+        return "\n$type$pod2.html\#".$value."\">$name<\/A>\n";
+    }
+    Debug( "vars", "bummer, $name not a var");
+    return "<STRONG>$name</STRONG>";
+}
+
+sub gensym { # build an anchor name "<pod>_<key>_<n>" that has not been handed out before
+    my ($podname, $key) = @_;
+    $key =~ s/\s.*//; # cut at the first whitespace character
+    $key = lc($key);
+    $key =~ tr/a-z/_/cs; # each run of characters other than a-z becomes one "_"
+    (my $anchor = join("_", $podname, $key, 0)) =~ s/__/_/g;
+    while ($sawsym{$anchor}++) { # name already used: bump the trailing number and retry
+        $anchor =~ s/_?(\d+)$/'_' . ($1 + 1)/e;
+    }
+    return $anchor;
+}
+
+sub pre_escapes { # escape text in place ($thing is a scalar ref) before markup is expanded -- twiddle these, and stay up late :-)
+ my($thing) = @_;
+ for ($$thing) {
+ s/"(.*?)"/``$1''/gs; # "text" becomes ``text''
+ s/&/noremap("&amp;")/ge; # & becomes &amp;, masked by noremap
+ s/<</noremap("&lt;&lt;")/eg; # << becomes &lt;&lt;, masked by noremap
+ s/([^ESIBLCF])</$1\&lt\;/g; # a "<" not preceded by one of E S I B L C F becomes &lt;
+ s/E<([^\/][^<>]*)>/\&$1\;/g; # embedded special
+ }
+}
+sub noremap { # return a copy with 7-bit characters moved to the high-bit range (post_escapes moves them back)
+    my($masked) = @_;
+    $masked =~ tr/\000-\177/\200-\377/;
+    return $masked;
+}
+
+
+sub post_escapes { # escape remaining ">" characters in place ($thing is a scalar ref), then undo noremap's masking
+ my($thing) = @_;
+ for ($$thing) {
+ s/([^GM])>>/$1\&gt\;\&gt\;/g; # ">>" not preceded by G or M becomes &gt;&gt;
+ s/([^D][^"MGA])>/$1\&gt\;/g; # NOTE(review): the excluded characters presumably spare generated tags -- confirm
+ tr/\200-\377/\000-\177/; # move high-bit characters back to 7-bit
+ }
+}
+
+sub Debug { # print a message to STDERR when its channel has an entry in %Debug
+    my($channel,@message) = @_;
+    $Debug{$channel} and print STDERR @message,"\n";
+}
+
+sub dumptable { # debugging aid: list a hash ref's entries on STDERR, value first, sorted by key
+    my($table) = @_;
+    print STDERR "TABLE DUMP $table\n";
+    foreach $k (sort(keys(%{$table}))) {
+        printf STDERR "%-20s <%s>\n", $table->{$k}, $k;
+    }
+}
+sub trim { # edits its arguments in place: drops leading whitespace and ONE trailing whitespace character
+    foreach $arg (@_) {
+        $arg =~ s/^\s+//;
+        $arg =~ s/\s\n?$//;
+    }
+}
+!NO!SUBS!
+
+close OUT or die "Can't close $file: $!";
+chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; # make the extracted script executable
+exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; # run the configured eunicefix command on it unless that is ":"
diff --git a/pod/pod2html.SH b/pod/pod2html.SH
deleted file mode 100755
index af5161377d..0000000000
--- a/pod/pod2html.SH
+++ /dev/null
@@ -1,490 +0,0 @@
-case $CONFIG in
-'')
- if test -f config.sh; then TOP=.;
- elif test -f ../config.sh; then TOP=..;
- elif test -f ../../config.sh; then TOP=../..;
- elif test -f ../../../config.sh; then TOP=../../..;
- elif test -f ../../../../config.sh; then TOP=../../../..;
- else
- echo "Can't find config.sh."; exit 1
- fi
- . $TOP/config.sh
- ;;
-esac
-case "$0" in
-*/*) cd `expr X$0 : 'X\(.*\)/'` ;;
-esac
-echo "Extracting pod/pod2html (with variable substitutions)"
-rm -f pod2html
-$spitshell >pod2html <<!GROK!THIS!
-#!$binexp/perl
-eval 'exec perl -S \$0 \${1+"\$@"}'
- if \$running_under_some_shell;
-!GROK!THIS!
-
-$spitshell >>pod2html <<'!NO!SUBS!'
-#
-# pod2html - convert pod format to html
-#
-# usage: pod2html [podfiles]
-# will read the cwd and parse all files with .pod extension
-# if no arguments are given on the command line.
-#
-*RS = */;
-*ERRNO = *!;
-
-use Carp;
-
-$gensym = 0;
-
-while ($ARGV[0] =~ /^-d(.*)/) {
- shift;
- $Debug{ lc($1 || shift) }++;
-}
-
-# look in these pods for things not found within the current pod
-@inclusions = qw[
- perlfunc perlvar perlrun perlop
-];
-
-# ck for podnames on command line
-while ($ARGV[0]) {
- push(@Pods,shift);
-}
-$A={};
-
-# location of pods
-$dir=".";
-
-# The beginning of the url for the anchors to the other sections.
-# Edit $type to suit. It's configured for relative url's now.
-$type='<A HREF="';
-$debug = 0;
-
-unless(@Pods){
- opendir(DIR,$dir) or die "Can't opendir $dir: $ERRNO";
- @Pods = grep(/\.pod$/,readdir(DIR));
- closedir(DIR) or die "Can't closedir $dir: $ERRNO";
-}
-@Pods or die "expected pods";
-
-# loop twice through the pods, first to learn the links, then to produce html
-for $count (0,1){
- (print "Scanning pods...\n") unless $count;
- foreach $podfh ( @Pods ) {
- ($pod = $podfh) =~ s/\.pod$//;
- Debug("files", "opening 2 $podfh" );
- (print "Creating $pod.html from $podfh\n") if $count;
- $RS = "\n=";
- open($podfh,"<".$podfh) || die "can't open $podfh: $ERRNO";
- @all=<$podfh>;
- close($podfh);
- $RS = "\n";
- $all[0]=~s/^=//;
- for(@all){s/=$//;}
- $Podnames{$pod} = 1;
- $in_list=0;
- $html=$pod.".html";
- if($count){
- open(HTML,">$html") || die "can't create $html: $ERRNO";
- print HTML <<'HTML__EOQ', <<"HTML__EOQQ";
- <!-- \$RCSfile\$\$Revision\$\$Date\$ -->
- <!-- \$Log\$ -->
- <HTML>
-HTML__EOQ
- <TITLE>\U$pod\E</TITLE>
-HTML__EOQQ
- }
-
- for($i=0;$i<=$#all;$i++){
-
- $all[$i] =~ /^(\w+)\s*(.*)\n?([^\0]*)$/ ;
- ($cmd, $title, $rest) = ($1,$2,$3);
- if ($cmd eq "item") {
- if($count ){
- ($depth) or do_list("over",$all[$i],\$in_list,\$depth);
- do_item($title,$rest,$in_list);
- }
- else{
- # scan item
- scan_thing("item",$title,$pod);
- }
- }
- elsif ($cmd =~ /^head([12])/){
- $num=$1;
- if($count){
- do_hdr($num,$title,$rest,$depth);
- }
- else{
- # header scan
- scan_thing($cmd,$title,$pod); # skip head1
- }
- }
- elsif ($cmd =~ /^over/) {
- $count and $depth and do_list("over",$all[$i+1],\$in_list,\$depth);
- }
- elsif ($cmd =~ /^back/) {
- if($count){
- ($depth) or next; # just skip it
- do_list("back",$all[$i+1],\$in_list,\$depth);
- do_rest("$title.$rest");
- }
- }
- elsif ($cmd =~ /^cut/) {
- next;
- }
- elsif($Debug){
- (warn "unrecognized header: $cmd") if $Debug;
- }
- }
- # close open lists without '=back' stmts
- if($count){
- while($depth){
- do_list("back",$all[$i+1],\$in_list,\$depth);
- }
- print HTML "\n</HTML>\n";
- }
- }
-}
-
-sub do_list{
- my($which,$next_one,$list_type,$depth)=@_;
- my($key);
- if($which eq "over"){
- ($next_one =~ /^item\s+(.*)/ ) or (warn "Bad list, $1\n") if $Debug;
- $key=$1;
- if($key =~ /^1\.?/){
- $$list_type = "OL";
- }
- elsif($key =~ /\*\s*$/){
- $$list_type="UL";
- }
- elsif($key =~ /\*?\s*\w/){
- $$list_type="DL";
- }
- else{
- (warn "unknown list type for item $key") if $Debug;
- }
- print HTML qq{\n};
- print HTML qq{<$$list_type>};
- $$depth++;
- }
- elsif($which eq "back"){
- print HTML qq{\n</$$list_type>\n};
- $$depth--;
- }
-}
-
-sub do_hdr{
- my($num,$title,$rest,$depth)=@_;
- ($num == 1) and print HTML qq{<p><hr>\n};
- process_thing(\$title,"NAME");
- print HTML qq{\n<H$num> };
- print HTML $title;
- print HTML qq{</H$num>\n};
- do_rest($rest);
-}
-
-sub do_item{
- my($title,$rest,$list_type)=@_;
- process_thing(\$title,"NAME");
- if($list_type eq "DL"){
- print HTML qq{\n<DT><STRONG>\n};
- print HTML $title;
- print HTML qq{\n</STRONG></DT>\n};
- print HTML qq{<DD>\n};
- }
- else{
- print HTML qq{\n<LI>};
- ($list_type ne "OL") && (print HTML $title,"\n");
- }
- do_rest($rest);
- print HTML ($list_type eq "DL" )? qq{</DD>} : qq{</LI>};
-}
-
-sub do_rest{
- my($rest)=@_;
- my(@lines,$p,$q,$line,,@paras,$inpre);
- @paras=split(/\n\n+/,$rest);
- for($p=0;$p<=$#paras;$p++){
- @lines=split(/\n/,$paras[$p]);
- if($lines[0] =~ /^\s+\w*\t.*/){ # listing or unordered list
- print HTML qq{<UL>};
- foreach $line (@lines){
- ($line =~ /^\s+(\w*)\t(.*)/) && (($key,$rem) = ($1,$2));
- print HTML defined($Podnames{$key}) ?
- "<LI>$type$key.html\">$key<\/A>\t$rem</LI>\n" :
- "<LI>$line</LI>\n";
- }
- print HTML qq{</UL>\n};
- }
- elsif($lines[0] =~ /^\s/){ # preformatted code
- if($paras[$p] =~/>>|<</){
- print HTML qq{\n<PRE>\n};
- $inpre=1;
- }
- else{
- print HTML qq{\n<XMP>\n};
- $inpre=0;
- }
-inner:
- while(defined($paras[$p])){
- @lines=split(/\n/,$paras[$p]);
- foreach $q (@lines){
- if($paras[$p]=~/>>|<</){
- if($inpre){
- process_thing(\$q,"HTML");
- }
- else {
- print HTML qq{\n</XMP>\n};
- print HTML qq{<PRE>\n};
- $inpre=1;
- process_thing(\$q,"HTML");
- }
- }
- while($q =~ s/\t+/' 'x (length($&) * 8 - length($`) % 8)/e){
- 1;
- }
- print HTML $q,"\n";
- }
- last if $paras[$p+1] !~ /^\s/;
- $p++;
- }
- print HTML ($inpre==1) ? (qq{\n</PRE>\n}) : (qq{\n</XMP>\n});
- }
- else{ # other text
- @lines=split(/\n/,$paras[$p]);
- foreach $line (@lines){
- process_thing(\$line,"HTML");
- print HTML qq{$line\n};
- }
- }
- print HTML qq{<p>};
- }
-}
-
-sub process_thing{
- my($thing,$htype)=@_;
- pre_escapes($thing);
- find_refs($thing,$htype);
- post_escapes($thing);
-}
-
-sub scan_thing{
- my($cmd,$title,$pod)=@_;
- $_=$title;
- s/\n$//;
- s/E<(.*?)>/&$1;/g;
- # remove any formatting information for the headers
- s/[SFCBI]<(.*?)>/$1/g;
- # the "don't format me" thing
- s/Z<>//g;
- if ($cmd eq "item") {
-
- if (/^\*/) { return } # skip bullets
- if (/^\d+\./) { return } # skip numbers
- s/(-[a-z]).*/$1/i;
- trim($_);
- return if defined $A->{$pod}->{"Items"}->{$_};
- $A->{$pod}->{"Items"}->{$_} = gensym($pod, $_);
- $A->{$pod}->{"Items"}->{(split(' ',$_))[0]}=$A->{$pod}->{"Items"}->{$_};
- Debug("items", "item $_");
- if (!/^-\w$/ && /([%\$\@\w]+)/ && $1 ne $_
- && !defined($A->{$pod}->{"Items"}->{$_}) && ($_ ne $1))
- {
- $A->{$pod}->{"Items"}->{$1} = $A->{$pod}->{"Items"}->{$_};
- Debug("items", "item $1 REF TO $_");
- }
- if ( m{^(tr|y|s|m|q[qwx])/.*[^/]} ) {
- my $pf = $1 . '//';
- $pf .= "/" if $1 eq "tr" || $1 eq "y" || $1 eq "s";
- if ($pf ne $_) {
- $A->{$pod}->{"Items"}->{$pf} = $A->{$pod}->{"Items"}->{$_};
- Debug("items", "item $pf REF TO $_");
- }
- }
- }
- elsif ($cmd =~ /^head[12]/){
- return if defined($Headers{$_});
- $A->{$pod}->{"Headers"}->{$_} = gensym($pod, $_);
- Debug("headers", "header $_");
- }
- else {
- (warn "unrecognized header: $cmd") if $Debug;
- }
-}
-
-
-sub picrefs {
- my($char, $bigkey, $lilkey,$htype) = @_;
- my($key,$ref,$podname);
- for $podname ($pod,@inclusions){
- for $ref ( "Items", "Headers" ) {
- if (defined $A->{$podname}->{$ref}->{$bigkey}) {
- $value = $A->{$podname}->{$ref}->{$key=$bigkey};
- Debug("subs", "bigkey is $bigkey, value is $value\n");
- }
- elsif (defined $A->{$podname}->{$ref}->{$lilkey}) {
- $value = $A->{$podname}->{$ref}->{$key=$lilkey};
- return "" if $lilkey eq '';
- Debug("subs", "lilkey is $lilkey, value is $value\n");
- }
- }
- if (length($key)) {
- ($pod2,$num) = split(/_/,$value,2);
- if($htype eq "NAME"){
- return "\n<A NAME=\"".$value."\">\n$bigkey</A>\n"
- }
- else{
- return "\n$type$pod2.html\#".$value."\">$bigkey<\/A>\n";
- }
- }
- }
- if ($char =~ /[IF]/) {
- return "<EM>$bigkey</EM>";
- } elsif($char =~ /C/) {
- return "<CODE>$bigkey</CODE>";
- } else {
- return "<STRONG>$bigkey</STRONG>";
- }
-}
-
-sub find_refs {
- my($thing,$htype)=@_;
- my($orig) = $$thing;
- # LREF: a manpage(3f) we don't know about
- $$thing=~s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))>:the I<$1>$2 manpage:g;
- $$thing=~s/L<([^>]*)>/lrefs($1,$htype)/ge;
- $$thing=~s/([CIBF])<(\W*?(-?\w*).*?)>/picrefs($1, $2, $3, $htype)/ge;
- $$thing=~s/((\w+)\(\))/picrefs("I", $1, $2,$htype)/ge;
- $$thing=~s/([\$\@%](?!&[gl]t)([\w:]+|\W\b))/varrefs($1,$htype)/ge;
- (($$thing eq $orig) && ($htype eq "NAME")) &&
- ($$thing=picrefs("I", $$thing, "", $htype));
-}
-
-sub lrefs {
- my($page, $item) = split(m#/#, $_[0], 2);
- my($htype)=$_[1];
- my($podname);
- my($section) = $page =~ /\((.*)\)/;
- my $selfref;
- if ($page =~ /^[A-Z]/ && $item) {
- $selfref++;
- $item = "$page/$item";
- $page = $pod;
- } elsif (!$item && $page =~ /[^a-z\-]/ && $page !~ /^\$.$/) {
- $selfref++;
- $item = $page;
- $page = $pod;
- }
- $item =~ s/\(\)$//;
- if (!$item) {
- if (!defined $section && defined $Podnames{$page}) {
- return "\n$type$page.html\">\nthe <EM>$page</EM> manpage<\/A>\n";
- } else {
- (warn "Bizarre entry $page/$item") if $Debug;
- return "the <EM>$_[0]</EM> manpage\n";
- }
- }
-
- if ($item =~ s/"(.*)"/$1/ || ($item =~ /[^\w\/\-]/ && $item !~ /^\$.$/)) {
- $text = "<EM>$item</EM>";
- $ref = "Headers";
- } else {
- $text = "<EM>$item</EM>";
- $ref = "Items";
- }
- for $podname ($pod, @inclusions){
- undef $value;
- if ($ref eq "Items") {
- if (defined($value = $A->{$podname}->{$ref}->{$item})) {
- ($pod2,$num) = split(/_/,$value,2);
- return (($pod eq $pod2) && ($htype eq "NAME"))
- ? "\n<A NAME=\"".$value."\">\n$text</A>\n"
- : "\n$type$pod2.html\#".$value."\">$text<\/A>\n";
- }
- }
- elsif($ref eq "Headers") {
- if (defined($value = $A->{$podname}->{$ref}->{$item})) {
- ($pod2,$num) = split(/_/,$value,2);
- return (($pod eq $pod2) && ($htype eq "NAME"))
- ? "\n<A NAME=\"".$value."\">\n$text</A>\n"
- : "\n$type$pod2.html\#".$value."\">$text<\/A>\n";
- }
- }
- }
- (warn "No $ref reference for $item (@_)") if $Debug;
- return $text;
-}
-
-sub varrefs {
- my ($var,$htype) = @_;
- for $podname ($pod,@inclusions){
- if ($value = $A->{$podname}->{"Items"}->{$var}) {
- ($pod2,$num) = split(/_/,$value,2);
- Debug("vars", "way cool -- var ref on $var");
- return (($pod eq $pod2) && ($htype eq "NAME")) # INHERIT $_, $pod
- ? "\n<A NAME=\"".$value."\">\n$var</A>\n"
- : "\n$type$pod2.html\#".$value."\">$var<\/A>\n";
- }
- }
- Debug( "vars", "bummer, $var not a var");
- return "<STRONG>$var</STRONG>";
-}
-
-sub gensym {
- my ($podname, $key) = @_;
- $key =~ s/\s.*//;
- ($key = lc($key)) =~ tr/a-z/_/cs;
- my $name = "${podname}_${key}_0";
- $name =~ s/__/_/g;
- while ($sawsym{$name}++) {
- $name =~ s/_?(\d+)$/'_' . ($1 + 1)/e;
- }
- return $name;
-}
-
-sub pre_escapes {
- my($thing)=@_;
- $$thing=~s/&/noremap("&amp;")/ge;
- $$thing=~s/<</noremap("&lt;&lt;")/eg;
- $$thing=~s/(?:[^ESIBLCF])</noremap("&lt;")/eg;
- $$thing=~s/E<([^\/][^<>]*)>/\&$1\;/g; # embedded special
-}
-
-sub noremap {
- my $hide = $_[0];
- $hide =~ tr/\000-\177/\200-\377/;
- $hide;
-}
-
-sub post_escapes {
- my($thing)=@_;
- $$thing=~s/[^GM]>>/\&gt\;\&gt\;/g;
- $$thing=~s/([^"MGAE])>/$1\&gt\;/g;
- $$thing=~tr/\200-\377/\000-\177/;
-}
-
-sub Debug {
- my $level = shift;
- print STDERR @_,"\n" if $Debug{$level};
-}
-
-sub dumptable {
- my $t = shift;
- print STDERR "TABLE DUMP $t\n";
- foreach $k (sort keys %$t) {
- printf STDERR "%-20s <%s>\n", $t->{$k}, $k;
- }
-}
-sub trim {
- for (@_) {
- s/^\s+//;
- s/\s\n?$//;
- }
-}
-
-
-!NO!SUBS!
-chmod 755 pod2html
-$eunicefix pod2html
diff --git a/pod/pod2latex.SH b/pod/pod2latex.PL
index 45f64232be..bd6df71c74 100755..100644
--- a/pod/pod2latex.SH
+++ b/pod/pod2latex.PL
@@ -1,28 +1,39 @@
-case $CONFIG in
-'')
- if test -f config.sh; then TOP=.;
- elif test -f ../config.sh; then TOP=..;
- elif test -f ../../config.sh; then TOP=../..;
- elif test -f ../../../config.sh; then TOP=../../..;
- elif test -f ../../../../config.sh; then TOP=../../../..;
- else
- echo "Can't find config.sh."; exit 1
- fi
- . $TOP/config.sh
- ;;
-esac
-case "$0" in
-*/*) cd `expr X$0 : 'X\(.*\)/'` ;;
-esac
-echo "Extracting pod/pod2latex (with variable substitutions)"
-rm -f pod2latex
-$spitshell >pod2latex <<!GROK!THIS!
-#!$binexp/perl
-eval 'exec perl -S \$0 \${1+"\$@"}'
- if \$running_under_some_shell;
+#!/usr/local/bin/perl
+
+use Config;
+use File::Basename qw(&basename &dirname);
+
+# List explicitly here the variables you want Configure to
+# generate. Metaconfig only looks for shell variables, so you
+# have to mention them as if they were shell variables, not
+# %Config entries. Thus you write
+# $startperl
+# to ensure Configure will look for $Config{startperl}.
+
+# This forces PL files to create target in same directory as PL file.
+# This is so that make depend always knows where to find PL derivatives.
+chdir(dirname($0));
+($file = basename($0)) =~ s/\.PL$//;
+$file =~ s/\.pl$//
+ if ($Config{'osname'} eq 'VMS' or
+ $Config{'osname'} eq 'OS2'); # "case-forgiving"
+
+open OUT,">$file" or die "Can't create $file: $!";
+
+print "Extracting $file (with variable substitutions)\n";
+
+# In this section, perl variables will be expanded during extraction.
+# You can use $Config{...} to use Configure variables.
+
+print OUT <<"!GROK!THIS!";
+$Config{'startperl'}
+ eval 'exec perl -S \$0 "\$@"'
+ if 0;
!GROK!THIS!
-$spitshell >>pod2latex <<'!NO!SUBS!'
+# In the following, perl variables are not expanded during extraction.
+
+print OUT <<'!NO!SUBS!';
#
# pod2latex, version 1.1
# by Taro Kawagish (kawagish@imslab.co.jp), Jan 11, 1995.
@@ -656,5 +667,7 @@ BEGIN {
);
}
!NO!SUBS!
-chmod 755 pod2latex
-$eunicefix pod2latex
+
+close OUT or die "Can't close $file: $!";
+chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
+exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
diff --git a/pod/pod2man.SH b/pod/pod2man.PL
index a1be14d4e5..3a8c5db2a8 100755..100644
--- a/pod/pod2man.SH
+++ b/pod/pod2man.PL
@@ -1,28 +1,39 @@
-case $CONFIG in
-'')
- if test -f config.sh; then TOP=.;
- elif test -f ../config.sh; then TOP=..;
- elif test -f ../../config.sh; then TOP=../..;
- elif test -f ../../../config.sh; then TOP=../../..;
- elif test -f ../../../../config.sh; then TOP=../../../..;
- else
- echo "Can't find config.sh."; exit 1
- fi
- . $TOP/config.sh
- ;;
-esac
-case "$0" in
-*/*) cd `expr X$0 : 'X\(.*\)/'` ;;
-esac
-echo "Extracting pod/pod2man (with variable substitutions)"
-rm -f pod2man
-$spitshell >pod2man <<!GROK!THIS!
-#!$binexp/perl
-eval 'exec perl -S \$0 \${1+"\$@"}'
- if \$running_under_some_shell;
+#!/usr/local/bin/perl
+
+use Config;
+use File::Basename qw(&basename &dirname);
+
+# List explicitly here the variables you want Configure to
+# generate. Metaconfig only looks for shell variables, so you
+# have to mention them as if they were shell variables, not
+# %Config entries. Thus you write
+# $startperl
+# to ensure Configure will look for $Config{startperl}.
+
+# This forces PL files to create target in same directory as PL file.
+# This is so that make depend always knows where to find PL derivatives.
+chdir(dirname($0));
+($file = basename($0)) =~ s/\.PL$//;
+$file =~ s/\.pl$//
+ if ($Config{'osname'} eq 'VMS' or
+ $Config{'osname'} eq 'OS2'); # "case-forgiving"
+
+open OUT,">$file" or die "Can't create $file: $!";
+
+print "Extracting $file (with variable substitutions)\n";
+
+# In this section, perl variables will be expanded during extraction.
+# You can use $Config{...} to use Configure variables.
+
+print OUT <<"!GROK!THIS!";
+$Config{'startperl'}
+ eval 'exec perl -S \$0 "\$@"'
+ if 0;
!GROK!THIS!
-$spitshell >>pod2man <<'!NO!SUBS!'
+# In the following, perl variables are not expanded during extraction.
+
+print OUT <<'!NO!SUBS!';
$/ = "";
$cutting = 1;
@@ -648,5 +659,7 @@ BEGIN {
);
}
!NO!SUBS!
-chmod 755 pod2man
-$eunicefix pod2man
+
+close OUT or die "Can't close $file: $!";
+chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
+exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';