diff options
Diffstat (limited to 'pod')
56 files changed, 24465 insertions, 5845 deletions
diff --git a/pod/Makefile b/pod/Makefile index bfe6c8edad..7eeabd943b 100644 --- a/pod/Makefile +++ b/pod/Makefile @@ -1,154 +1,219 @@ -CONVERTERS = pod2html pod2latex pod2man pod2text +CONVERTERS = pod2html pod2latex pod2man pod2text checkpods + +HTMLROOT = / # Change this to fix cross-references in HTML +POD2HTML = pod2html \ + --htmlroot=$(HTMLROOT) \ + --podroot=.. --podpath=pod:lib:ext:vms \ + --libpods=perlfunc:perlguts:perlvar:perlrun:perlop all: $(CONVERTERS) man + PERL = ../miniperl -POD = \ +POD = \ perl.pod \ - perlbook.pod \ - perlbot.pod \ - perlcall.pod \ + perldelta.pod \ perldata.pod \ - perldebug.pod \ - perldiag.pod \ - perldsc.pod \ - perlembed.pod \ - perlform.pod \ - perlfunc.pod \ - perlguts.pod \ - perlipc.pod \ - perllol.pod \ - perlmod.pod \ - perlobj.pod \ + perlsyn.pod \ perlop.pod \ - perlovl.pod \ - perlpod.pod \ perlre.pod \ - perlref.pod \ perlrun.pod \ - perlsec.pod \ - perlstyle.pod \ + perlfunc.pod \ + perlvar.pod \ perlsub.pod \ - perlsyn.pod \ + perlmod.pod \ + perlmodlib.pod \ + perlform.pod \ + perllocale.pod \ + perlref.pod \ + perldsc.pod \ + perllol.pod \ + perltoot.pod \ + perlobj.pod \ perltie.pod \ - perltoc.pod \ + perlbot.pod \ + perlipc.pod \ + perldebug.pod \ + perldiag.pod \ + perlsec.pod \ perltrap.pod \ - perlvar.pod \ + perlstyle.pod \ + perlpod.pod \ + perlbook.pod \ + perlembed.pod \ + perlapio.pod \ perlxs.pod \ - perlxstut.pod + perlxstut.pod \ + perlguts.pod \ + perlcall.pod \ + perlfaq.pod \ + perlfaq1.pod \ + perlfaq2.pod \ + perlfaq3.pod \ + perlfaq4.pod \ + perlfaq5.pod \ + perlfaq6.pod \ + perlfaq7.pod \ + perlfaq8.pod \ + perlfaq9.pod \ + perltoc.pod -MAN = \ +MAN = \ perl.man \ - perlbook.man \ - perlbot.man \ - perlcall.man \ + perldelta.man \ perldata.man \ - perldebug.man \ - perldiag.man \ - perldsc.man \ - perlembed.man \ - perlform.man \ - perlfunc.man \ - perlguts.man \ - perlipc.man \ - perllol.man \ - perlmod.man \ - perlobj.man \ + perlsyn.man \ perlop.man \ - perlovl.man \ - perlpod.man \ 
perlre.man \ - perlref.man \ perlrun.man \ - perlsec.man \ - perlstyle.man \ + perlfunc.man \ + perlvar.man \ perlsub.man \ - perlsyn.man \ + perlmod.man \ + perlmodlib.man \ + perlform.man \ + perllocale.man \ + perlref.man \ + perldsc.man \ + perllol.man \ + perltoot.man \ + perlobj.man \ perltie.man \ - perltoc.man \ + perlbot.man \ + perlipc.man \ + perldebug.man \ + perldiag.man \ + perlsec.man \ perltrap.man \ - perlvar.man \ + perlstyle.man \ + perlpod.man \ + perlbook.man \ + perlembed.man \ + perlapio.man \ perlxs.man \ - perlxstut.man + perlxstut.man \ + perlguts.man \ + perlcall.man \ + perlfaq.man \ + perlfaq1.man \ + perlfaq2.man \ + perlfaq3.man \ + perlfaq4.man \ + perlfaq5.man \ + perlfaq6.man \ + perlfaq7.man \ + perlfaq8.man \ + perlfaq9.man \ + perltoc.man -HTML = \ +HTML = \ perl.html \ - perlbook.html \ - perlbot.html \ - perlcall.html \ + perldelta.html \ perldata.html \ - perldebug.html \ - perldiag.html \ - perldsc.html \ - perlembed.html \ - perlform.html \ - perlfunc.html \ - perlguts.html \ - perlipc.html \ - perllol.html \ - perlmod.html \ - perlobj.html \ + perlsyn.html \ perlop.html \ - perlovl.html \ - perlpod.html \ perlre.html \ - perlref.html \ perlrun.html \ - perlsec.html \ - perlstyle.html \ + perlfunc.html \ + perlvar.html \ perlsub.html \ - perlsyn.html \ + perlmod.html \ + perlmodlib.html \ + perlform.html \ + perllocale.html \ + perlref.html \ + perldsc.html \ + perllol.html \ + perltoot.html \ + perlobj.html \ perltie.html \ - perltoc.html \ + perlbot.html \ + perlipc.html \ + perldebug.html \ + perldiag.html \ + perlsec.html \ perltrap.html \ - perlvar.html \ + perlstyle.html \ + perlpod.html \ + perlbook.html \ + perlembed.html \ + perlapio.html \ perlxs.html \ - perlxstut.html + perlxstut.html \ + perlguts.html \ + perlcall.html \ + perlfaq.html \ + perlfaq1.html \ + perlfaq2.html \ + perlfaq3.html \ + perlfaq4.html \ + perlfaq5.html \ + perlfaq6.html \ + perlfaq7.html \ + perlfaq8.html \ + perlfaq9.html +# not 
perltoc.html -TEX = \ +TEX = \ perl.tex \ - perlbook.tex \ - perlbot.tex \ - perlcall.tex \ + perldelta.tex \ perldata.tex \ - perldebug.tex \ - perldiag.tex \ - perldsc.tex \ - perlembed.tex \ - perlform.tex \ - perlfunc.tex \ - perlguts.tex \ - perlipc.tex \ - perllol.tex \ - perlmod.tex \ - perlobj.tex \ + perlsyn.tex \ perlop.tex \ - perlovl.tex \ - perlpod.tex \ perlre.tex \ - perlref.tex \ perlrun.tex \ - perlsec.tex \ - perlstyle.tex \ + perlfunc.tex \ + perlvar.tex \ perlsub.tex \ - perlsyn.tex \ + perlmod.tex \ + perlmodlib.tex \ + perlform.tex \ + perllocale.tex \ + perlref.tex \ + perldsc.tex \ + perllol.tex \ + perltoot.tex \ + perlobj.tex \ perltie.tex \ - perltoc.tex \ + perlbot.tex \ + perlipc.tex \ + perldebug.tex \ + perldiag.tex \ + perlsec.tex \ perltrap.tex \ - perlvar.tex \ + perlstyle.tex \ + perlpod.tex \ + perlbook.tex \ + perlembed.tex \ + perlapio.tex \ perlxs.tex \ - perlxstut.tex + perlxstut.tex \ + perlguts.tex \ + perlcall.tex \ + perlfaq.tex \ + perlfaq1.tex \ + perlfaq2.tex \ + perlfaq3.tex \ + perlfaq4.tex \ + perlfaq5.tex \ + perlfaq6.tex \ + perlfaq7.tex \ + perlfaq8.tex \ + perlfaq9.tex \ + perltoc.tex -man: pod2man $(MAN) +man: pod2man $(MAN) -# pod2html normally runs on all the pods at once in order to build up -# cross-references. 
-html: pod2html - $(PERL) -I../lib pod2html $(POD) +html: pod2html $(HTML) tex: pod2latex $(TEX) -.SUFFIXES: .pm .pod .man +toc: + $(PERL) -I../lib buildtoc >perltoc.pod + +.SUFFIXES: .pm .pod + +.SUFFIXES: .man .pm.man: pod2man $(PERL) -I../lib pod2man $*.pm >$*.man @@ -156,30 +221,36 @@ tex: pod2latex $(TEX) .pod.man: pod2man $(PERL) -I../lib pod2man $*.pod >$*.man -.SUFFIXES: .mp .pod .html +.SUFFIXES: .html .pm.html: pod2html - $(PERL) -I../lib pod2html $*.pod + $(PERL) -I../lib $(POD2HTML) --infile=$*.pm --outfile=$*.html .pod.html: pod2html - $(PERL) -I../lib pod2html $*.pod + $(PERL) -I../lib $(POD2HTML) --infile=$*.pod --outfile=$*.html -.SUFFIXES: .pm .pod .tex - -.pod.tex: pod2latex - $(PERL) -I../lib pod2latex $*.pod +.SUFFIXES: .tex .pm.tex: pod2latex + $(PERL) -I../lib pod2latex $*.pm + +.pod.tex: pod2latex $(PERL) -I../lib pod2latex $*.pod clean: rm -f $(MAN) $(HTML) $(TEX) + rm -f pod2html-*cache + rm -f *.aux *.log realclean: clean rm -f $(CONVERTERS) distclean: realclean +check: checkpods + @echo "checking..."; \ + $(PERL) -I../lib checkpods $(POD) + # Dependencies. 
pod2latex: pod2latex.PL ../lib/Config.pm $(PERL) -I../lib pod2latex.PL @@ -192,3 +263,8 @@ pod2man: pod2man.PL ../lib/Config.pm pod2text: pod2text.PL ../lib/Config.pm $(PERL) -I ../lib pod2text.PL + +checkpods: checkpods.PL ../lib/Config.pm + $(PERL) -I ../lib checkpods.PL + + diff --git a/pod/buildtoc b/pod/buildtoc index 9ca5e920fd..d657d68c84 100644 --- a/pod/buildtoc +++ b/pod/buildtoc @@ -1,19 +1,25 @@ use File::Find; use Cwd; +use Text::Wrap; -@pods = qw{ - perl perldata perlsyn perlop perlre perlrun perlfunc perlvar - perlsub perlmod perlref perldsc perllol perlobj perltie - perlbot perldebug perldiag perlform perlipc perlsec perltrap - perlstyle perlxs perlxstut perlguts perlcall perlembed perlpod - perlbook - }; -for (@pods) { s/$/.pod/ } +sub output ($); + +@pods = qw( + perl perlfaq perlfaq1 perlfaq2 perlfaq3 perlfaq4 perlfaq5 + perlfaq6 perlfaq7 perlfaq8 perlfaq9 perldelta perldata + perlsyn perlop perlre perlrun perlfunc perlvar perlsub + perlmod perlmodlib perlform perllocale perlref perldsc + perllol perltoot perlobj perltie perlbot perlipc perldebug + perldiag perlsec perltrap perlstyle perlpod perlbook + perlembed perlapio perlxs perlxstut perlguts perlcall + ); + +for (@pods) { s/$/.pod/ } $/ = ''; @ARGV = @pods; -($_= <<EOPOD2B) =~ s/^\t//gm && print; +($_= <<EOPOD2B) =~ s/^\t//gm && output($_); =head1 NAME @@ -21,38 +27,40 @@ $/ = ''; =head1 DESCRIPTION - This page provides a brief table of contents for the rest of the Perl - documentation set. It is meant to be be quickly scanned or grepped + This page provides a brief table of contents for the rest of the Perl + documentation set. It is meant to be scanned quickly or grepped through to locate the proper section you're looking for. =head1 BASIC DOCUMENTATION EOPOD2B +#' make emacs happy podset(@pods); find \&getpods => qw(../lib ../ext); + sub getpods { - if (/\.p(od|m)$/) { - my $tmp; + if (/\.p(od|m)$/) { # Skip .pm files that have corresponding .pod files, and Functions.pm. 
- return if (($tmp = $_) =~ s/\.pm$/.pod/ && -f $tmp); - return if ($_ eq '../lib/Pod/Functions.pm');####Used only by pod itself - + return if /(.*)\.pm$/ && -f "$1.pod"; my $file = $File::Find::name; + return if $file eq '../lib/Pod/Functions.pm'; # Used only by pod itself + die "tut $name" if $file =~ /TUT/; unless (open (F, "< $_\0")) { warn "bogus <$file>: $!"; system "ls", "-l", $file; - } else { + } + else { my $line; while ($line = <F>) { if ($line =~ /^=head1\s+NAME\b/) { push @modpods, $file; #warn "GOOD $file\n"; return; - } - } + } + } warn "EVIL $file\n"; } } @@ -69,14 +77,14 @@ for (@modpods) { if ($done{$name}++) { # warn "already did $_\n"; next; - } + } push @modules, $_; push @modname, $name; - } -} + } +} + +($_= <<EOPOD2B) =~ s/^\t//gm && output($_); -($_= <<EOPOD2B) =~ s/^\t//gm && print; - =head1 PRAGMA DOCUMENTATION @@ -85,8 +93,8 @@ EOPOD2B podset(sort @pragmata); -($_= <<EOPOD2B) =~ s/^\t//gm && print; - +($_= <<EOPOD2B) =~ s/^\t//gm && output($_); + =head1 MODULE DOCUMENTATION @@ -96,41 +104,41 @@ EOPOD2B podset( @modules[ sort { $modname[$a] cmp $modname[$b] } 0 .. $#modules ] ); ($_= <<EOPOD2B) =~ s/^\t//gm; - + =head1 AUXILIARY DOCUMENTATION - Here should be listed all the extra program's docs, but they - don't all have man pages yet: + Here should be listed all the extra programs' documentation, but they + don't all have manual pages yet: =item a2p =item s2p =item find2perl - + =item h2ph - + =item c2ph =item h2xs =item xsubpp - =item pod2man + =item pod2man =item wrapsuid =head1 AUTHOR - Larry Wall E<lt>F<lwall\@sems.com>E<gt>, with the help of oodles + Larry Wall <F<larry\@wall.org>>, with the help of oodles of other folks. 
EOPOD2B -print; - +output $_; +output "\n"; # flush $LINE exit; sub podset { @@ -139,69 +147,94 @@ sub podset { while(<>) { if (s/^=head1 (NAME)\s*/=head2 /) { $pod = path2modname($ARGV); - sub path2modname { - local $_ = shift; - s/\.p(m|od)$//; - s-.*?/(lib|ext)/--; - s-/-::-g; - s/(\w+)::\1/$1/; - return $_; - } - unitem(); unhead2(); - print "\n \n\n=head2 "; + unitem(); + unhead2(); + output "\n \n\n=head2 "; $_ = <>; if ( /^\s*$pod\b/ ) { - print; + s/$pod\.pm/$pod/; # '.pm' in NAME !? + output $_; } else { s/^/$pod, /; - print; - } + output $_; + } next; } if (s/^=head1 (.*)/=item $1/) { unitem(); unhead2(); - print; nl(); next; - } + output $_; nl(); next; + } if (s/^=head2 (.*)/=item $1/) { unitem(); - print "=over\n\n" unless $inhead2; + output "=over\n\n" unless $inhead2; $inhead2 = 1; - print; nl(); next; + output $_; nl(); next; - } + } if (s/^=item (.*)\n/$1/) { next if $pod eq 'perldiag'; s/^\s*\*\s*$// && next; s/^\s*\*\s*//; s/\s+$//; next if /^[\d.]+$/; - next if $pod eq 'perlmod' && /^ftp:/; + next if $pod eq 'perlmodlib' && /^ftp:/; ##print "=over\n\n" unless $initem; - print ", " if $initem; + output ", " if $initem; $initem = 1; s/\.$//; - print; next; - } - } + s/^-X\b/-I<X>/; + output $_; next; + } + } +} -} +sub path2modname { + local $_ = shift; + s/\.p(m|od)$//; + s-.*?/(lib|ext)/--; + s-/-::-g; + s/(\w+)::\1/$1/; + return $_; +} sub unhead2 { if ($inhead2) { - print "\n\n=back\n\n"; - } - $inhead2 = 0; - $initem = 0; -} + output "\n\n=back\n\n"; + } + $inhead2 = 0; + $initem = 0; +} sub unitem { if ($initem) { - print "\n\n"; + output "\n\n"; ##print "\n\n=back\n\n"; - } + } $initem = 0; -} +} sub nl { - print "\n"; -} + output "\n"; +} + +my $NEWLINE; # how many newlines have we seen recently +my $LINE; # what remains to be printed + +sub output ($) { + for (split /(\n)/, shift) { + if ($_ eq "\n") { + if ($LINE) { + print wrap('', '', $LINE); + $LINE = ''; + } + if ($NEWLINE < 2) { + print; + $NEWLINE++; + } + } + elsif (/\S/ && 
length) { + $LINE .= $_; + $NEWLINE = 0; + } + } +} diff --git a/pod/checkpods.PL b/pod/checkpods.PL new file mode 100644 index 0000000000..4bec4da609 --- /dev/null +++ b/pod/checkpods.PL @@ -0,0 +1,72 @@ +#!/usr/local/bin/perl + +use Config; +use File::Basename qw(&basename &dirname); + +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. + +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; + +open OUT,">$file" or die "Can't create $file: $!"; + +print "Extracting $file (with variable substitutions)\n"; + +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. + +print OUT <<"!GROK!THIS!"; +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; +!GROK!THIS! + +# In the following, perl variables are not expanded during extraction. + +print OUT <<'!NO!SUBS!'; +# From roderick@gate.netThu Sep 5 17:19:30 1996 +# Date: Thu, 05 Sep 1996 00:11:22 -0400 +# From: Roderick Schertler <roderick@gate.net> +# To: perl5-porters@africa.nicoh.com +# Subject: POD lines with only spaces +# +# There are some places in the documentation where a POD directive is +# ignored because the line before it contains whitespace (and so the +# directive doesn't start a paragraph). This patch adds a way to check +# for these to the pod Makefile (though it isn't made part of the build +# process, which would be a good idea), and fixes those places where the +# problem currently exists. +# +# Version 1.00 Original. 
+# Version 1.01 Andy Dougherty <doughera@lafcol.lafayette.edu> +# Trivial modifications to output format for easier auto-parsing +# Broke it out as a separate function to avoid nasty +# Make/Shell/Perl quoting problems, and also to make it easier +# to grow. Someone will probably want to rewrite in terms of +# some sort of Pod::Checker module. Or something. Consider this +# a placeholder for the future. +$exit = $last_blank = 0; +while (<>) { + chop; + if (/^(=\S+)/ && $last_blank) { + printf "%s: line %5d, Non-empty line preceeding directive %s\n", + $ARGV, $., $1; + $exit = 1; + } + $last_blank = /^\s+$/; + close(ARGV) if eof; +} +exit $exit +!NO!SUBS! + +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; diff --git a/pod/perl.pod b/pod/perl.pod index 150bb7d842..3036f35b21 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -19,7 +19,9 @@ For ease of access, the Perl manual has been split up into a number of sections: perl Perl overview (this section) - perltoc Perl documentation table of contents + perldelta Perl changes since previous version + perlfaq Perl frequently asked questions + perldata Perl data structures perlsyn Perl syntax perlop Perl operators and precedence @@ -28,42 +30,61 @@ of sections: perlfunc Perl builtin functions perlvar Perl predefined variables perlsub Perl subroutines - perlmod Perl modules - perlref Perl references + perlmod Perl modules: how they work + perlmodlib Perl modules: how to write and use + perlform Perl formats + perllocale Perl locale support + + perlref Perl references perldsc Perl data structures intro perllol Perl data structures: lists of lists + perltoot Perl OO tutorial perlobj Perl objects perltie Perl objects hidden behind simple variables perlbot Perl OO tricks and examples + perlipc Perl interprocess communication + perldebug Perl debugging perldiag Perl diagnostic messages - perlform Perl 
formats - perlipc Perl interprocess communication perlsec Perl security perltrap Perl traps for the unwary perlstyle Perl style guide + + perlpod Perl plain old documentation + perlbook Perl book information + + perlembed Perl ways to embed perl in your C or C++ application + perlapio Perl internal IO abstraction interface perlxs Perl XS application programming interface perlxstut Perl XS tutorial - perlguts Perl internal functions for those doing extensions + perlguts Perl internal functions for those doing extensions perlcall Perl calling conventions from C - perlembed Perl how to embed perl in your C or C++ app - perlpod Perl plain old documentation - perlbook Perl book information (If you're intending to read these straight through for the first time, the suggested order will tend to reduce the number of forward references.) -Additional documentation for Perl modules is available in the -F</usr/local/man/> directory. Some of this is distributed standard with -Perl, but you'll also find third-party modules there. You should be able -to view this with your man(1) program by including the proper directories -in the appropriate start-up files. To find out where these are, type: +By default, all of the above manpages are installed in the +F</usr/local/man/> directory. - perl -le 'use Config; print "@Config{man1dir,man3dir}"' +Extensive additional documentation for Perl modules is available. The +default configuration for perl will place this additional documentation +in the F</usr/local/lib/perl5/man> directory (or else in the F<man> +subdirectory of the Perl library directory). Some of this additional +documentation is distributed standard with Perl, but you'll also find +documentation for third-party modules there. -If the directories were F</usr/local/man/man1> and F</usr/local/man/man3>, -you would only need to add F</usr/local/man> to your MANPATH. If -they are different, you'll have to add both stems. 
+You should be able to view Perl's documentation with your man(1) +program by including the proper directories in the appropriate start-up +files, or in the MANPATH environment variable. To find out where the +configuration has installed the manpages, type: + + perl -V:man.dir + +If the directories have a common stem, such as F</usr/local/man/man1> +and F</usr/local/man/man3>, you need only to add that stem +(F</usr/local/man>) to your man(1) configuration files or your MANPATH +environment variable. If they do not share a stem, you'll have to add +both stems. If that doesn't work for some reason, you can still use the supplied F<perldoc> script to view module information. You might @@ -75,33 +96,35 @@ will often point out exactly where the trouble is. =head1 DESCRIPTION -Perl is an interpreted language optimized for scanning arbitrary +Perl is a language optimized for scanning arbitrary text files, extracting information from those text files, and printing reports based on that information. It's also a good language for many system management tasks. The language is intended to be practical (easy to use, efficient, complete) rather than beautiful (tiny, elegant, minimal). -Perl combines (in the author's opinion, anyway) some -of the best features of C, B<sed>, B<awk>, and B<sh>, so people -familiar with those languages should have little difficulty with it. -(Language historians will also note some vestiges of B<csh>, Pascal, -and even BASIC-PLUS.) Expression syntax corresponds quite closely to C +Perl combines (in the author's opinion, anyway) some of the best +features of C, B<sed>, B<awk>, and B<sh>, so people familiar with +those languages should have little difficulty with it. (Language +historians will also note some vestiges of B<csh>, Pascal, and even +BASIC-PLUS.) Expression syntax corresponds quite closely to C expression syntax. 
Unlike most Unix utilities, Perl does not arbitrarily limit the size of your data--if you've got the memory, -Perl can slurp in your whole file as a single string. Recursion is -of unlimited depth. And the hash tables used by associative arrays -grow as necessary to prevent degraded performance. Perl uses -sophisticated pattern matching techniques to scan large amounts of data -very quickly. Although optimized for scanning text, Perl can also -deal with binary data, and can make dbm files look like associative -arrays. Setuid Perl scripts are safer than -C programs through a dataflow tracing mechanism which prevents many -stupid security holes. If you have a problem that would ordinarily use -B<sed> or B<awk> or B<sh>, but it exceeds their capabilities or must -run a little faster, and you don't want to write the silly thing in C, -then Perl may be for you. There are also translators to turn your -B<sed> and B<awk> scripts into Perl scripts. +Perl can slurp in your whole file as a single string. Recursion is of +unlimited depth. And the tables used by hashes (previously called +"associative arrays") grow as necessary to prevent degraded +performance. Perl uses sophisticated pattern matching techniques to +scan large amounts of data very quickly. Although optimized for +scanning text, Perl can also deal with binary data, and can make dbm +files look like hashes. Setuid Perl scripts are safer than C programs +through a dataflow tracing mechanism which prevents many stupid +security holes. + +If you have a problem that would ordinarily use B<sed> or B<awk> or +B<sh>, but it exceeds their capabilities or must run a little faster, +and you don't want to write the silly thing in C, then Perl may be for +you. There are also translators to turn your B<sed> and B<awk> +scripts into Perl scripts. But wait, there's more... @@ -131,7 +154,8 @@ will continue to work unchanged. Perl variables may now be declared within a lexical scope, like "auto" variables in C. 
Not only is this more efficient, but it contributes -to better privacy for "programming in the large". +to better privacy for "programming in the large". Anonymous +subroutines exhibit deep binding of lexical variables (closures). =item * Arbitrarily nested data structures @@ -159,7 +183,7 @@ Perl may now be embedded easily in your C or C++ application, and can either call or be called by your routines through a documented interface. The XS preprocessor is provided to make it easy to glue your C or C++ routines into Perl. Dynamic loading of modules is -supported. +supported, and Perl itself can be made into a dynamic library. =item * POSIX compliant @@ -184,80 +208,44 @@ to an object class which defines its access methods. =item * Subroutine definitions may now be autoloaded In fact, the AUTOLOAD mechanism also allows you to define any arbitrary -semantics for undefined subroutine calls. It's not just for autoloading. +semantics for undefined subroutine calls. It's not for just autoloading. =item * Regular expression enhancements -You can now specify non-greedy quantifiers. You can now do grouping +You can now specify nongreedy quantifiers. You can now do grouping without creating a backreference. You can now write regular expressions with embedded whitespace and comments for readability. A consistent extensibility mechanism has been added that is upwardly compatible with all old regular expressions. -=back +=item * Innumerable Unbundled Modules -Ok, that's I<definitely> enough hype. +The Comprehensive Perl Archive Network described in L<perlmodlib> +contains hundreds of plug-and-play modules full of reusable code. +See F<http://www.perl.com/CPAN> for a site near you. -=head1 ENVIRONMENT +=item * Compilability -=over 12 - -=item HOME - -Used if chdir has no argument. - -=item LOGDIR - -Used if chdir has no argument and HOME is not set. - -=item PATH - -Used in executing subprocesses, and in finding the script if B<-S> is -used. 
- -=item PERL5LIB - -A colon-separated list of directories in which to look for Perl library -files before looking in the standard library and the current -directory. If PERL5LIB is not defined, PERLLIB is used. When running -taint checks (because the script was running setuid or setgid, or the -B<-T> switch was used), neither variable is used. The script should -instead say - - use lib "/my/directory"; - -=item PERL5DB - -The command used to get the debugger code. If unset, uses - - BEGIN { require 'perl5db.pl' } - -=item PERLLIB - -A colon-separated list of directories in which to look for Perl library -files before looking in the standard library and the current -directory. If PERL5LIB is defined, PERLLIB is not used. +While not yet in full production mode, a working perl-to-C compiler +does exist. It can generate portable byte code, simple C, or +optimized C code. =back -Apart from these, Perl uses no other environment variables, except -to make them available to the script being executed, and to child -processes. However, scripts running setuid would do well to execute -the following lines before doing anything else, just to keep people -honest: +Okay, that's I<definitely> enough hype. + +=head1 ENVIRONMENT - $ENV{'PATH'} = '/bin:/usr/bin'; # or whatever you need - $ENV{'SHELL'} = '/bin/sh' if defined $ENV{'SHELL'}; - $ENV{'IFS'} = '' if defined $ENV{'IFS'}; +See L<perlrun>. =head1 AUTHOR -Larry Wall E<lt>F<lwall@sems.com>E<gt>, with the help of oodles of other folks. +Larry Wall <F<larry@wall.org>>, with the help of oodles of other folks. =head1 FILES "/tmp/perl-e$$" temporary file for -e commands - "@INC" locations of perl 5 libraries + "@INC" locations of perl libraries =head1 SEE ALSO @@ -287,8 +275,8 @@ switch? The B<-w> switch is not mandatory. Perl is at the mercy of your machine's definitions of various -operations such as type casting, atof() and sprintf(). The latter -can even trigger a coredump when passed ludicrous input values. 
+operations such as type casting, atof(), and floating-point +output with sprintf(). If your stdio requires a seek or eof between reads and writes on a particular stream, so does Perl. (This doesn't apply to sysread() @@ -296,14 +284,13 @@ and syswrite().) While none of the built-in data types have any arbitrary size limits (apart from memory size), there are still a few arbitrary limits: a -given identifier may not be longer than 255 characters, and no +given variable name may not be longer than 255 characters, and no component of your PATH may be longer than 255 if you use B<-S>. A regular expression may not compile to more than 32767 bytes internally. -See the perl bugs database at F< http://perl.com/perl/bugs/ >. You may -mail your bug reports (be sure to include full configuration information -as output by the myconfig program in the perl source tree) to -F<perlbug@perl.com>. +You may mail your bug reports (be sure to include full configuration +information as output by the myconfig program in the perl source tree, +or by C<perl -V>) to <F<perlbug@perl.com>>. If you've succeeded in compiling perl, the perlbug script in the utils/ subdirectory can be used to help mail in a bug report. diff --git a/pod/perlapio.pod b/pod/perlapio.pod new file mode 100644 index 0000000000..d88e44509c --- /dev/null +++ b/pod/perlapio.pod @@ -0,0 +1,274 @@ +=head1 NAME + +perlapio - perl's IO abstraction interface. + +=head1 SYNOPSIS + + PerlIO *PerlIO_stdin(void); + PerlIO *PerlIO_stdout(void); + PerlIO *PerlIO_stderr(void); + + PerlIO *PerlIO_open(const char *,const char *); + int PerlIO_close(PerlIO *); + + int PerlIO_stdoutf(const char *,...) 
+ int PerlIO_puts(PerlIO *,const char *); + int PerlIO_putc(PerlIO *,int); + int PerlIO_write(PerlIO *,const void *,size_t); + int PerlIO_printf(PerlIO *, const char *,...); + int PerlIO_vprintf(PerlIO *, const char *, va_list); + int PerlIO_flush(PerlIO *); + + int PerlIO_eof(PerlIO *); + int PerlIO_error(PerlIO *); + void PerlIO_clearerr(PerlIO *); + + int PerlIO_getc(PerlIO *); + int PerlIO_ungetc(PerlIO *,int); + int PerlIO_read(PerlIO *,void *,size_t); + + int PerlIO_fileno(PerlIO *); + PerlIO *PerlIO_fdopen(int, const char *); + PerlIO *PerlIO_importFILE(FILE *); + FILE *PerlIO_exportFILE(PerlIO *); + FILE *PerlIO_findFILE(PerlIO *); + void PerlIO_releaseFILE(PerlIO *,FILE *); + + void PerlIO_setlinebuf(PerlIO *); + + long PerlIO_tell(PerlIO *); + int PerlIO_seek(PerlIO *,off_t,int); + int PerlIO_getpos(PerlIO *,Fpos_t *); + int PerlIO_setpos(PerlIO *,Fpos_t *); + void PerlIO_rewind(PerlIO *); + + int PerlIO_has_base(PerlIO *); + int PerlIO_has_cntptr(PerlIO *); + int PerlIO_fast_gets(PerlIO *); + int PerlIO_canset_cnt(PerlIO *); + + char *PerlIO_get_ptr(PerlIO *); + int PerlIO_get_cnt(PerlIO *); + void PerlIO_set_cnt(PerlIO *,int); + void PerlIO_set_ptrcnt(PerlIO *,char *,int); + char *PerlIO_get_base(PerlIO *); + int PerlIO_get_bufsiz(PerlIO *); + +=head1 DESCRIPTION + +Perl's source code should use the above functions instead of those +defined in ANSI C's I<stdio.h>; I<perlio.h> will then C<#define> them to +the I/O mechanism selected at Configure time. + +The functions are modeled on those in I<stdio.h>, but parameter order +has been "tidied up a little". + +=over 4 + +=item B<PerlIO *> + +This takes the place of FILE *. Unlike FILE * it should be treated as +opaque (it is probably safe to assume it is a pointer to something). + +=item B<PerlIO_stdin()>, B<PerlIO_stdout()>, B<PerlIO_stderr()> + +Use these rather than C<stdin>, C<stdout>, C<stderr>.
They are written +to look like "function calls" rather than variables because this makes +it easier to I<make them> function calls if the platform cannot export data +to loaded modules, or if (say) different "threads" might have different +values. + +=item B<PerlIO_open(path, mode)>, B<PerlIO_fdopen(fd,mode)> + +These correspond to fopen()/fdopen(); the arguments are the same. + +=item B<PerlIO_printf(f,fmt,...)>, B<PerlIO_vprintf(f,fmt,a)> + +These are the fprintf()/vfprintf() equivalents. + +=item B<PerlIO_stdoutf(fmt,...)> + +This is the printf() equivalent. printf is #defined to this function, +so it is (currently) legal to use C<printf(fmt,...)> in perl sources. + +=item B<PerlIO_read(f,buf,count)>, B<PerlIO_write(f,buf,count)> + +These correspond to fread() and fwrite(). Note that arguments +are different, there is only one "count" and order has +"file" first. + +=item B<PerlIO_close(f)> + +=item B<PerlIO_puts(f,s)>, B<PerlIO_putc(f,c)> + +These correspond to fputs() and fputc(). +Note that arguments have been revised to have "file" first. + +=item B<PerlIO_ungetc(f,c)> + +This corresponds to ungetc(). +Note that arguments have been revised to have "file" first. + +=item B<PerlIO_getc(f)> + +This corresponds to getc(). + +=item B<PerlIO_eof(f)> + +This corresponds to feof(). + +=item B<PerlIO_error(f)> + +This corresponds to ferror(). + +=item B<PerlIO_fileno(f)> + +This corresponds to fileno(), note that on some platforms, +the meaning of "fileno" may not match Unix. + +=item B<PerlIO_clearerr(f)> + +This corresponds to clearerr(), i.e., clears 'eof' and 'error' +flags for the "stream". + +=item B<PerlIO_flush(f)> + +This corresponds to fflush(). + +=item B<PerlIO_tell(f)> + +This corresponds to ftell(). + +=item B<PerlIO_seek(f,o,w)> + +This corresponds to fseek(). + +=item B<PerlIO_getpos(f,p)>, B<PerlIO_setpos(f,p)> + +These correspond to fgetpos() and fsetpos().
If the platform does not +have the stdio calls then they are implemented in terms of PerlIO_tell() +and PerlIO_seek(). + +=item B<PerlIO_rewind(f)> + +This corresponds to rewind(). Note that it may be redefined +in terms of PerlIO_seek() at some point. + +=item B<PerlIO_tmpfile()> + +This corresponds to tmpfile(), i.e., returns an anonymous +PerlIO which will automatically be deleted when closed. + +=back + +=head2 Co-existence with stdio + +There is outline support for co-existence of PerlIO with stdio. +Obviously if PerlIO is implemented in terms of stdio there is +no problem. However if perlio is implemented on top of (say) sfio +then mechanisms must exist to create a FILE * which can be passed +to library code which is going to use stdio calls. + +=over 4 + +=item B<PerlIO_importFILE(f,flags)> + +Used to get a PerlIO * from a FILE *. +May need additional arguments, interface under review. + +=item B<PerlIO_exportFILE(f,flags)> + +Given a PerlIO *, return a 'native' FILE * suitable for +passing to code expecting to be compiled and linked with +ANSI C I<stdio.h>. + +The fact that such a FILE * has been 'exported' is recorded, +and may affect future PerlIO operations on the original +PerlIO *. + +=item B<PerlIO_findFILE(f)> + +Returns previously 'exported' FILE * (if any). +Place holder until interface is fully defined. + +=item B<PerlIO_releaseFILE(p,f)> + +Calling PerlIO_releaseFILE informs PerlIO that all use +of FILE * is complete. It is removed from list of 'exported' +FILE *s, and associated PerlIO * should revert to original +behaviour. + +=item B<PerlIO_setlinebuf(f)> + +This corresponds to setlinebuf(). Use is deprecated pending +further discussion. (Perl core uses it I<only> when "dumping"; +it has nothing to do with $| auto-flush.) + +=back + +In addition to user API above there is an "implementation" interface +which allows perl to get at internals of PerlIO. +The following calls correspond to the various FILE_xxx macros determined +by Configure.
This section is really of interest to only those +concerned with detailed perl-core behaviour or implementing a +PerlIO mapping. + +=over 4 + +=item B<PerlIO_has_cntptr(f)> + +Implementation can return pointer to current position in the "buffer" and +a count of bytes available in the buffer. + +=item B<PerlIO_get_ptr(f)> + +Return pointer to next readable byte in buffer. + +=item B<PerlIO_get_cnt(f)> + +Return count of readable bytes in the buffer. + +=item B<PerlIO_canset_cnt(f)> + +Implementation can adjust its idea of number of +bytes in the buffer. + +=item B<PerlIO_fast_gets(f)> + +Implementation has all the interfaces required to +allow perl's fast code to handle <FILE> mechanism. + + PerlIO_fast_gets(f) = PerlIO_has_cntptr(f) && \ + PerlIO_canset_cnt(f) && \ + `Can set pointer into buffer' + +=item B<PerlIO_set_ptrcnt(f,p,c)> + +Set pointer into buffer, and a count of bytes still in the +buffer. Should be used only to set +pointer to within range implied by previous calls +to C<PerlIO_get_ptr> and C<PerlIO_get_cnt>. + +=item B<PerlIO_set_cnt(f,c)> + +Obscure - set count of bytes in the buffer. Deprecated. +Currently used in only doio.c to force count < -1 to -1. +Perhaps should be PerlIO_set_empty or similar. +This call may actually do nothing if "count" is deduced from pointer +and a "limit". + +=item B<PerlIO_has_base(f)> + +Implementation has a buffer, and can return pointer +to whole buffer and its size. Used by perl for B<-T> / B<-B> tests. +Other uses would be very obscure... + +=item B<PerlIO_get_base(f)> + +Return I<start> of buffer. + +=item B<PerlIO_get_bufsiz(f)> + +Return I<total size> of buffer. + +=back diff --git a/pod/perlbook.pod b/pod/perlbook.pod index 5bb4bfb0b5..d4bc876692 100644 --- a/pod/perlbook.pod +++ b/pod/perlbook.pod @@ -5,18 +5,31 @@ perlbook - Perl book information =head1 DESCRIPTION You can order Perl books from O'Reilly & Associates, 1-800-998-9938. -Local/overseas is +1 707 829 0515. 
If you can locate an O'Reilly order -form, you can also fax to +1 707 829 0104. I<Programming Perl> is a -reference work that covers nearly all of Perl (version 4, alas), while -I<Learning Perl> is a tutorial that covers the most frequently used subset -of the language. +Local/overseas is +1 707 829 0515. If you can locate an O'Reilly +order form, you can also fax to +1 707 829 0104. If you're +web-connected, you can even mosey on over to http://www.ora.com/ for +an online order form. - Programming Perl (the Camel Book): - ISBN 0-937175-64-1 (English) - ISBN 4-89052-384-7 (Japanese) +I<Programming Perl, Second Edition> is a reference work that covers +nearly all of Perl, while I<Learning Perl> is a tutorial that covers +the most frequently used subset of the language. You might also check +out the very handy, inexpensive, and compact I<Perl 5 Desktop +Reference>, especially when the thought of lugging the 676-page Camel +around doesn't make much sense. I<Mastering Regular Expressions>, by +Jeffrey Friedl, is a reference work that covers the art and implementation +of regular expressions in various languages including Perl. + + Programming Perl, Second Edition (the Camel Book): + ISBN 1-56592-149-6 (English) Learning Perl (the Llama Book): ISBN 1-56592-042-2 (English) ISBN 4-89502-678-1 (Japanese) ISBN 2-84177-005-2 (French) ISBN 3-930673-08-8 (German) + + Perl 5 Desktop Reference (the reference card): + ISBN 1-56592-187-9 (brief English) + + Mastering Regular Expressions (the Hip Owl Book): + ISBN 1-56592-257-3 (English) diff --git a/pod/perlbot.pod b/pod/perlbot.pod index 0fd545fe88..bc4e4da1f7 100644 --- a/pod/perlbot.pod +++ b/pod/perlbot.pod @@ -57,7 +57,7 @@ See L<CLASS CONTEXT AND THE OBJECT>. =item 7 -IO syntax is certainly less noisy, but it is also prone to ambiguities which +IO syntax is certainly less noisy, but it is also prone to ambiguities that can cause difficult-to-find bugs. Allow people to use the sure-thing OO syntax, even if you don't like it. 
@@ -265,7 +265,7 @@ This example demonstrates an interface for the SDBM class. This creates a $ref->FETCH(@_); } sub STORE { - my $self = shift; + my $self = shift; if (defined $_[0]){ my $ref = $self->{'dbm'}; $ref->STORE(@_); @@ -277,11 +277,11 @@ This example demonstrates an interface for the SDBM class. This creates a package main; use Fcntl qw( O_RDWR O_CREAT ); - tie %foo, Mydbm, "Sdbm", O_RDWR|O_CREAT, 0640; + tie %foo, "Mydbm", "Sdbm", O_RDWR|O_CREAT, 0640; $foo{'bar'} = 123; print "foo-bar = $foo{'bar'}\n"; - tie %bar, Mydbm, "Sdbm2", O_RDWR|O_CREAT, 0640; + tie %bar, "Mydbm", "Sdbm2", O_RDWR|O_CREAT, 0640; $bar{'Cathy'} = 456; print "bar-Cathy = $bar{'Cathy'}\n"; @@ -404,7 +404,7 @@ This problem can be solved by using the object to define the context of the method. Let the method look in the object for a reference to the data. The alternative is to force the method to go hunting for the data ("Is it in my class, or in a subclass? Which subclass?"), and this can be inconvenient -and will lead to hackery. It is better to just let the object tell the +and will lead to hackery. It is better just to let the object tell the method where that data is located. package Bar; @@ -420,7 +420,7 @@ method where that data is located. sub enter { my $self = shift; - + # Don't try to guess if we should use %Bar::fizzle # or %Foo::fizzle. The object already knows which # we should use, so just ask it. @@ -522,6 +522,6 @@ behavior by adding custom FETCH() and STORE() methods, if this is desired. 
package main; use Fcntl qw( O_RDWR O_CREAT ); - tie %foo, Mydbm, "adbm", O_RDWR|O_CREAT, 0640; + tie %foo, "Mydbm", "adbm", O_RDWR|O_CREAT, 0640; $foo{'bar'} = 123; print "foo-bar = $foo{'bar'}\n"; diff --git a/pod/perlcall.pod b/pod/perlcall.pod index 996c9145d0..f90e09f238 100644 --- a/pod/perlcall.pod +++ b/pod/perlcall.pod @@ -5,7 +5,7 @@ perlcall - Perl calling conventions from C =head1 DESCRIPTION The purpose of this document is to show you how to call Perl subroutines -directly from C, i.e. how to write I<callbacks>. +directly from C, i.e., how to write I<callbacks>. Apart from discussing the C interface provided by Perl for writing callbacks the document uses a series of examples to show how the @@ -29,8 +29,8 @@ called instead. The classic example of where callbacks are used is when writing an event driven program like for an X windows application. In this case -your register functions to be called whenever specific events occur, -e.g. a mouse button is pressed, the cursor moves into a window or a +you register functions to be called whenever specific events occur, +e.g., a mouse button is pressed, the cursor moves into a window or a menu item is selected. =back @@ -61,7 +61,7 @@ subroutines. They are The key function is I<perl_call_sv>. All the other functions are fairly simple wrappers which make it easier to call Perl subroutines in special cases. At the end of the day they will all call I<perl_call_sv> -to actually invoke the Perl subroutine. +to invoke the Perl subroutine. All the I<perl_call_*> functions have a C<flags> parameter which is used to pass a bit mask of options to Perl. This bit mask operates @@ -84,9 +84,9 @@ use of I<perl_call_sv>. The function, I<perl_call_pv>, is similar to I<perl_call_sv> except it expects its first parameter to be a C char* which identifies the Perl -subroutine you want to call, e.g. C<perl_call_pv("fred", 0)>. If the +subroutine you want to call, e.g., C<perl_call_pv("fred", 0)>. 
If the subroutine you want to call is in another package, just include the -package name in the string, e.g. C<"pkg::fred">. +package name in the string, e.g., C<"pkg::fred">. =item B<perl_call_method> @@ -126,31 +126,55 @@ which can consist of any combination of the symbols defined below, OR'ed together. +=head2 G_VOID + +Calls the Perl subroutine in a void context. + +This flag has 2 effects: + +=over 5 + +=item 1. + +It indicates to the subroutine being called that it is executing in +a void context (if it executes I<wantarray> the result will be the +undefined value). + +=item 2. + +It ensures that nothing is actually returned from the subroutine. + +=back + +The value returned by the I<perl_call_*> function indicates how many +items have been returned by the Perl subroutine - in this case it will +be 0. + + =head2 G_SCALAR Calls the Perl subroutine in a scalar context. This is the default context flag setting for all the I<perl_call_*> functions. -This flag has 2 effects +This flag has 2 effects: =over 5 =item 1. -it indicates to the subroutine being called that it is executing in a +It indicates to the subroutine being called that it is executing in a scalar context (if it executes I<wantarray> the result will be false). - =item 2. -it ensures that only a scalar is actually returned from the subroutine. +It ensures that only a scalar is actually returned from the subroutine. The subroutine can, of course, ignore the I<wantarray> and return a list anyway. If so, then only the last element of the list will be returned. =back -The value returned by the I<perl_call_*> function indicates how may +The value returned by the I<perl_call_*> function indicates how many items have been returned by the Perl subroutine - in this case it will be either 0 or 1. @@ -164,34 +188,34 @@ accessible from the stack - think of the case where only one value is returned as being a list with only one element. 
Any other items that were returned will not exist by the time control returns from the I<perl_call_*> function. The section I<Returning a list in a scalar -context> shows an example of this behaviour. +context> shows an example of this behavior. =head2 G_ARRAY Calls the Perl subroutine in a list context. -As with G_SCALAR, this flag has 2 effects +As with G_SCALAR, this flag has 2 effects: =over 5 =item 1. -it indicates to the subroutine being called that it is executing in an +It indicates to the subroutine being called that it is executing in an array context (if it executes I<wantarray> the result will be true). =item 2. -it ensures that all items returned from the subroutine will be +It ensures that all items returned from the subroutine will be accessible when control returns from the I<perl_call_*> function. =back -The value returned by the I<perl_call_*> function indicates how may +The value returned by the I<perl_call_*> function indicates how many items have been returned by the Perl subroutine. -If 0, the you have specified the G_DISCARD flag. +If 0, then you have specified the G_DISCARD flag. If not 0, then it will be a count of the number of items returned by the subroutine. These items will be stored on the Perl stack. The @@ -208,10 +232,10 @@ automatically for you. Note that it is still possible to indicate a context to the Perl subroutine by using either G_SCALAR or G_ARRAY. If you do not set this flag then it is I<very> important that you make -sure that any temporaries (i.e. parameters passed to the Perl +sure that any temporaries (i.e., parameters passed to the Perl subroutine and values returned from the subroutine) are disposed of yourself. 
The section I<Returning a Scalar> gives details of how to -explicitly dispose of these temporaries and the section I<Using Perl to +dispose of these temporaries explicitly and the section I<Using Perl to dispose of temporaries> discusses the specific circumstances where you can ignore the problem and let Perl deal with it for you. @@ -251,10 +275,10 @@ What has happened is that C<fred> accesses the C<@_> array which belongs to C<joe>. -=head2 G_EVAL +=head2 G_EVAL It is possible for the Perl subroutine you are calling to terminate -abnormally, e.g. by calling I<die> explicitly or by not actually +abnormally, e.g., by calling I<die> explicitly or by not actually existing. By default, when either of these of events occurs, the process will terminate immediately. If though, you want to trap this type of event, specify the G_EVAL flag. It will put an I<eval { }> @@ -265,7 +289,7 @@ check the C<$@> variable as you would in a normal Perl script. The value returned from the I<perl_call_*> function is dependent on what other flags have been specified and whether an error has -occurred. Here are all the different cases that can occur +occurred. Here are all the different cases that can occur: =over 5 @@ -293,7 +317,7 @@ from the stack. =back -See I<Using G_EVAL> for details of using G_EVAL. +See I<Using G_EVAL> for details on using G_EVAL. =head2 G_KEEPERR @@ -326,14 +350,17 @@ The G_KEEPERR flag was introduced in Perl version 5.002. See I<Using G_KEEPERR> for an example of a situation that warrants the use of this flag. -=head2 Determining the Context +=head2 Determining the Context As mentioned above, you can determine the context of the currently -executing subroutine in Perl with I<wantarray>. The equivalent test can -be made in C by using the C<GIMME> macro. This will return C<G_SCALAR> -if you have been called in a scalar context and C<G_ARRAY> if in an -array context. An example of using the C<GIMME> macro is shown in -section I<Using GIMME>. 
+executing subroutine in Perl with I<wantarray>. The equivalent test +can be made in C by using the C<GIMME_V> macro, which returns +C<G_ARRAY> if you have been called in an array context, C<G_SCALAR> if +in a scalar context, or C<G_VOID> if in a void context (i.e. the +return value will not be used). An older version of this macro is +called C<GIMME>; in a void context it returns C<G_SCALAR> instead of +C<G_VOID>. An example of using the C<GIMME_V> macro is shown in +section I<Using GIMME_V>. =head1 KNOWN PROBLEMS @@ -368,7 +395,7 @@ For example, say you want to call this Perl sub sub fred { eval { die "Fatal Error" ; } - print "Trapped error: $@\n" + print "Trapped error: $@\n" if $@ ; } @@ -388,8 +415,8 @@ When C<Call_fred> is executed it will print As control never returns to C<Call_fred>, the C<"back in Call_fred"> string will not get printed. -To work around this problem, you can either upgrade to Perl 5.002 (or -later), or use the G_EVAL flag with I<perl_call_*> as shown below +To work around this problem, you can either upgrade to Perl 5.002 or +higher, or use the G_EVAL flag with I<perl_call_*> as shown below void Call_fred() @@ -408,7 +435,7 @@ Enough of the definition talk, let's have a few examples. Perl provides many macros to assist in accessing the Perl stack. Wherever possible, these macros should always be used when interfacing -to Perl internals. Hopefully this should make the code less vulnerable +to Perl internals. We hope this should make the code less vulnerable to any changes made to Perl in the future. Another point worth noting is that in the first series of examples I @@ -458,7 +485,7 @@ specified. =item 3. We aren't interested in anything returned from I<PrintUID>, so -G_DISCARD is specified. Even if I<PrintUID> was changed to actually +G_DISCARD is specified. Even if I<PrintUID> was changed to return some value(s), having specified G_DISCARD will mean that they will be wiped by the time control returns from I<perl_call_pv>. 
@@ -529,15 +556,15 @@ have used this macro. The exception to this rule is if you are calling a Perl subroutine directly from an XSUB function. In this case it is not necessary to -explicitly use the C<dSP> macro - it will be declared for you +use the C<dSP> macro explicitly - it will be declared for you automatically. =item 3. Any parameters to be pushed onto the stack should be bracketed by the C<PUSHMARK> and C<PUTBACK> macros. The purpose of these two macros, in -this context, is to automatically count the number of parameters you -are pushing. Then whenever Perl is creating the C<@_> array for the +this context, is to count the number of parameters you are +pushing automatically. Then whenever Perl is creating the C<@_> array for the subroutine, it knows how big to make it. The C<PUSHMARK> macro tells Perl to make a mental note of the current @@ -555,7 +582,7 @@ local copy, I<not> the global copy. =item 4. -The only flag specified this time is G_DISCARD. Since we are passing 2 +The only flag specified this time is G_DISCARD. Because we are passing 2 parameters to the Perl subroutine this time, we have not specified G_NOARGS. @@ -565,7 +592,7 @@ Next, we come to XPUSHs. This is where the parameters actually get pushed onto the stack. In this case we are pushing a string and an integer. -See the section L<perlguts/"XSUB'S and the Argument Stack"> for details +See L<perlguts/"XSUBs and the Argument Stack"> for details on how the XPUSH macros work. =item 6. @@ -580,7 +607,7 @@ function. Now for an example of dealing with the items returned from a Perl subroutine. -Here is a Perl subroutine, I<Adder>, which takes 2 integer parameters +Here is a Perl subroutine, I<Adder>, that takes 2 integer parameters and simply returns their sum. sub Adder @@ -589,7 +616,7 @@ and simply returns their sum. 
$a + $b ; } -Since we are now concerned with the return value from I<Adder>, the C +Because we are now concerned with the return value from I<Adder>, the C function required to call it is now a bit more complex. static void @@ -626,7 +653,7 @@ Points to note this time are =over 5 -=item 1. +=item 1. The only flag specified this time was G_SCALAR. That means the C<@_> array will be created and that the value returned by I<Adder> will @@ -654,7 +681,7 @@ temporaries we create. This means that the temporaries we get rid of will be limited to those which were created after these calls. The C<FREETMPS>/C<LEAVE> pair will get rid of any values returned by -the Perl subroutine, plus it will also dump the mortal SV's we have +the Perl subroutine, plus it will also dump the mortal SVs we have created. Having C<ENTER>/C<SAVETMPS> at the beginning of the code makes sure that no other mortals are destroyed. @@ -668,11 +695,11 @@ an alternative to using these macros. The purpose of the macro C<SPAGAIN> is to refresh the local copy of the stack pointer. This is necessary because it is possible that the memory -allocated to the Perl stack has been re-allocated whilst in the +allocated to the Perl stack has been reallocated whilst in the I<perl_call_pv> call. If you are making use of the Perl stack pointer in your code you must -always refresh the your local copy using SPAGAIN whenever you make use +always refresh the local copy using SPAGAIN whenever you make use of the I<perl_call_*> functions or any other Perl internal function. =item 4. @@ -685,7 +712,7 @@ Expecting a single value is not quite the same as knowing that there will be one. If someone modified I<Adder> to return a list and we didn't check for that possibility and take appropriate action the Perl stack would end up in an inconsistent state. That is something you -I<really> don't want to ever happen. +I<really> don't want to happen ever. =item 5. 
@@ -834,7 +861,7 @@ then the output will be Value 1 = 3 In this case the main point to note is that only the last item in the -list returned from the subroutine, I<Adder> actually made it back to +list returned from the subroutine, I<AddSubtract>, actually made it back to I<call_AddSubScalar>. @@ -977,7 +1004,7 @@ I<Subtract>. =item 2. -The code +The code if (SvTRUE(GvSV(errgv))) { @@ -998,7 +1025,7 @@ refers to the C equivalent of C<$@>. Note that the stack is popped using C<POPs> in the block where C<SvTRUE(GvSV(errgv))> is true. This is necessary because whenever a I<perl_call_*> function invoked with G_EVAL|G_SCALAR returns an error, -the top of the stack holds the value I<undef>. Since we want the +the top of the stack holds the value I<undef>. Because we want the program to continue after detecting this error, it is essential that the stack is tidied up by removing the I<undef>. @@ -1012,7 +1039,7 @@ version of the call_Subtract example above inside a destructor: package Foo; sub new { bless {}, $_[0] } - sub Subtract { + sub Subtract { my($a,$b) = @_; die "death can be fatal" if $a < $b ; $a - $b; @@ -1026,7 +1053,7 @@ version of the call_Subtract example above inside a destructor: This example will fail to recognize that an error occurred inside the C<eval {}>. Here's why: the call_Subtract code got executed while perl -was cleaning up temporaries when exiting the eval block, and since +was cleaning up temporaries when exiting the eval block, and because call_Subtract is implemented with I<perl_call_pv> using the G_EVAL flag, it promptly reset C<$@>. This results in the failure of the outermost test for C<$@>, and thereby the failure of the error trap. @@ -1063,8 +1090,8 @@ Here is a snippet of XSUB which defines I<CallSubPV>. PUSHMARK(sp) ; perl_call_pv(name, G_DISCARD|G_NOARGS) ; -That is fine as far as it goes. The thing is, the Perl subroutine -can be specified only as a string. For Perl 4 this was adequate, +That is fine as far as it goes. 
The thing is, the Perl subroutine +can be specified as only a string. For Perl 4 this was adequate, but Perl 5 allows references to subroutines and anonymous subroutines. This is where I<perl_call_sv> is useful. @@ -1079,7 +1106,7 @@ I<perl_call_sv> instead of I<perl_call_pv>. PUSHMARK(sp) ; perl_call_sv(name, G_DISCARD|G_NOARGS) ; -Since we are using an SV to call I<fred> the following can all be used +Because we are using an SV to call I<fred> the following can all be used CallSubSV("fred") ; CallSubSV(\&fred) ; @@ -1092,7 +1119,7 @@ how you can specify the Perl subroutine. You should note that if it is necessary to store the SV (C<name> in the example above) which corresponds to the Perl subroutine so that it can -be used later in the program, it not enough to just store a copy of the +be used later in the program, it not enough just to store a copy of the pointer to the SV. Say the code above had been like this static SV * rememberSub ; @@ -1121,29 +1148,29 @@ particularly true for these cases CallSavedSub1() ; By the time each of the C<SaveSub1> statements above have been executed, -the SV*'s which corresponded to the parameters will no longer exist. +the SV*s which corresponded to the parameters will no longer exist. Expect an error message from Perl of the form Can't use an undefined value as a subroutine reference at ... for each of the C<CallSavedSub1> lines. -Similarly, with this code +Similarly, with this code $ref = \&fred ; SaveSub1($ref) ; $ref = 47 ; CallSavedSub1() ; -you can expect one of these messages (which you actually get is dependant on -the version of Perl you are using) +you can expect one of these messages (which you actually get is dependent on +the version of Perl you are using) Not a CODE reference at ... Undefined subroutine &main::47 called ... The variable C<$ref> may have referred to the subroutine C<fred> whenever the call to C<SaveSub1> was made but by the time -C<CallSavedSub1> gets called it now holds the number C<47>. 
Since we +C<CallSavedSub1> gets called it now holds the number C<47>. Because we saved only a pointer to the original SV in C<SaveSub1>, any changes to C<$ref> will be tracked by the pointer C<rememberSub>. This means that whenever C<CallSavedSub1> gets called, it will attempt to execute the @@ -1159,7 +1186,7 @@ A similar but more subtle problem is illustrated with this code CallSavedSub1() ; This time whenever C<CallSavedSub1> get called it will execute the Perl -subroutine C<joe> (assuming it exists) rather than C<fred> as was +subroutine C<joe> (assuming it exists) rather than C<fred> as was originally requested in the call to C<SaveSub1>. To get around these problems it is necessary to take a full copy of the @@ -1185,7 +1212,7 @@ SV. The code below shows C<SaveSub2> modified to do that PUSHMARK(sp) ; perl_call_sv(keepSub, G_DISCARD|G_NOARGS) ; -In order to avoid creating a new SV every time C<SaveSub2> is called, +To avoid creating a new SV every time C<SaveSub2> is called, the function first checks to see if it has been called before. If not, then space for a new SV is allocated and the reference to the Perl subroutine, C<name> is copied to the variable C<keepSub> in one @@ -1247,9 +1274,9 @@ Consider the following Perl code } } -It just implements a very simple class to manage an array. Apart from +It implements just a very simple class to manage an array. Apart from the constructor, C<new>, it declares methods, one static and one -virtual. The static method, C<PrintID>, simply prints out the class +virtual. The static method, C<PrintID>, prints out simply the class name and a version number. The virtual method, C<Display>, prints out a single element of the array. Here is an all Perl example of using it. @@ -1260,7 +1287,7 @@ single element of the array. Here is an all Perl example of using it. will print 1: green - This is Class Mine version 1.0 + This is Class Mine version 1.0 Calling a Perl method from C is fairly straightforward. 
The following things are required @@ -1320,33 +1347,38 @@ The only thing to note is that in both the static and virtual methods, the method name is not passed via the stack - it is used as the first parameter to I<perl_call_method>. -=head2 Using GIMME +=head2 Using GIMME_V -Here is a trivial XSUB which prints the context in which it is +Here is a trivial XSUB which prints the context in which it is currently executing. void PrintContext() CODE: - if (GIMME == G_SCALAR) + I32 gimme = GIMME_V; + if (gimme == G_VOID) + printf ("Context is Void\n") ; + else if (gimme == G_SCALAR) printf ("Context is Scalar\n") ; else printf ("Context is Array\n") ; and here is some Perl to test it + PrintContext ; $a = PrintContext ; @a = PrintContext ; The output from that will be + Context is Void Context is Scalar Context is Array =head2 Using Perl to dispose of temporaries In the examples given to date, any temporaries created in the callback -(i.e. parameters passed on the stack to the I<perl_call_*> function or +(i.e., parameters passed on the stack to the I<perl_call_*> function or values returned via the stack) have been freed by one of these methods =over 5 @@ -1418,30 +1450,30 @@ will be more like this perl --> XSUB --> event handler ... - event handler --> perl_call --> perl + event handler --> perl_call --> perl | - event handler <-- perl_call --<--+ + event handler <-- perl_call <----+ ... - event handler --> perl_call --> perl + event handler --> perl_call --> perl | - event handler <-- perl_call --<--+ + event handler <-- perl_call <----+ ... - event handler --> perl_call --> perl + event handler --> perl_call --> perl | - event handler <-- perl_call --<--+ + event handler <-- perl_call <----+ In this case the flow of control can consist of only the repeated sequence event handler --> perl_call --> perl -for the practically the complete duration of the program. This means -that control may I<never> drop back to the surrounding scope in Perl at -the extreme left. 
+for practically the complete duration of the program. This means that +control may I<never> drop back to the surrounding scope in Perl at the +extreme left. So what is the big problem? Well, if you are expecting Perl to tidy up those temporaries for you, you might be in for a long wait. For Perl -to actually dispose of your temporaries, control must drop back to the +to dispose of your temporaries, control must drop back to the enclosing scope at some stage. In the event driven scenario that may never happen. This means that as time goes on, your program will create more and more temporaries, none of which will ever be freed. As @@ -1450,7 +1482,7 @@ eventually consume all the available memory in your system - kapow! So here is the bottom line - if you are sure that control will revert back to the enclosing Perl scope fairly quickly after the end of your -callback, then it isn't absolutely necessary to explicitly dispose of +callback, then it isn't absolutely necessary to dispose explicitly of any temporaries you may have created. Mind you, if you are at all uncertain about what to do, it doesn't do any harm to tidy up anyway. @@ -1524,7 +1556,7 @@ registers, C<pcb1>, might look like this The mapping between the C callback and the Perl equivalent is stored in the global variable C<callback>. -This will be adequate if you ever need to have only 1 callback +This will be adequate if you ever need to have only one callback registered at any time. An example could be an error handler like the code sketched out above. Remember though, repeated calls to C<register_fatal> will replace the previously registered callback @@ -1553,7 +1585,7 @@ This may expect the C I<ProcessRead> function of this form int fh ; char * buffer ; { - ... + ... } To provide a Perl interface to this library we need to be able to map @@ -1646,7 +1678,7 @@ the C<buffer> parameter like this Without the file handle there is no straightforward way to map from the C callback to the Perl subroutine. 
-In this case a possible way around this problem is to pre-define a +In this case a possible way around this problem is to predefine a series of C functions to act as the interface to Perl, thus #define MAX_CB 3 @@ -1761,7 +1793,7 @@ series of C functions to act as the interface to Perl, thus asynch_close(fh) ; -In this case the functions C<fn1>, C<fn2> and C<fn3> are used to +In this case the functions C<fn1>, C<fn2>, and C<fn3> are used to remember the Perl subroutine to be called. Each of the functions holds a separate hard-wired index which is used in the function C<Pcb> to access the C<Map> array and actually call the Perl subroutine. @@ -1774,7 +1806,7 @@ example. Secondly, there is a hard-wired limit (in this case 3) to the number of callbacks that can exist simultaneously. The only way to increase the limit is by modifying the code to add more functions and then -re-compiling. None the less, as long as the number of functions is +recompiling. None the less, as long as the number of functions is chosen with some care, it is still a workable solution and in some cases is the only one available. @@ -1878,18 +1910,37 @@ sets the stack up so that we can use the C<ST> macro. Unlike the original coding of this example, the returned values are not accessed in reverse order. So C<ST(0)> refers to the -first value returned by the Perl subroutine and C<ST(count-1)> +first value returned by the Perl subroutine and C<ST(count-1)> refers to the last. =back +=head2 Creating and calling an anonymous subroutine in C + +As we've already shown, L<perl_call_sv> can be used to invoke an +anonymous subroutine. However, our example showed a Perl script +invoking an XSUB to perform this operation. Let's see how it can be +done inside our C code: + + ... + + SV *cvrv = perl_eval_pv("sub { print 'You will not find me cluttering any namespace!' }", TRUE); + + ... 
+ + perl_call_sv(cvrv, G_VOID|G_NOARGS); + +L<perlguts/perl_eval_pv> is used to compile the anonymous subroutine, which +will be the return value as well. Once this code reference is in hand, it +can be mixed in with all the previous examples we've shown. + =head1 SEE ALSO L<perlxs>, L<perlguts>, L<perlembed> =head1 AUTHOR -Paul Marquess <pmarquess@bfsec.bt.co.uk> +Paul Marquess <F<pmarquess@bfsec.bt.co.uk>> Special thanks to the following people who assisted in the creation of the document. @@ -1899,4 +1950,4 @@ and Larry Wall. =head1 DATE -Version 1.2, 16th Jan 1996 +Version 1.3, 14th Apr 1997 diff --git a/pod/perldata.pod b/pod/perldata.pod index 4b6e433515..38d5e9380d 100644 --- a/pod/perldata.pod +++ b/pod/perldata.pod @@ -11,6 +11,28 @@ associative arrays of scalars, known as "hashes". Normal arrays are indexed by number, starting with 0. (Negative subscripts count from the end.) Hash arrays are indexed by string. +Values are usually referred to by name (or through a named reference). +The first character of the name tells you to what sort of data +structure it refers. The rest of the name tells you the particular +value to which it refers. Most often, it consists of a single +I<identifier>, that is, a string beginning with a letter or underscore, +and containing letters, underscores, and digits. In some cases, it +may be a chain of identifiers, separated by C<::> (or by C<'>, but +that's deprecated); all but the last are interpreted as names of +packages, to locate the namespace in which to look +up the final identifier (see L<perlmod/Packages> for details). +It's possible to substitute for a simple identifier an expression +which produces a reference to the value at runtime; this is +described in more detail below, and in L<perlref>. + +There are also special variables whose names don't follow these +rules, so that they don't accidentally collide with one of your +normal variables. 
Strings which match parenthesized parts of a +regular expression are saved under names containing only digits after +the C<$> (see L<perlop> and L<perlre>). In addition, several special +variables which provide windows into the inner working of Perl have names +containing punctuation characters (see L<perlvar>). + Scalar values are always named with '$', even when referring to a scalar that is part of an array. It works like the English word "the". Thus we have: @@ -43,14 +65,14 @@ This means that $foo and @foo are two different variables. It also means that C<$foo[1]> is a part of @foo, not a part of $foo. This may seem a bit weird, but that's okay, because it is weird. -Since variable and array references always start with '$', '@', or '%', +Because variable and array references always start with '$', '@', or '%', the "reserved" words aren't in fact reserved with respect to variable names. (They ARE reserved with respect to labels and filehandles, however, which don't have an initial special character. You can't have a filehandle named "log", for instance. Hint: you could say C<open(LOG,'logfile')> rather than C<open(log,'logfile')>. Using uppercase filehandles also improves readability and protects you from conflict -with future reserved words.) Case I<IS> significant--"FOO", "Foo" and +with future reserved words.) Case I<IS> significant--"FOO", "Foo", and "foo" are all different names. Names that start with a letter or underscore may also contain digits and underscores. @@ -58,9 +80,9 @@ It is possible to replace such an alphanumeric name with an expression that returns a reference to an object of that type. For a description of this, see L<perlref>. -Names that start with a digit may only contain more digits. Names +Names that start with a digit may contain only more digits. Names which do not start with a letter, underscore, or digit are limited to -one character, e.g. C<$%> or C<$$>. (Most of these one character names +one character, e.g., C<$%> or C<$$>. 
(Most of these one character names have a predefined significance to Perl. For instance, C<$$> is the current process id.) @@ -81,14 +103,14 @@ list context to each of its arguments. For example, if you say int( <STDIN> ) -the integer operation provides a scalar context for the <STDIN> +the integer operation provides a scalar context for the E<lt>STDINE<gt> operator, which responds by reading one line from STDIN and passing it back to the integer operation, which will then find the integer value of that line and return that. If, on the other hand, you say sort( <STDIN> ) -then the sort operation provides a list context for <STDIN>, which +then the sort operation provides a list context for E<lt>STDINE<gt>, which will proceed to read every line available up to the end of file, and pass that list of lines back to the sort routine, which will then sort those lines and return them as a list to whatever the context @@ -113,7 +135,7 @@ Scalar variables may contain various kinds of singular data, such as numbers, strings, and references. In general, conversion from one form to another is transparent. (A scalar may not contain multiple values, but may contain a reference to an array or hash containing multiple values.) -Because of the automatic conversion of scalars, operations and functions +Because of the automatic conversion of scalars, operations, and functions that return scalars don't need to care (and, in fact, can't care) whether the context is looking for a string or a number. @@ -122,13 +144,13 @@ declare a scalar variable to be of type "string", or of type "number", or type "filehandle", or anything else. Perl is a contextually polymorphic language whose scalars can be strings, numbers, or references (which includes objects). 
While strings and numbers are considered pretty -much same thing for nearly all purposes, references are strongly-typed -uncastable pointers with built-in reference-counting and destructor +much the same thing for nearly all purposes, references are strongly-typed +uncastable pointers with builtin reference-counting and destructor invocation. A scalar value is interpreted as TRUE in the Boolean sense if it is not the null string or the number 0 (or its string equivalent, "0"). The -Boolean context is just a special kind of scalar context. +Boolean context is just a special kind of scalar context. There are actually two varieties of null scalars: defined and undefined. Undefined null scalars are returned when there is no real @@ -138,14 +160,14 @@ array. An undefined null scalar may become defined the first time you use it as if it were defined, but prior to that you can use the defined() operator to determine whether the value is defined or not. -To find out whether a given string is a valid non-zero number, it's usually +To find out whether a given string is a valid nonzero number, it's usually enough to test it against both numeric 0 and also lexical "0" (although this will cause B<-w> noises). That's because strings that aren't -numbers count as 0, just as the do in I<awk>: +numbers count as 0, just as they do in B<awk>: if ($str == 0 && $str ne "0") { warn "That doesn't look like a number"; - } + } That's usually preferable because otherwise you won't treat IEEE notations like C<NaN> or C<Infinity> properly. At other times you might prefer to @@ -154,21 +176,21 @@ for details on regular expressions. 
warn "has nondigits" if /\D/; warn "not a whole number" unless /^\d+$/; - warn "not an integer" unless /^[+-]?\d+$/ - warn "not a decimal number" unless /^[+-]?\d+\.?\d*$/ - warn "not a C float" + warn "not an integer" unless /^[+-]?\d+$/ + warn "not a decimal number" unless /^[+-]?\d+\.?\d*$/ + warn "not a C float" unless /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/; The length of an array is a scalar value. You may find the length of array @days by evaluating C<$#days>, as in B<csh>. (Actually, it's not -the length of the array, it's the subscript of the last element, since +the length of the array, it's the subscript of the last element, because there is (ordinarily) a 0th element.) Assigning to C<$#days> changes the length of the array. Shortening an array by this method destroys intervening values. Lengthening an array that was previously shortened I<NO LONGER> recovers the values that were in those elements. (It used to -in Perl 4, but we had to break this make to make sure destructors were +in Perl 4, but we had to break this to make sure destructors were called when expected.) You can also gain some measure of efficiency by -preextending an array that is going to get big. (You can also extend +pre-extending an array that is going to get big. (You can also extend an array by assigning to an element that is off the end of the array.) You can truncate an array down to nothing by assigning the null list () to it. The following are equivalent: @@ -182,10 +204,10 @@ last value, like the C comma operator.) The following is always true: scalar(@whatever) == $#whatever - $[ + 1; -Version 5 of Perl changed the semantics of $[: files that don't set -the value of $[ no longer need to worry about whether another -file changed its value. (In other words, use of $[ is deprecated.) 
-So in general you can just assume that +Version 5 of Perl changed the semantics of C<$[>: files that don't set +the value of C<$[> no longer need to worry about whether another +file changed its value. (In other words, use of C<$[> is deprecated.) +So in general you can assume that scalar(@whatever) == $#whatever + 1; @@ -198,7 +220,7 @@ If you evaluate a hash in a scalar context, it returns a value which is true if and only if the hash contains any key/value pairs. (If there are any key/value pairs, the value returned is a string consisting of the number of used buckets and the number of allocated buckets, separated -by a slash. This is pretty much only useful to find out whether Perl's +by a slash. This is pretty much useful only to find out whether Perl's (compiled in) hashing algorithm is performing poorly on your data set. For example, you stick 10,000 things in a hash, but evaluating %HASH in scalar context reveals "1/16", which means only one out of sixteen buckets @@ -217,27 +239,33 @@ integer formats: 0377 # octal 4_294_967_296 # underline for legibility -String literals are usually delimited by either single or double quotes. They -work much like shell quotes: double-quoted string literals are subject -to backslash and variable substitution; single-quoted strings are not -(except for "C<\'>" and "C<\\>"). The usual Unix backslash rules apply for making -characters such as newline, tab, etc., as well as some more exotic -forms. See L<perlop/qq> for a list. +String literals are usually delimited by either single or double +quotes. They work much like shell quotes: double-quoted string +literals are subject to backslash and variable substitution; +single-quoted strings are not (except for "C<\'>" and "C<\\>"). +The usual Unix backslash rules apply for making characters such as +newline, tab, etc., as well as some more exotic forms. See +L<perlop/Quote and Quotelike Operators> for a list. + +Octal or hex representations in string literals (e.g. 
'0xffff') are not +automatically converted to their integer representation. The hex() and +oct() functions make these conversions for you. See L<perlfunc/hex> and +L<perlfunc/oct> for more details. -You can also embed newlines directly in your strings, i.e. they can end +You can also embed newlines directly in your strings, i.e., they can end on a different line than they begin. This is nice, but if you forget your trailing quote, the error will not be reported until Perl finds another line containing the quote character, which may be much further on in the script. Variable substitution inside strings is limited to scalar variables, arrays, and array slices. (In other words, -identifiers beginning with $ or @, followed by an optional bracketed +names beginning with $ or @, followed by an optional bracketed expression as a subscript.) The following code segment prints out "The -price is $100." +price is $Z<>100." $Price = '$100'; # not interpreted print "The price is $Price.\n"; # interpreted -As in some shells, you can put curly brackets around the identifier to +As in some shells, you can put curly brackets around the name to delimit it from following alphanumerics. In fact, an identifier within such curlies is forced to be a string, as is any single identifier within a hash subscript. Our earlier example, @@ -253,19 +281,25 @@ in the subscript will be interpreted as an expression. Note that a single-quoted string must be separated from a preceding word by a -space, since single quote is a valid (though deprecated) character in -an identifier (see L<perlmod/Packages>). - -Two special literals are __LINE__ and __FILE__, which represent the -current line number and filename at that point in your program. They -may only be used as separate tokens; they will not be interpolated into -strings. In addition, the token __END__ may be used to indicate the -logical end of the script before the actual end of file. 
Any following -text is ignored, but may be read via the DATA filehandle. (The DATA -filehandle may read data only from the main script, but not from any -required file or evaluated string.) The two control characters ^D and -^Z are synonyms for __END__ (or __DATA__ in a module; see L<SelfLoader> for -details on __DATA__). +space, because single quote is a valid (though deprecated) character in +a variable name (see L<perlmod/Packages>). + +Three special literals are __FILE__, __LINE__, and __PACKAGE__, which +represent the current filename, line number, and package name at that +point in your program. They may be used only as separate tokens; they +will not be interpolated into strings. If there is no current package +(due to a C<package;> directive), __PACKAGE__ is the undefined value. + +The tokens __END__ and __DATA__ may be used to indicate the logical end +of the script before the actual end of file. Any following text is +ignored, but may be read via a DATA filehandle: main::DATA for __END__, +or PACKNAME::DATA (where PACKNAME is the current package) for __DATA__. +The two control characters ^D and ^Z are synonyms for __END__ (or +__DATA__ in a module). See L<SelfLoader> for more description of +__DATA__, and an example of its use. Note that you cannot read from the +DATA filehandle in a BEGIN block: the BEGIN block is executed as soon as +it is seen (during compilation), at which point the corresponding +__DATA__ (or __END__) token has not yet been seen. A word that has no other interpretation in the grammar will be treated as if it were a quoted string. These are known as @@ -279,12 +313,12 @@ say then any bareword that would NOT be interpreted as a subroutine call produces a compile-time error instead. The restriction lasts to the -end of the enclosing block. An inner block may countermand this +end of the enclosing block. An inner block may countermand this by saying C<no strict 'subs'>. 
Array variables are interpolated into double-quoted strings by joining all the elements of the array with the delimiter specified in the C<$"> -variable ($LIST_SEPARATOR in English), space by default. The following +variable (C<$LIST_SEPARATOR> in English), space by default. The following are equivalent: $temp = join($",@ARGV); @@ -302,19 +336,20 @@ and is almost always right. If it does guess wrong, or if you're just plain paranoid, you can force the correct interpretation with curly brackets as above. -A line-oriented form of quoting is based on the shell "here-doc" syntax. -Following a C<E<lt>E<lt>> you specify a string to terminate the quoted material, -and all lines following the current line down to the terminating string -are the value of the item. The terminating string may be either an -identifier (a word), or some quoted text. If quoted, the type of -quotes you use determines the treatment of the text, just as in regular -quoting. An unquoted identifier works like double quotes. There must -be no space between the C<E<lt>E<lt>> and the identifier. (If you put a space it -will be treated as a null identifier, which is valid, and matches the -first blank line.) The terminating string must appear by itself -(unquoted and with no surrounding whitespace) on the terminating line. - - print <<EOF; +A line-oriented form of quoting is based on the shell "here-doc" +syntax. Following a C<E<lt>E<lt>> you specify a string to terminate +the quoted material, and all lines following the current line down to +the terminating string are the value of the item. The terminating +string may be either an identifier (a word), or some quoted text. If +quoted, the type of quotes you use determines the treatment of the +text, just as in regular quoting. An unquoted identifier works like +double quotes. There must be no space between the C<E<lt>E<lt>> and +the identifier. 
(If you put a space it will be treated as a null +identifier, which is valid, and matches the first empty line.) The +terminating string must appear by itself (unquoted and with no +surrounding whitespace) on the terminating line. + + print <<EOF; The price is $Price. EOF @@ -337,11 +372,11 @@ first blank line.) The terminating string must appear by itself Here's a line or two. THIS - and here another. + and here's another. THAT -Just don't forget that you have to put a semicolon on the end -to finish the statement, as Perl doesn't know you're not going to +Just don't forget that you have to put a semicolon on the end +to finish the statement, as Perl doesn't know you're not going to try to do this: print <<ABC @@ -369,12 +404,12 @@ assigns the entire list value to array foo, but assigns the value of variable bar to variable foo. Note that the value of an actual array in a scalar context is the length of the array; the -following assigns to $foo the value 3: +following assigns the value 3 to $foo: @foo = ('cc', '-E', $bar); $foo = @foo; # $foo gets 3 -You may have an optional comma before the closing parenthesis of an +You may have an optional comma before the closing parenthesis of a list literal, so that you can say: @foo = ( @@ -402,13 +437,13 @@ interpolating an array with no elements is the same as if no array had been interpolated at that point. A list value may also be subscripted like a normal array. You must -put the list in parentheses to avoid ambiguity. Examples: +put the list in parentheses to avoid ambiguity. For example: # Stat returns list value. $time = (stat($file))[8]; # SYNTAX ERROR HERE. - $time = stat($file)[8]; # OOPS, FORGOT PARENS + $time = stat($file)[8]; # OOPS, FORGOT PARENTHESES # Find a hex digit. $hexdigit = ('a','b','c','d','e','f')[$digit-10]; @@ -416,6 +451,11 @@ put the list in parentheses to avoid ambiguity. Examples: # A "reverse comma operator". return (pop(@foo),pop(@foo))[0]; +You may assign to C<undef> in a list. 
This is useful for throwing +away some of the return values of a function: + + ($dev, $ino, undef, undef, $uid, $gid) = stat($file); + Lists may be assigned to if and only if each element of the list is legal to assign to: @@ -430,7 +470,7 @@ produced by the expression on the right side of the assignment: $x = (($foo,$bar) = f()); # set $x to f()'s return count This is very handy when you want to do a list assignment in a Boolean -context, since most list functions return a null list when finished, +context, because most list functions return a null list when finished, which when assigned produces a 0, which is interpreted as FALSE. The final element may be an array or a hash: @@ -457,8 +497,9 @@ key/value pairs. That's why it's good to use references sometimes. It is often more readable to use the C<=E<gt>> operator between key/value pairs. The C<=E<gt>> operator is mostly just a more visually distinctive -synonym for a comma, but it also quotes its left-hand operand, which makes -it nice for initializing hashes: +synonym for a comma, but it also arranges for its left-hand operand to be +interpreted as a string, if it's a bareword which would be a legal identifier. +This makes it nice for initializing hashes: %map = ( red => 0x00f, @@ -476,7 +517,7 @@ or for initializing hash references to be used as records: or for using call-by-named-parameter to complicated functions: - $field = $query->radio_group( + $field = $query->radio_group( name => 'group_name', values => ['eenie','meenie','minie'], default => 'meenie', @@ -488,17 +529,19 @@ Note that just because a hash is initialized in that order doesn't mean that it comes out in that order. See L<perlfunc/sort> for examples of how to arrange for an output ordering. -=head2 Typeglobs and FileHandles +=head2 Typeglobs and Filehandles Perl uses an internal type called a I<typeglob> to hold an entire symbol table entry. The type prefix of a typeglob is a C<*>, because -it represents all types. 
This used to be the preferred way to +it represents all types. This used to be the preferred way to pass arrays and hashes by reference into a function, but now that -we have real references, this is seldom needed. +we have real references, this is seldom needed. It also used to be the +preferred way to pass filehandles into a function, but now +that we have the *foo{THING} notation it isn't often needed for that, +either. It is still needed to pass new filehandles into functions +(*HANDLE{IO} only works if HANDLE has already been used). -One place where you still use typeglobs (or references thereto) -is for passing or storing filehandles. If you want to save away -a filehandle, do it this way: +If you need to use a typeglob to save away a filehandle, do it this way: $fh = *STDOUT; @@ -506,16 +549,18 @@ or perhaps as a real reference, like this: $fh = \*STDOUT; -This is also the way to create a local filehandle. For example: +This is also a way to create a local filehandle. For example: sub newopen { my $path = shift; local *FH; # not my! open (FH, $path) || return undef; - return \*FH; + return *FH; } $fh = newopen('/etc/passwd'); -See L<perlref>, L<perlsub>, and L<perlmod/"Symbols Tables"> for more -discussion on typeglobs. See L<perlfunc/open> for other ways of -generating filehandles. +Another way to create local filehandles is with IO::Handle and its ilk, +see the bottom of L<perlfunc/open()>. + +See L<perlref>, L<perlsub>, and L<perlmod/"Symbol Tables"> for more +discussion on typeglobs. diff --git a/pod/perldebug.pod b/pod/perldebug.pod index 17fe25926f..94ece44a6f 100644 --- a/pod/perldebug.pod +++ b/pod/perldebug.pod @@ -6,178 +6,524 @@ perldebug - Perl debugging First of all, have you tried using the B<-w> switch? -=head2 Debugging - -If you invoke Perl with a B<-d> switch, your script will be run under the -debugger. However, the Perl debugger is not a separate program as it is -in a C environment. 
Instead, the B<-d> flag tells the compiler to insert -source information into the pseudocode it's about to hand to the -interpreter. (That means your code must compile correctly for the -debugger to work on it.) Then when the interpreter starts up, it -pre-loads a Perl library file containing the debugger itself. The program -will halt before the first executable statement (but see below) and ask -you for one of the following commands: +=head1 The Perl Debugger + +If you invoke Perl with the B<-d> switch, your script runs under the +Perl source debugger. This works like an interactive Perl +environment, prompting for debugger commands that let you examine +source code, set breakpoints, get stack backtraces, change the values of +variables, etc. This is so convenient that you often fire up +the debugger all by itself just to test out Perl constructs +interactively to see what they do. For example: + + perl -d -e 42 + +In Perl, the debugger is not a separate program as it usually is in the +typical compiled environment. Instead, the B<-d> flag tells the compiler +to insert source information into the parse trees it's about to hand off +to the interpreter. That means your code must first compile correctly +for the debugger to work on it. Then when the interpreter starts up, it +preloads a Perl library file containing the debugger itself. + +The program will halt I<right before> the first run-time executable +statement (but see below regarding compile-time statements) and ask you +to enter a debugger command. Contrary to popular expectations, whenever +the debugger halts and shows you a line of code, it always displays the +line it's I<about> to execute, rather than the one it has just executed. + +Any command not recognized by the debugger is directly executed +(C<eval>'d) as Perl code in the current package. (The debugger uses the +DB package for its own state information.) 
+ +Leading white space before a command causes the debugger to think +it's I<NOT> a debugger command but Perl code, so be careful not to do +that. + +=head2 Debugger Commands + +The debugger understands the following commands: =over 12 -=item h +=item h [command] Prints out a help message. +If you supply another debugger command as an argument to the C<h> command, +it prints out the description for just that command. The special +argument of C<h h> produces a more compact help listing, designed to fit +together on one screen. + +If the output of the C<h> command (or any command, for that matter) scrolls +past your screen, precede the command with a leading pipe symbol so +it's run through your pager, as in + + DB> |h + +You may change the pager which is used via the C<O pager=...> command. + +=item p expr + +Same as C<print {$DB::OUT} expr> in the current package. In particular, +because this is just Perl's own B<print> function, this means that nested +data structures and objects are not dumped, unlike with the C<x> command. + +The C<DB::OUT> filehandle is opened to F</dev/tty>, regardless of +where STDOUT may be redirected to. + +=item x expr + +Evaluates its expression in list context and dumps out the result +in a pretty-printed fashion. Nested data structures are printed out +recursively, unlike the C<print> function. + +The details of printout are governed by multiple C<O>ptions. + +=item V [pkg [vars]] + +Display all (or some) variables in package (defaulting to the C<main> +package) using a data pretty-printer (hashes show their keys and values so +you see what's what, control characters are made printable, etc.). Make +sure you don't put the type specifier (like C<$>) there, just the symbol +names, like this: + + V DB filename line + +Use C<~pattern> and C<!pattern> for positive and negative regexps. + +Nested data structures are printed out in a legible fashion, unlike +the C<print> function.
+ +The details of printout are governed by multiple C<O>ptions. + +=item X [vars] + +Same as C<V currentpackage [vars]>. + =item T -Stack trace. -If you do bizarre things to your @_ arguments in a subroutine, the stack -backtrace will not always show the original values. +Produce a stack backtrace. See below for details on its output. -=item s +=item s [expr] Single step. Executes until it reaches the beginning of another -statement. +statement, descending into subroutine calls. If an expression is +supplied that includes function calls, it too will be single-stepped. -=item n +=item n [expr] Next. Executes over subroutine calls, until it reaches the beginning -of the next statement. - -=item f +of the next statement. If an expression is supplied that includes +function calls, those functions will be executed with stops before +each statement. -Finish. Executes statements until it has finished the current -subroutine. +=item E<lt>CRE<gt> -=item c +Repeat last C<n> or C<s> command. -Continue. Executes until the next breakpoint is reached. +=item c [line|sub] -=item c line +Continue, optionally inserting a one-time-only breakpoint +at the specified line or subroutine. -Continue to the specified line. Inserts a one-time-only breakpoint at -the specified line. - -=item <CR> +=item l -Repeat last n or s. +List next window of lines. =item l min+incr -List incr+1 lines starting at min. If min is omitted, starts where -last listing left off. If incr is omitted, previous value of incr is -used. +List C<incr+1> lines starting at C<min>. =item l min-max -List lines in the indicated range. +List lines C<min> through C<max>. C<l -> is synonymous to C<->. =item l line -List just the indicated line. +List a single line. -=item l +=item l subname -List next window. +List first window of lines from subroutine. =item - -List previous window. +List previous window of lines. -=item w line +=item w [line] -List window (a few lines worth of code) around line. 
+List window (a few lines) around the current line. -=item l subname +=item . + +Return debugger pointer to the last-executed line and +print it out. -List subroutine. If it's a long subroutine it just lists the -beginning. Use "l" to list more. +=item f filename + +Switch to viewing a different file or eval statement. If C<filename> +is not a full filename as found in values of %INC, it is considered as +a regexp. =item /pattern/ -Regular expression search forward in the source code for pattern; the -final / is optional. +Search forwards for pattern; final / is optional. =item ?pattern? -Regular expression search backward in the source code for pattern; the -final ? is optional. +Search backwards for pattern; final ? is optional. =item L -List lines that have breakpoints or actions. +List all breakpoints and actions. -=item S +=item S [[!]pattern] -Lists the names of all subroutines. +List subroutine names [not] matching pattern. =item t -Toggle trace mode on or off. +Toggle trace mode (see also C<AutoTrace> C<O>ption). + +=item t expr + +Trace through execution of expr. For example: + + $ perl -de 42 + Stack dump during die enabled outside of evals. + + Loading DB routines from perl5db.pl patch level 0.94 + Emacs support available. + + Enter h or `h h' for help. + + main::(-e:1): 0 + DB<1> sub foo { 14 } + + DB<2> sub bar { 3 } + + DB<3> t print foo() * bar() + main::((eval 172):3): print foo() * bar(); + main::foo((eval 168):2): + main::bar((eval 170):2): + 42 + +or, with the C<O>ption C<frame=2> set, -=item b line [ condition ] + DB<4> O f=2 + frame = '2' + DB<5> t print foo() * bar() + 3: foo() * bar() + entering main::foo + 2: sub foo { 14 }; + exited main::foo + entering main::bar + 2: sub bar { 3 }; + exited main::bar + 42 + +=item b [line] [condition] Set a breakpoint. If line is omitted, sets a breakpoint on the line -that is about to be executed. If a condition is specified, it is +that is about to be executed.
If a condition is specified, it's evaluated each time the statement is reached and a breakpoint is taken -only if the condition is true. Breakpoints may only be set on lines -that begin an executable statement. Conditions don't use C<if>: +only if the condition is true. Breakpoints may be set only on lines +that begin an executable statement. Conditions don't use B<if>: b 237 $x > 30 + b 237 ++$count237 < 11 b 33 /pattern/i -=item b subname [ condition ] +=item b subname [condition] + +Set a breakpoint at the first line of the named subroutine. + +=item b postpone subname [condition] + +Set breakpoint at first line of subroutine after it is compiled. -Set breakpoint at first executable line of subroutine. +=item b load filename -=item d line +Set breakpoint at the first executed line of the file. Filename should +be a full name as found in values of %INC. -Delete breakpoint. If line is omitted, deletes the breakpoint on the -line that is about to be executed. +=item b compile subname + +Sets breakpoint at the first statement executed after the subroutine +is compiled. + +=item d [line] + +Delete a breakpoint at the specified line. If line is omitted, deletes +the breakpoint on the line that is about to be executed. =item D -Delete all breakpoints. +Delete all installed breakpoints. + +=item a [line] command -=item a line command +Set an action to be done before the line is executed. +The sequence of steps taken by the debugger is -Set an action for line. A multiline command may be entered by -backslashing the newlines. This command is Perl code, not another -debugger command. + 1. check for a breakpoint at this line + 2. print the line if necessary (tracing) + 3. do any actions associated with that line + 4. prompt user if at a breakpoint or in single-step + 5. evaluate line + +For example, this will print out C<$foo> every time line +53 is passed: + + a 53 print "DB FOUND $foo\n" =item A -Delete all line actions. +Delete all installed actions.
-=item < command +=item O [opt[=val]] [opt"val"] [opt?]... -Set an action to happen before every debugger prompt. A multiline -command may be entered by backslashing the newlines. +Set or query values of options. val defaults to 1. opt can +be abbreviated. Several options can be listed. -=item > command +=over 12 -Set an action to happen after the prompt when you've just given a -command to return to executing the script. A multiline command may be -entered by backslashing the newlines. +=item C<recallCommand>, C<ShellBang> -=item V package [symbols] +The characters used to recall command or spawn shell. By +default, these are both set to C<!>. -Display all (or some) variables in package (defaulting to the C<main> -package) using a data pretty-printer (hashes show their keys and values so -you see what's what, control characters are made printable, etc.). Make -sure you don't put the type specifier (like $) there, just the symbol -names, like this: +=item C<pager> + +Program to use for output of pager-piped commands (those +beginning with a C<|> character.) By default, +C<$ENV{PAGER}> will be used. + +=item C<tkRunning> + +Run Tk while prompting (with ReadLine). + +=item C<signalLevel>, C<warnLevel>, C<dieLevel> + +Level of verbosity. By default the debugger is in a sane verbose mode, +thus it will print backtraces on all the warnings and die-messages +which are going to be printed out, and will print a message when +interesting uncaught signals arrive. + +To disable this behaviour, set these values to 0. If C<dieLevel> is 2, +then the messages which will be caught by surrounding C<eval> are also +printed. + +=item C<AutoTrace> + +Trace mode (similar to C<t> command, but can be put into +C<PERLDB_OPTS>). + +=item C<LineInfo> + +File or pipe to print line number info to. If it is a pipe (say, +C<|visual_perl_db>), then a short, "emacs like" message is used. + +=item C<inhibit_exit> + +If 0, allows I<stepping off> the end of the script. 
+ +=item C<PrintRet> + +affects printing of return value after C<r> command. + +=item C<ornaments> + +affects screen appearance of the command line (see L<Term::ReadLine>). + +=item C<frame> + +affects printing messages on entry and exit from subroutines. If +C<frame & 2> is false, messages are printed on entry only. (Printing +on exit may be useful if inter(di)spersed with other messages.) + +If C<frame & 4>, arguments to functions are printed as well as the +context and caller info. If C<frame & 8>, overloaded C<stringify> and +C<tie>d C<FETCH> are enabled on the printed arguments. If C<frame & +16>, the return value from the subroutine is printed as well. + +The length at which the argument list is truncated is governed by the +next option: + +=item C<maxTraceLen> + +length at which the argument list is truncated when C<frame> option's +bit 4 is set. + +=back + +The following options affect what happens with C<V>, C<X>, and C<x> +commands: + +=over 12 + +=item C<arrayDepth>, C<hashDepth> + +Print only first N elements ('' for all). + +=item C<compactDump>, C<veryCompact> + +Change style of array and hash dump. If C<compactDump>, short array +may be printed on one line. + +=item C<globPrint> + +Whether to print contents of globs. + +=item C<DumpDBFiles> + +Dump arrays holding debugged files. + +=item C<DumpPackages> + +Dump symbol tables of packages. + +=item C<quote>, C<HighBit>, C<undefPrint> + +Change style of string dump. Default value of C<quote> is C<auto>, one +can enable either double-quotish dump, or single-quotish by setting it +to C<"> or C<'>. By default, characters with high bit set are printed +I<as is>. + +=item C<UsageOnly> + +I<very> rudimentary per-package memory usage dump. Calculates total +size of strings in variables in the package. + +=back + +During startup options are initialized from C<$ENV{PERLDB_OPTS}>. +You can put additional initialization options C<TTY>, C<noTTY>, +C<ReadLine>, and C<NonStop> there.
+ +Example rc file: + + &parse_options("NonStop=1 LineInfo=db.out AutoTrace"); + +The script will run without human intervention, putting trace information +into the file I<db.out>. (If you interrupt it, you had better reset +C<LineInfo> to something "interactive"!) + +=over 12 + +=item C<TTY> + +The TTY to use for debugging I/O. + +=item C<noTTY> + +If set, goes in C<NonStop> mode, and would not connect to a TTY. If +interrupted (or if control goes to debugger via explicit setting of +$DB::signal or $DB::single from the Perl script), connects to a TTY +specified by the C<TTY> option at startup, or to a TTY found at +runtime using C<Term::Rendezvous> module of your choice. + +This module should implement a method C<new> which returns an object +with two methods: C<IN> and C<OUT>, returning two filehandles to use +for debugging input and output correspondingly. Method C<new> may +inspect an argument which is a value of C<$ENV{PERLDB_NOTTY}> at +startup, or is C<"/tmp/perldbtty$$"> otherwise. + +=item C<ReadLine> + +If false, readline support in debugger is disabled, so you can debug +ReadLine applications. + +=item C<NonStop> + +If set, debugger goes into noninteractive mode until interrupted, or +programmatically by setting $DB::signal or $DB::single. + +=back + +Here's an example of using the C<$ENV{PERLDB_OPTS}> variable: + + $ PERLDB_OPTS="N f=2" perl -d myprogram + +will run the script C<myprogram> without human intervention, printing +out the call tree with entry and exit points. Note that C<N f=2> is +equivalent to C<NonStop=1 frame=2>. Note also that at the moment when +this documentation was written all the options to the debugger could +be uniquely abbreviated by the first letter (with exception of +C<Dump*> options).
+ +Other examples may include - V DB filename line + $ PERLDB_OPTS="N f A L=listing" perl -d myprogram -=item X [symbols] +- runs script noninteractively, printing info on each entry into a +subroutine and each executed line into the file F<listing>. (If you +interrupt it, you had better reset C<LineInfo> to something +"interactive"!) -Same as as "V" command, but within the current package. + + $ env "PERLDB_OPTS=R=0 TTY=/dev/ttyc" perl -d myprogram + +may be useful for debugging a program which uses C<Term::ReadLine> +itself. Do not forget to detach the shell from the TTY in the window which +corresponds to F</dev/ttyc>, say, by issuing a command like + + $ sleep 1000000 + +See L<"Debugger Internals"> below for more details. + +=item E<lt> [ command ] + +Set an action (Perl command) to happen before every debugger prompt. +A multi-line command may be entered by backslashing the newlines. If +C<command> is missing, resets the list of actions. + +=item E<lt>E<lt> command + +Add an action (Perl command) to happen before every debugger prompt. +A multi-line command may be entered by backslashing the newlines. + +=item E<gt> command + +Set an action (Perl command) to happen after the prompt when you've +just given a command to return to executing the script. A multi-line +command may be entered by backslashing the newlines. If C<command> is +missing, resets the list of actions. + +=item E<gt>E<gt> command + +Adds an action (Perl command) to happen after the prompt when you've +just given a command to return to executing the script. A multi-line +command may be entered by backslashing the newlines. + +=item { [ command ] + +Set an action (debugger command) to happen before every debugger prompt. +A multi-line command may be entered by backslashing the newlines. If +C<command> is missing, resets the list of actions. + +=item {{ command + +Add an action (debugger command) to happen before every debugger prompt. +A multi-line command may be entered by backslashing the newlines.
=item ! number -Redo a debugging command. If number is omitted, redoes the previous -command. +Redo a previous command (default previous command). =item ! -number -Redo the command that was that many commands ago. +Redo number'th-to-last command. + +=item ! pattern + +Redo last command that started with pattern. +See C<O recallCommand>, too. + +=item !! cmd + +Run cmd in a subprocess (reads from DB::IN, writes to DB::OUT) +See C<O ShellBang> too. =item H -number @@ -186,51 +532,548 @@ listed. If number is omitted, lists them all. =item q or ^D -Quit. ("quit" doesn't work for this.) +Quit. ("quit" doesn't work for this.) This is the only supported way +to exit the debugger, though typing C<exit> twice may do it too. + +Set an C<O>ption C<inhibit_exit> to 0 if you want to be able to I<step +off> the end of the script. You may also need to set C<$finished> to 0 at +some moment if you want to step through global destruction. + +=item R + +Restart the debugger by B<exec>ing a new session. It tries to maintain +your history across this, but internal settings and command line options +may be lost. + +Currently the following settings are preserved: history, breakpoints, +actions, debugger C<O>ptions, and the following command line +options: B<-w>, B<-I>, and B<-e>. + +=item |dbcmd + +Run debugger command, piping DB::OUT to current pager. + +=item ||dbcmd + +Same as C<|dbcmd> but DB::OUT is temporarily B<select>ed as well. +Often used with commands that would otherwise produce long +output, such as + + |V main + +=item = [alias value] + +Define a command alias, like + + = quit q + +or list current aliases. =item command Execute command as a Perl statement. A missing semicolon will be supplied. -=item p expr +=item m expr + +The expression is evaluated, and the methods which may be applied to +the result are listed. + +=item m package + +The methods which may be applied to objects in the C<package> are listed.
+ +=back + +=head2 Debugger input/output + +=over 8 + +=item Prompt + +The debugger prompt is something like + + DB<8> + +or even + + DB<<17>> + +where that number is the command number, which you'd use to access with +the builtin B<csh>-like history mechanism, e.g., C<!17> would repeat +command number 17. The number of angle brackets indicates the depth of +the debugger. You could get more than one set of brackets, for example, if +you were already at a breakpoint and then printed out the result of a +function call that itself also has a breakpoint, or you step into an +expression via C<s/n/t expression> command. -Same as C<print DB::OUT expr>. The DB::OUT filehandle is opened to -/dev/tty, regardless of where STDOUT may be redirected to. +=item Multiline commands + +If you want to enter a multi-line command, such as a subroutine +definition with several statements, or a format, you may escape the +newline that would normally end the debugger command with a backslash. +Here's an example: + + DB<1> for (1..4) { \ + cont: print "ok\n"; \ + cont: } + ok + ok + ok + ok + +Note that this business of escaping a newline is specific to interactive +commands typed into the debugger. + +=item Stack backtrace + +Here's an example of what a stack backtrace via C<T> command might +look like: + + $ = main::infested called from file `Ambulation.pm' line 10 + @ = Ambulation::legs(1, 2, 3, 4) called from file `camel_flea' line 7 + $ = main::pests('bactrian', 4) called from file `camel_flea' line 4 + +The left-hand character up there tells whether the function was called +in a scalar or list context (we bet you can tell which is which). What +that says is that you were in the function C<main::infested> when you ran +the stack dump, and that it was called in a scalar context from line 10 +of the file I<Ambulation.pm>, but without any arguments at all, meaning +it was called as C<&infested>.
The next stack frame shows that the +function C<Ambulation::legs> was called in a list context from the +I<camel_flea> file with four arguments. The last stack frame shows that +C<main::pests> was called in a scalar context, also from I<camel_flea>, +but from line 4. + +Note that if you execute C<T> command from inside an active C<use> +statement, the backtrace will contain both a C<L<perlfunc/require>> +frame and an C<L<perlfunc/eval EXPR>> frame. + +=item Listing + +Listing given via different flavors of C<l> command looks like this: + + DB<<13>> l + 101: @i{@i} = (); + 102:b @isa{@i,$pack} = () + 103 if(exists $i{$prevpack} || exists $isa{$pack}); + 104 } + 105 + 106 next + 107==> if(exists $isa{$pack}); + 108 + 109:a if ($extra-- > 0) { + 110: %isa = ($pack,1); + +Note that the breakable lines are marked with C<:>, lines with +breakpoints are marked by C<b>, with actions by C<a>, and the +next executed line is marked by C<==E<gt>>. + +=item Frame listing + +When C<frame> option is set, debugger would print entered (and +optionally exited) subroutines in different styles. + +What follows is the start of the listing of + + env "PERLDB_OPTS=f=n N" perl -d -V + +for different values of C<n>: + +=over 4 + +=item 1 + + entering main::BEGIN + entering Config::BEGIN + Package lib/Exporter.pm. + Package lib/Carp.pm. + Package lib/Config.pm. + entering Config::TIEHASH + entering Exporter::import + entering Exporter::export + entering Config::myconfig + entering Config::FETCH + entering Config::FETCH + entering Config::FETCH + entering Config::FETCH + +=item 2 + + entering main::BEGIN + entering Config::BEGIN + Package lib/Exporter.pm. + Package lib/Carp.pm. + exited Config::BEGIN + Package lib/Config.pm.
+ entering Config::TIEHASH + exited Config::TIEHASH + entering Exporter::import + entering Exporter::export + exited Exporter::export + exited Exporter::import + exited main::BEGIN + entering Config::myconfig + entering Config::FETCH + exited Config::FETCH + entering Config::FETCH + exited Config::FETCH + entering Config::FETCH + +=item 4 + + in $=main::BEGIN() from /dev/nul:0 + in $=Config::BEGIN() from lib/Config.pm:2 + Package lib/Exporter.pm. + Package lib/Carp.pm. + Package lib/Config.pm. + in $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from li + in @=Config::myconfig() from /dev/nul:0 + in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'SUBVERSION') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'osname') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'osvers') from lib/Config.pm:574 + +=item 6 + + in $=main::BEGIN() from /dev/nul:0 + in $=Config::BEGIN() from lib/Config.pm:2 + Package lib/Exporter.pm. + Package lib/Carp.pm. + out $=Config::BEGIN() from lib/Config.pm:0 + Package lib/Config.pm. 
+ in $=Config::TIEHASH('Config') from lib/Config.pm:644 + out $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 + out $=main::BEGIN() from /dev/nul:0 + in @=Config::myconfig() from /dev/nul:0 + in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'SUBVERSION') from lib/Config.pm:574 + +=item 14 + + in $=main::BEGIN() from /dev/nul:0 + in $=Config::BEGIN() from lib/Config.pm:2 + Package lib/Exporter.pm. + Package lib/Carp.pm. + out $=Config::BEGIN() from lib/Config.pm:0 + Package lib/Config.pm. 
+ in $=Config::TIEHASH('Config') from lib/Config.pm:644 + out $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 + out $=main::BEGIN() from /dev/nul:0 + in @=Config::myconfig() from /dev/nul:0 + in $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 + out $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 + in $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 + out $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 + +=item 30 + + in $=CODE(0x15eca4)() from /dev/null:0 + in $=CODE(0x182528)() from lib/Config.pm:2 + Package lib/Exporter.pm. + out $=CODE(0x182528)() from lib/Config.pm:0 + scalar context return from CODE(0x182528): undef + Package lib/Config.pm. 
+ in $=Config::TIEHASH('Config') from lib/Config.pm:628 + out $=Config::TIEHASH('Config') from lib/Config.pm:628 + scalar context return from Config::TIEHASH: empty hash + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 + scalar context return from Exporter::export: '' + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + scalar context return from Exporter::import: '' + + +=back + +In all the cases indentation of lines shows the call tree, if bit 2 of +C<frame> is set, then a line is printed on exit from a subroutine as +well, if bit 4 is set, then the arguments are printed as well as the +caller info, if bit 8 is set, the arguments are printed even if they +are tied or references, if bit 16 is set, the return value is printed +as well. + +When a package is compiled, a line like this + + Package lib/Carp.pm. + +is printed with proper indentation. =back -Any command you type in that isn't recognized by the debugger will be -directly executed (C<eval>'d) as Perl code. Leading white space will -cause the debugger to think it's C<NOT> a debugger command. +=head2 Debugging compile-time statements -If you have any compile-time executable statements (code within a BEGIN -block or a C<use> statement), these will I<NOT> be stopped by debugger, -although C<require>s will. From your own code, however, you can transfer -control back to the debugger using the following statement, which is harmless -if the debugger is not running: +If you have any compile-time executable statements (code within a BEGIN +block or a C<use> statement), these will C<NOT> be stopped by debugger, +although C<require>s will (and compile-time statements can be traced +with C<AutoTrace> option set in C<PERLDB_OPTS>). 
From your own Perl +code, however, you can +transfer control back to the debugger using the following statement, +which is harmless if the debugger is not running: $DB::single = 1; -=head2 Customization +If you set C<$DB::single> to the value 2, it's equivalent to having +just typed the C<n> command, whereas a value of 1 means the C<s> +command. The C<$DB::trace> variable should be set to 1 to simulate +having typed the C<t> command. -If you want to modify the debugger, copy F<perl5db.pl> from the Perl -library to another name and modify it as necessary. You'll also want -to set environment variable PERL5DB to say something like this: +Another way to debug compile-time code is to start debugger, set a +breakpoint on I<load> of some module thusly - BEGIN { require "myperl5db.pl" } + DB<7> b load f:/perllib/lib/Carp.pm + Will stop on load of `f:/perllib/lib/Carp.pm'. + +and restart debugger by C<R> command (if possible). One can use C<b +compile subname> for the same purpose. + +=head2 Debugger Customization + +Most probably you do not want to modify the debugger, it contains enough +hooks to satisfy most needs. You may change the behaviour of debugger +from the debugger itself, using C<O>ptions, from the command line via +C<PERLDB_OPTS> environment variable, and from I<customization files>. You can do some customization by setting up a F<.perldb> file which contains initialization code. For instance, you could make aliases -like these (the last one in particular most people seem to expect to -be there): +like these (the last one is one people expect to be there): - $DB::alias{'len'} = 's/^len(.*)/p length($1)/'; + $DB::alias{'len'} = 's/^len(.*)/p length($1)/'; $DB::alias{'stop'} = 's/^stop (at|in)/b/'; - $DB::alias{'.'} = 's/^\./p ' - . '"\$DB::sub(\$DB::filename:\$DB::line):\t"' - .
',\$DB::dbline[\$DB::line]/' ; + $DB::alias{'ps'} = 's/^ps\b/p scalar /'; + $DB::alias{'quit'} = 's/^quit(\s*)/exit\$/'; + +One changes options from F<.perldb> file via calls like this one; + + parse_options("NonStop=1 LineInfo=db.out AutoTrace=1 frame=2"); +(the code is executed in the package C<DB>). Note that F<.perldb> is +processed before processing C<PERLDB_OPTS>. If F<.perldb> defines the +subroutine C<afterinit>, it is called after all the debugger +initialization ends. F<.perldb> may be contained in the current +directory, or in the C<LOGDIR>/C<HOME> directory. + +If you want to modify the debugger, copy F<perl5db.pl> from the Perl +library to another name and modify it as necessary. You'll also want +to set your C<PERL5DB> environment variable to say something like this: + + BEGIN { require "myperl5db.pl" } + +As the last resort, one can use C<PERL5DB> to customize debugger by +directly setting internal variables or calling debugger functions. + +=head2 Readline Support + +As shipped, the only command line history supplied is a simplistic one +that checks for leading exclamation points. However, if you install +the Term::ReadKey and Term::ReadLine modules from CPAN, you will +have full editing capabilities much like GNU I<readline>(3) provides. +Look for these in the F<modules/by-module/Term> directory on CPAN. + +A rudimentary command line completion is also available. +Unfortunately, the names of lexical variables are not available for +completion. + +=head2 Editor Support for Debugging + +If you have GNU B<emacs> installed on your system, it can interact with +the Perl debugger to provide an integrated software development +environment reminiscent of its interactions with C debuggers. + +Perl is also delivered with a start file for making B<emacs> act like a +syntax-directed editor that understands (some of) Perl's syntax. Look in +the I<emacs> directory of the Perl source distribution. 
+ +(Historically, a similar setup for interacting with B<vi> and the +X11 window system had also been available, but at the time of this +writing, no debugger support for B<vi> currently exists.) + +=head2 The Perl Profiler + +If you wish to supply an alternative debugger for Perl to run, just +invoke your script with a colon and a package argument given to the B<-d> +flag. One of the most popular alternative debuggers for Perl is +B<DProf>, the Perl profiler. As of this writing, B<DProf> is not +included with the standard Perl distribution, but it is expected to +be included soon, for certain values of "soon". + +Meanwhile, you can fetch the Devel::DProf module from CPAN. Assuming +it's properly installed on your system, to profile your Perl program in +the file F<mycode.pl>, just type: + + perl -d:DProf mycode.pl + +When the script terminates the profiler will dump the profile information +to a file called F<tmon.out>. A tool like B<dprofpp> (also supplied with +the Devel::DProf package) can be used to interpret the information which is +in that profile. + +=head2 Debugger support in perl + +When you call the B<caller> function (see L<perlfunc/caller>) from the +package DB, Perl sets the array @DB::args to contain the arguments the +corresponding stack frame was called with. + +If perl is run with B<-d> option, the following additional features +are enabled: + +=over + +=item * + +Perl inserts the contents of C<$ENV{PERL5DB}> (or C<BEGIN {require +'perl5db.pl'}> if not present) before the first line of the +application. + +=item * + +The array C<@{"_<$filename"}> is the line-by-line contents of +$filename for all the compiled files. Same for C<eval>ed strings which +contain subroutines, or which are currently executed. The C<$filename> +for C<eval>ed strings looks like C<(eval 34)>. + +=item * + +The hash C<%{"_<$filename"}> contains breakpoints and actions (it is +keyed by line number), and individual entries are settable (as opposed +to the whole hash).
Only true/false is important to Perl, though the +values used by F<perl5db.pl> have the form +C<"$break_condition\0$action">. Values are magical in numeric context: +they are zeros if the line is not breakable. + +Same for evaluated strings which contain subroutines, or which are +currently executed. The C<$filename> for C<eval>ed strings looks like +C<(eval 34)>. + +=item * + +The scalar C<${"_<$filename"}> contains C<"_<$filename">. Same for +evaluated strings which contain subroutines, or which are currently +executed. The C<$filename> for C<eval>ed strings looks like C<(eval +34)>. + +=item * + +After each C<require>d file is compiled, but before it is executed, +C<DB::postponed(*{"_<$filename"})> is called (if subroutine +C<DB::postponed> exists). Here the $filename is the expanded name of +the C<require>d file (as found in values of C<%INC>). + +=item * + +After each subroutine C<subname> is compiled existence of +C<$DB::postponed{subname}> is checked. If this key exists, +C<DB::postponed(subname)> is called (if subroutine C<DB::postponed> +exists). + +=item * + +A hash C<%DB::sub> is maintained, with keys being subroutine names, +values having the form C<filename:startline-endline>. C<filename> has +the form C<(eval 31)> for subroutines defined inside C<eval>s. + +=item * + +When execution of the application reaches a place that can have +a breakpoint, a call to C<DB::DB()> is performed if any one of +variables $DB::trace, $DB::single, or $DB::signal is true. (Note that +these variables are not C<local>izable.) This feature is disabled when +the control is inside C<DB::DB()> or functions called from it (unless +C<$^D & (1E<lt>E<lt>30)>). + +=item * + +When execution of the application reaches a subroutine call, a call +to C<&DB::sub>(I<args>) is performed instead, with C<$DB::sub> being +the name of the called subroutine. (Unless the subroutine is compiled +in the package C<DB>.) 
+ +=back + +Note that no subroutine call is possible until C<&DB::sub> is defined +(for subroutines outside of package C<DB>). (This restriction is +recently lifted.) + +(In fact, for the standard debugger the same is true if C<$DB::deep> +(how many levels of recursion deep into the debugger you can go before +a mandatory break) is not defined.) + +With the recent updates the minimal possible debugger consists of one +line + + sub DB::DB {} + +which is quite handy as contents of C<PERL5DB> environment +variable: + + env "PERL5DB=sub DB::DB {}" perl -d your-script + +Another (a little bit more useful) minimal debugger can be created +with the only line being + + sub DB::DB {print ++$i; scalar <STDIN>} + +This debugger would print the sequential number of encountered +statement, and would wait for your C<CR> to continue. + +The following debugger is quite functional: + + { + package DB; + sub DB {} + sub sub {print ++$i, " $sub\n"; &$sub} + } + +It prints the sequential number of subroutine call and the name of the +called subroutine. Note that C<&DB::sub> should be compiled into the +package C<DB>. + +=head2 Debugger Internals + +At the start, the debugger reads your rc file (F<./.perldb> or +F<~/.perldb> under Unix), which can set important options. This file may +define a subroutine C<&afterinit> to be executed after the debugger is +initialized. + +After the rc file is read, the debugger reads environment variable +PERLDB_OPTS and parses it as a rest of C<O ...> line in debugger prompt. + +It also maintains magical internal variables, such as C<@DB::dbline>, +C<%DB::dbline>, which are aliases for C<@{"::_<current_file"}> +C<%{"::_<current_file"}>. Here C<current_file> is the currently +selected (with the debugger's C<f> command, or by flow of execution) +file. + +Some functions are provided to simplify customization. See L<"Debugger +Customization"> for description of C<DB::parse_options(string)>. 
The +function C<DB::dump_trace(skip[, count])> skips the specified number +of frames, and returns an array containing info about the caller +frames (all if C<count> is missing). Each entry is a hash with keys +C<context> (C<$> or C<@>), C<sub> (subroutine name, or info about +eval), C<args> (C<undef> or a reference to an array), C<file>, and +C<line>. + +The function C<DB::print_trace(FH, skip[, count[, short]])> prints +formatted info about caller frames. The last two functions may be +convenient as arguments to C<E<lt>>, C<E<lt>E<lt>> commands. =head2 Other resources @@ -238,12 +1081,8 @@ You did try the B<-w> switch, didn't you? =head1 BUGS -If your program exit()s or die()s, so does the debugger. - -There's no builtin way to restart the debugger without exiting and coming back -into it. You could use an alias like this: - - $DB::alias{'rerun'} = 'exec "perl -d $DB::filename"'; +You cannot get the stack frame information or otherwise debug functions +that were not compiled by Perl, such as C or C++ extensions. -But you'd lose any pending breakpoint information, and that might not -be the right path, etc. +If you alter your @_ arguments in a subroutine (such as with B<shift> +or B<pop>), the stack backtrace will not show the original values. diff --git a/pod/perldelta.pod b/pod/perldelta.pod new file mode 100644 index 0000000000..9c85450dd0 --- /dev/null +++ b/pod/perldelta.pod @@ -0,0 +1,1549 @@ +=head1 NAME + +perldelta - what's new for perl5.004 + +=head1 DESCRIPTION + +This document describes differences between the 5.003 release (as +documented in I<Programming Perl>, second edition--the Camel Book) and +this one. + +=head1 Supported Environments + +Perl5.004 builds out of the box on Unix, Plan 9, LynxOS, VMS, OS/2, +QNX, AmigaOS, and Windows NT. Perl runs on Windows 95 as well, but it +cannot be built there, for lack of a reasonable command interpreter. + +=head1 Core Changes + +Most importantly, many bugs were fixed, including several security +problems.
See the F<Changes> file in the distribution for details. + +=head2 Compilation option: Binary compatibility with 5.003 + +There is a new Configure question that asks if you want to maintain +binary compatibility with Perl 5.003. If you choose binary +compatibility, you do not have to recompile your extensions, but you +might have symbol conflicts if you embed Perl in another application, +just as in the 5.003 release. By default, binary compatibility +is preserved at the expense of symbol table pollution. + +=head2 $PERL5OPT environment variable + +You may now put Perl options in the $PERL5OPT environment variable. +Unless Perl is running with taint checks, it will interpret this +variable as if its contents had appeared on a "#!perl" line at the +beginning of your script, except that hyphens are optional. PERL5OPT +may only be used to set the following switches: B<-[DIMUdmw]>. + +=head2 Limitations on B<-M>, B<-m>, and B<-T> options + +The C<-M> and C<-m> options are no longer allowed on the C<#!> line of +a script. If a script needs a module, it should invoke it with the +C<use> pragma. + +The B<-T> option is also forbidden on the C<#!> line of a script, +unless it was present on the Perl command line. Due to the way C<#!> +works, this usually means that B<-T> must be in the first argument. +Thus: + + #!/usr/bin/perl -T -w + +will probably work for an executable script invoked as C<scriptname>, +while: + + #!/usr/bin/perl -w -T + +will probably fail under the same conditions. (Non-Unix systems will +probably not follow this rule.) But C<perl scriptname> is guaranteed +to fail, since then there is no chance of B<-T> being found on the +command line before it is found on the C<#!> line. + +=head2 More precise warnings + +If you removed the B<-w> option from your Perl 5.003 scripts because it +made Perl too verbose, we recommend that you try putting it back when +you upgrade to Perl 5.004. 
Each new perl version tends to remove some +undesirable warnings, while adding new warnings that may catch bugs in +your scripts. + +=head2 Deprecated: Inherited C<AUTOLOAD> for non-methods + +Before Perl 5.004, C<AUTOLOAD> functions were looked up as methods +(using the C<@ISA> hierarchy), even when the function to be autoloaded +was called as a plain function (e.g. C<Foo::bar()>), not a method +(e.g. C<Foo-E<gt>bar()> or C<$obj-E<gt>bar()>). + +Perl 5.005 will use method lookup only for methods' C<AUTOLOAD>s. +However, there is a significant base of existing code that may be using +the old behavior. So, as an interim step, Perl 5.004 issues an optional +warning when a non-method uses an inherited C<AUTOLOAD>. + +The simple rule is: Inheritance will not work when autoloading +non-methods. The simple fix for old code is: In any module that used to +depend on inheriting C<AUTOLOAD> for non-methods from a base class named +C<BaseClass>, execute C<*AUTOLOAD = \&BaseClass::AUTOLOAD> during startup. + +=head2 Subroutine arguments created only when they're modified + +In Perl 5.004, nonexistent array and hash elements used as subroutine +parameters are brought into existence only if they are actually +assigned to (via C<@_>). + +Earlier versions of Perl vary in their handling of such arguments. +Perl versions 5.002 and 5.003 always brought them into existence. +Perl versions 5.000 and 5.001 brought them into existence only if +they were not the first argument (which was almost certainly a bug). +Earlier versions of Perl never brought them into existence. + +For example, given this code: + + undef @a; undef %a; + sub show { print $_[0] }; + sub change { $_[0]++ }; + show($a[2]); + change($a{b}); + +After this code executes in Perl 5.004, $a{b} exists but $a[2] does +not. In Perl 5.002 and 5.003, both $a{b} and $a[2] would have existed +(but $a[2]'s value would have been undefined). 
+ +=head2 Group vector changeable with C<$)> + +The C<$)> special variable has always (well, in Perl 5, at least) +reflected not only the current effective group, but also the group list +as returned by the C<getgroups()> C function (if there is one). +However, until this release, there has not been a way to call the +C<setgroups()> C function from Perl. + +In Perl 5.004, assigning to C<$)> is exactly symmetrical with examining +it: The first number in its string value is used as the effective gid; +if there are any numbers after the first one, they are passed to the +C<setgroups()> C function (if there is one). + +=head2 Fixed parsing of $$<digit>, &$<digit>, etc. + +Perl versions before 5.004 misinterpreted any type marker followed by +"$" and a digit. For example, "$$0" was incorrectly taken to mean +"${$}0" instead of "${$0}". This bug is (mostly) fixed in Perl 5.004. + +However, the developers of Perl 5.004 could not fix this bug completely, +because at least two widely-used modules depend on the old meaning of +"$$0" in a string. So Perl 5.004 still interprets "$$<digit>" in the +old (broken) way inside strings; but it generates this message as a +warning. And in Perl 5.005, this special treatment will cease. + +=head2 No resetting of $. on implicit close + +The documentation for Perl 5.0 has always stated that C<$.> is I<not> +reset when an already-open file handle is reopened with no intervening +call to C<close>. Due to a bug, perl versions 5.000 through 5.003 +I<did> reset C<$.> under that circumstance; Perl 5.004 does not. + +=head2 C<wantarray> may return undef + +The C<wantarray> operator returns true if a subroutine is expected to +return a list, and false otherwise. In Perl 5.004, C<wantarray> can +also return the undefined value if a subroutine's return value will +not be used at all, which allows subroutines to avoid a time-consuming +calculation of a return value if it isn't going to be used. 
+ +=head2 Changes to tainting checks + +A bug in previous versions may have failed to detect some insecure +conditions when taint checks are turned on. (Taint checks are used +in setuid or setgid scripts, or when explicitly turned on with the +C<-T> invocation option.) Although it's unlikely, this may cause a +previously-working script to now fail -- which should be construed +as a blessing, since that indicates a potentially-serious security +hole was just plugged. + +The new restrictions when tainting include: + +=over + +=item No glob() or <*> + +These operators may spawn the C shell (csh), which cannot be made +safe. This restriction will be lifted in a future version of Perl +when globbing is implemented without the use of an external program. + +=item No spawning if tainted $CDPATH, $ENV, $BASH_ENV + +These environment variables may alter the behavior of spawned programs +(especially shells) in ways that subvert security. So now they are +treated as dangerous, in the manner of $IFS and $PATH. + +=item No spawning if tainted $TERM doesn't look like a terminal name + +Some termcap libraries do unsafe things with $TERM. However, it would be +unnecessarily harsh to treat all $TERM values as unsafe, since only shell +metacharacters can cause trouble in $TERM. So a tainted $TERM is +considered to be safe if it contains only alphanumerics, underscores, +dashes, and colons, and unsafe if it contains other characters (including +whitespace). + +=back + +=head2 New Opcode module and revised Safe module + +A new Opcode module supports the creation, manipulation and +application of opcode masks. The revised Safe module has a new API +and is implemented using the new Opcode module. Please read the new +Opcode and Safe documentation. + +=head2 Embedding improvements + +In older versions of Perl it was not possible to create more than one +Perl interpreter instance inside a single process without leaking like a +sieve and/or crashing. 
The bugs that caused this behavior have all been +fixed. However, you still must take care when embedding Perl in a C +program. See the updated perlembed manpage for tips on how to manage +your interpreters. + +=head2 Internal change: FileHandle class based on IO::* classes + +File handles are now stored internally as type IO::Handle. The +FileHandle module is still supported for backwards compatibility, but +it is now merely a front end to the IO::* modules -- specifically, +IO::Handle, IO::Seekable, and IO::File. We suggest, but do not +require, that you use the IO::* modules in new code. + +In harmony with this change, C<*GLOB{FILEHANDLE}> is now just a +backward-compatible synonym for C<*GLOB{IO}>. + +=head2 Internal change: PerlIO abstraction interface + +It is now possible to build Perl with AT&T's sfio IO package +instead of stdio. See L<perlapio> for more details, and +the F<INSTALL> file for how to use it. + +=head2 New and changed syntax + +=over + +=item $coderef->(PARAMS) + +A subroutine reference may now be suffixed with an arrow and a +(possibly empty) parameter list. This syntax denotes a call of the +referenced subroutine, with the given parameters (if any). + +This new syntax follows the pattern of S<C<$hashref-E<gt>{FOO}>> and +S<C<$aryref-E<gt>[$foo]>>: You may now write S<C<&$subref($foo)>> as +S<C<$subref-E<gt>($foo)>>. All of these arrow terms may be chained; +thus, S<C<&{$table-E<gt>{FOO}}($bar)>> may now be written +S<C<$table-E<gt>{FOO}-E<gt>($bar)>>. + +=back + +=head2 New and changed builtin constants + +=over + +=item __PACKAGE__ + +The current package name at compile time, or the undefined value if +there is no current package (due to a C<package;> directive). Like +C<__FILE__> and C<__LINE__>, C<__PACKAGE__> does I<not> interpolate +into strings. + +=back + +=head2 New and changed builtin variables + +=over + +=item $^E + +Extended error message on some platforms. (Also known as +$EXTENDED_OS_ERROR if you C<use English>). 
+ +=item $^H + +The current set of syntax checks enabled by C<use strict>. See the +documentation of C<strict> for more details. Not actually new, but +newly documented. +Because it is intended for internal use by Perl core components, +there is no C<use English> long name for this variable. + +=item $^M + +By default, running out of memory is not trappable. However, if +compiled for this, Perl may use the contents of C<$^M> as an emergency +pool after die()ing with this message. Suppose that your Perl were +compiled with -DEMERGENCY_SBRK and used Perl's malloc. Then + + $^M = 'a' x (1<<16); + +would allocate a 64K buffer for use in an emergency. +See the F<INSTALL> file for information on how to enable this option. +As a disincentive to casual use of this advanced feature, +there is no C<use English> long name for this variable. + +=back + +=head2 New and changed builtin functions + +=over + +=item delete on slices + +This now works. (e.g. C<delete @ENV{'PATH', 'MANPATH'}>) + +=item flock + +is now supported on more platforms, prefers fcntl to lockf when +emulating, and always flushes before (un)locking. + +=item printf and sprintf + +Perl now implements these functions itself; it doesn't use the C +library function sprintf() any more, except for floating-point +numbers, and even then only known flags are allowed. As a result, it +is now possible to know which conversions and flags will work, and +what they will do. 
+ +The new conversions in Perl's sprintf() are: + + %i a synonym for %d + %p a pointer (the address of the Perl value, in hexadecimal) + %n special: *stores* the number of characters output so far + into the next variable in the parameter list + +The new flags that go between the C<%> and the conversion are: + + # prefix octal with "0", hex with "0x" + h interpret integer as C type "short" or "unsigned short" + V interpret integer as Perl's standard integer type + +Also, where a number would appear in the flags, an asterisk ("*") may +be used instead, in which case Perl uses the next item in the +parameter list as the given number (that is, as the field width or +precision). If a field width obtained through "*" is negative, it has +the same effect as the '-' flag: left-justification. + +See L<perlfunc/sprintf> for a complete list of conversions and flags. + +=item keys as an lvalue + +As an lvalue, C<keys> allows you to increase the number of hash buckets +allocated for the given hash. This can gain you a measure of efficiency if +you know the hash is going to get big. (This is similar to pre-extending +an array by assigning a larger number to $#array.) If you say + + keys %hash = 200; + +then C<%hash> will have at least 200 buckets allocated for it. These +buckets will be retained even if you do C<%hash = ()>; use C<undef +%hash> if you want to free the storage while C<%hash> is still in scope. +You can't shrink the number of buckets allocated for the hash using +C<keys> in this way (but you needn't worry about doing this by accident, +as trying has no effect). 
+ +=item my() in Control Structures + +You can now use my() (with or without the parentheses) in the control +expressions of control structures such as: + + while (defined(my $line = <>)) { + $line = lc $line; + } continue { + print $line; + } + + if ((my $answer = <STDIN>) =~ /^y(es)?$/i) { + user_agrees(); + } elsif ($answer =~ /^n(o)?$/i) { + user_disagrees(); + } else { + chomp $answer; + die "`$answer' is neither `yes' nor `no'"; + } + +Also, you can declare a foreach loop control variable as lexical by +preceding it with the word "my". For example, in: + + foreach my $i (1, 2, 3) { + some_function(); + } + +$i is a lexical variable, and the scope of $i extends to the end of +the loop, but not beyond it. + +Note that you still cannot use my() on global punctuation variables +such as $_ and the like. + +=item pack() and unpack() + +A new format 'w' represents a BER compressed integer (as defined in +ASN.1). Its format is a sequence of one or more bytes, each of which +provides seven bits of the total value, with the most significant +first. Bit eight of each byte is set, except for the last byte, in +which bit eight is clear. + +Both pack() and unpack() now fail when their templates contain invalid +types. (Invalid types used to be ignored.) + +=item sysseek() + +The new sysseek() operator is a variant of seek() that sets and gets the +file's system read/write position, using the lseek(2) system call. It is +the only reliable way to seek before using sysread() or syswrite(). Its +return value is the new position, or the undefined value on failure. + +=item use VERSION + +If the first argument to C<use> is a number, it is treated as a version +number instead of a module name. If the version of the Perl interpreter +is less than VERSION, then an error message is printed and Perl exits +immediately. 
Because C<use> occurs at compile time, this check happens +immediately during the compilation process, unlike C<require VERSION>, +which waits until runtime for the check. This is often useful if you +need to check the current Perl version before C<use>ing library modules +which have changed in incompatible ways from older versions of Perl. +(We try not to do this more than we have to.) + +=item use Module VERSION LIST + +If the VERSION argument is present between Module and LIST, then the +C<use> will call the VERSION method in class Module with the given +version as an argument. The default VERSION method, inherited from +the UNIVERSAL class, croaks if the given version is larger than the +value of the variable $Module::VERSION. (Note that there is not a +comma after VERSION!) + +This version-checking mechanism is similar to the one currently used +in the Exporter module, but it is faster and can be used with modules +that don't use the Exporter. It is the recommended method for new +code. + +=item prototype(FUNCTION) + +Returns the prototype of a function as a string (or C<undef> if the +function has no prototype). FUNCTION is a reference to or the name of the +function whose prototype you want to retrieve. +(Not actually new; just never documented before.) + +=item srand + +The default seed for C<srand>, which used to be C<time>, has been changed. +Now it's a heady mix of difficult-to-predict system-dependent values, +which should be sufficient for most everyday purposes. + +Previous to version 5.004, calling C<rand> without first calling C<srand> +would yield the same sequence of random numbers on most or all machines. +Now, when perl sees that you're calling C<rand> and haven't yet called +C<srand>, it calls C<srand> with the default seed. You should still call +C<srand> manually if your code might ever be run on a pre-5.004 system, +of course, or if you want a seed other than the default. 
+ +=item $_ as Default + +Functions documented in the Camel to default to $_ now in +fact do, and all those that do are so documented in L<perlfunc>. + +=item C<m//gc> does not reset search position on failure + +The C<m//g> match iteration construct has always reset its target +string's search position (which is visible through the C<pos> operator) +when a match fails; as a result, the next C<m//g> match after a failure +starts again at the beginning of the string. With Perl 5.004, this +reset may be disabled by adding the "c" (for "continue") modifier, +i.e. C<m//gc>. This feature, in conjunction with the C<\G> zero-width +assertion, makes it possible to chain matches together. See L<perlop> +and L<perlre>. + +=item C<m//x> ignores whitespace before ?*+{} + +The C<m//x> construct has always been intended to ignore all unescaped +whitespace. However, before Perl 5.004, whitespace had the effect of +escaping repeat modifiers like "*" or "?"; for example, C</a *b/x> was +(mis)interpreted as C</a\*b/x>. This bug has been fixed in 5.004. + +=item nested C<sub{}> closures work now + +Prior to the 5.004 release, nested anonymous functions didn't work +right. They do now. + +=item formats work right on changing lexicals + +Just like anonymous functions that contain lexical variables +that change (like a lexical index variable for a C<foreach> loop), +formats now work properly. For example, this silently failed +before (printed only zeros), but is fine now: + + my $i; + foreach $i ( 1 .. 10 ) { + write; + } + format = + my i is @# + $i + . + +=back + +=head2 New builtin methods + +The C<UNIVERSAL> package automatically contains the following methods that +are inherited by all other classes: + +=over + +=item isa(CLASS) + +C<isa> returns I<true> if its object is blessed into a subclass of C<CLASS> + +C<isa> is also exportable and can be called as a sub with two arguments. This +allows the ability to check what a reference points to. 
Example: + + use UNIVERSAL qw(isa); + + if(isa($ref, 'ARRAY')) { + ... + } + +=item can(METHOD) + +C<can> checks to see if its object has a method called C<METHOD>. +If it does, then a reference to the sub is returned; if it does not, then +I<undef> is returned. + +=item VERSION( [NEED] ) + +C<VERSION> returns the version number of the class (package). If the +NEED argument is given then it will check that the current version (as +defined by the $VERSION variable in the given package) is not less than +NEED; it will die if this is not the case. This method is normally +called as a class method. This method is called automatically by the +C<VERSION> form of C<use>. + + use A 1.2 qw(some imported subs); + # implies: + A->VERSION(1.2); + +=back + +B<NOTE:> C<can> directly uses Perl's internal code for method lookup, and +C<isa> uses a very similar method and caching strategy. This may cause +strange effects if the Perl code dynamically changes @ISA in any package. + +You may add other methods to the UNIVERSAL class via Perl or XS code. +You do not need to C<use UNIVERSAL> in order to make these methods +available to your program. This is necessary only if you wish to +have C<isa> available as a plain subroutine in the current package. + +=head2 TIEHANDLE now supported + +See L<perltie> for other kinds of tie()s. + +=over + +=item TIEHANDLE classname, LIST + +This is the constructor for the class. That means it is expected to +return an object of some sort. The reference can be used to +hold some internal information. + + sub TIEHANDLE { + print "<shout>\n"; + my $i; + return bless \$i, shift; + } + +=item PRINT this, LIST + +This method will be triggered every time the tied handle is printed to. +Beyond its self reference it also expects the list that was passed to +the print function. 
+ + sub PRINT { + $r = shift; + $$r++; + return print join( $, => map {uc} @_), $\; + } + +=item PRINTF this, LIST + +This method will be triggered every time the tied handle is printed to +with the C<printf()> function. +Beyond its self reference it also expects the format and list that were +passed to the printf function. + + sub PRINTF { + shift; + my $fmt = shift; + print sprintf($fmt, @_)."\n"; + } + +=item READ this, LIST + +This method will be called when the handle is read from via the C<read> +or C<sysread> functions. + + sub READ { + $r = shift; + my($buf,$len,$offset) = @_; + print "READ called, \$buf=$buf, \$len=$len, \$offset=$offset"; + } + +=item READLINE this + +This method will be called when the handle is read from. The method +should return undef when there is no more data. + + sub READLINE { + $r = shift; + return "PRINT called $$r times\n" + } + +=item GETC this + +This method will be called when the C<getc> function is called. + + sub GETC { print "Don't GETC, Get Perl"; return "a"; } + +=item DESTROY this + +As with the other types of ties, this method will be called when the +tied handle is about to be destroyed. This is useful for debugging and +possibly for cleaning up. + + sub DESTROY { + print "</shout>\n"; + } + +=back + +=head2 Malloc enhancements + +Four new compilation flags are recognized by malloc.c. (They have no +effect if perl is compiled with system malloc().) + +=over + +=item -DDEBUGGING_MSTATS + +If perl is compiled with C<DEBUGGING_MSTATS> defined, you can print +memory statistics at runtime by running Perl thusly: + + env PERL_DEBUG_MSTATS=2 perl your_script_here + +The value of 2 means to print statistics after compilation and on +exit; with a value of 1, the statistics are printed only on exit. +(If you want the statistics at an arbitrary time, you'll need to +install the optional module Devel::Peek.) 
+ +=item -DEMERGENCY_SBRK + +If this macro is defined, running out of memory need not be a fatal +error: a memory pool can be allocated by assigning to the special +variable C<$^M>. See L<"$^M">. + +=item -DPACK_MALLOC + +Perl memory allocation is by bucket with sizes close to powers of two. +Because of this, malloc overhead may be big, especially for data of +size exactly a power of two. If C<PACK_MALLOC> is defined, perl uses +a slightly different algorithm for small allocations (up to 64 bytes +long), which makes it possible to have overhead down to 1 byte for +allocations which are powers of two (and appear quite often). + +Expected memory savings (with 8-byte alignment in C<alignbytes>) are +about 20% for typical Perl usage. Expected slowdown due to additional +malloc overhead is in fractions of a percent (hard to measure, because +of the effect of saved memory on speed). + +=item -DTWO_POT_OPTIMIZE + +Similarly to C<PACK_MALLOC>, this macro improves allocations of data +with size close to a power of two; but this works for big allocations +(starting with 16K by default). Such allocations are typical for big +hashes and special-purpose scripts, especially image processing. + +On recent systems, the fact that perl requires 2M from system for 1M +allocation will not affect speed of execution, since the tail of such +a chunk is not going to be touched (and thus will not require real +memory). However, it may result in a premature out-of-memory error. +So if you will be manipulating very large blocks with sizes close to +powers of two, it would be wise to define this macro. + +Expected saving of memory is 0-100% (100% in applications which +require most memory in such 2**n chunks); expected slowdown is +negligible. + +=back + +=head2 Miscellaneous efficiency enhancements + +Functions that have an empty prototype and that do nothing but return +a fixed value are now inlined (e.g. C<sub PI () { 3.14159 }>). 
+ +Each unique hash key is only allocated once, no matter how many hashes +have an entry with that key. So even if you have 100 copies of the +same hash, the hash keys never have to be reallocated. + +=head1 Support for More Operating Systems + +Support for the following operating systems is new in Perl 5.004. + +=head2 Win32 + +Perl 5.004 now includes support for building a "native" perl under +Windows NT, using the Microsoft Visual C++ compiler (versions 2.0 +and above). The resulting perl can be used under Windows 95 (if it +is installed in the same directory locations as it got installed +in Windows NT). This port includes support for perl extension +building tools like L<MakeMaker> and L<h2xs>, so that many extensions +available on the Comprehensive Perl Archive Network (CPAN) can now be +readily built under Windows NT. See http://www.perl.com/ for more +information on CPAN, and L<README.win32> for more details on how to +get started with building this port. + +There is also support for building perl under the Cygwin32 environment. +Cygwin32 is a set of GNU tools that make it possible to compile and run +many UNIX programs under Windows NT by providing a mostly UNIX-like +interface for compilation and execution. See L<README.cygwin32> for +more details on this port, and how to obtain the Cygwin32 toolkit. +This port has not been as well tested as the "native" port described +above (which is not as well tested as we'd like either :) + +=head2 Plan 9 + +See L<README.plan9>. + +=head2 QNX + +See L<README.qnx>. + +=head2 AmigaOS + +See L<README.amigaos>. + +=head1 Pragmata + +Six new pragmatic modules exist: + +=over + +=item use autouse MODULE => qw(sub1 sub2 sub3) + +Defers C<require MODULE> until someone calls one of the specified +subroutines (which must be exported by MODULE). This pragma should be +used with caution, and only when necessary. 
+ +=item use blib + +=item use blib 'dir' + +Looks for MakeMaker-like I<'blib'> directory structure starting in +I<dir> (or current directory) and working back up to five levels of +parent directories. + +Intended for use on command line with B<-M> option as a way of testing +arbitrary scripts against an uninstalled version of a package. + +=item use constant NAME => VALUE + +Provides a convenient interface for creating compile-time constants. +See L<perlsub/"Constant Functions">. + +=item use locale + +Tells the compiler to enable (or disable) the use of POSIX locales for +builtin operations. + +When C<use locale> is in effect, the current LC_CTYPE locale is used +for regular expressions and case mapping; LC_COLLATE for string +ordering; and LC_NUMERIC for numeric formatting in printf and sprintf +(but B<not> in print). LC_NUMERIC is always used in write, since +lexical scoping of formats is problematic at best. + +Each C<use locale> or C<no locale> affects statements to the end of +the enclosing BLOCK or, if not inside a BLOCK, to the end of the +current file. Locales can be switched and queried with +POSIX::setlocale(). + +See L<perllocale> for more information. + +=item use ops + +Disable unsafe opcodes, or any named opcodes, when compiling Perl code. + +=item use vmsish + +Enable VMS-specific language features. Currently, there are three +VMS-specific features available: 'status', which makes C<$?> and +C<system> return genuine VMS status values instead of emulating POSIX; +'exit', which makes C<exit> take a genuine VMS status value instead of +assuming that C<exit 1> is an error; and 'time', which makes all times +relative to the local time zone, in the VMS tradition. 
+ +=back + +=head1 Modules + +=head2 Required Updates + +Though Perl 5.004 is compatible with almost all modules that work +with Perl 5.003, there are a few exceptions: + + Module Required Version for Perl 5.004 + ------ ------------------------------- + Filter Filter-1.12 + LWP libwww-perl-5.08 + Tk Tk400.202 (-w makes noise) + +Also, the majordomo mailing list program, version 1.94.1, doesn't work +with Perl 5.004 (nor with perl 4), because it executes an invalid +regular expression. This bug is fixed in majordomo version 1.94.2. + +=head2 Installation directories + +The I<installperl> script now places the Perl source files for +extensions in the architecture-specific library directory, which is +where the shared libraries for extensions have always been. This +change is intended to allow administrators to keep the Perl 5.004 +library directory unchanged from a previous version, without running +the risk of binary incompatibility between extensions' Perl source and +shared libraries. 
+ +=head2 Module information summary + +Brand new modules, arranged by topic rather than strictly +alphabetically: + + CGI.pm Web server interface ("Common Gateway Interface") + CGI/Apache.pm Support for Apache's Perl module + CGI/Carp.pm Log server errors with helpful context + CGI/Fast.pm Support for FastCGI (persistent server process) + CGI/Push.pm Support for server push + CGI/Switch.pm Simple interface for multiple server types + + CPAN Interface to Comprehensive Perl Archive Network + CPAN::FirstTime Utility for creating CPAN configuration file + CPAN::Nox Runs CPAN while avoiding compiled extensions + + IO.pm Top-level interface to IO::* classes + IO/File.pm IO::File extension Perl module + IO/Handle.pm IO::Handle extension Perl module + IO/Pipe.pm IO::Pipe extension Perl module + IO/Seekable.pm IO::Seekable extension Perl module + IO/Select.pm IO::Select extension Perl module + IO/Socket.pm IO::Socket extension Perl module + + Opcode.pm Disable named opcodes when compiling Perl code + + ExtUtils/Embed.pm Utilities for embedding Perl in C programs + ExtUtils/testlib.pm Fixes up @INC to use just-built extension + + FindBin.pm Find path of currently executing program + + Class/Struct.pm Declare struct-like datatypes as Perl classes + File/stat.pm By-name interface to Perl's builtin stat + Net/hostent.pm By-name interface to Perl's builtin gethost* + Net/netent.pm By-name interface to Perl's builtin getnet* + Net/protoent.pm By-name interface to Perl's builtin getproto* + Net/servent.pm By-name interface to Perl's builtin getserv* + Time/gmtime.pm By-name interface to Perl's builtin gmtime + Time/localtime.pm By-name interface to Perl's builtin localtime + Time/tm.pm Internal object for Time::{gm,local}time + User/grent.pm By-name interface to Perl's builtin getgr* + User/pwent.pm By-name interface to Perl's builtin getpw* + + Tie/RefHash.pm Base class for tied hashes with references as keys + + UNIVERSAL.pm Base class for *ALL* classes + +=head2 Fcntl + +New 
constants in the existing Fcntl modules are now supported, +provided that your operating system happens to support them: + + F_GETOWN F_SETOWN + O_ASYNC O_DEFER O_DSYNC O_FSYNC O_SYNC + O_EXLOCK O_SHLOCK + +These constants are intended for use with the Perl operators sysopen() +and fcntl() and the basic database modules like SDBM_File. For the +exact meaning of these and other Fcntl constants please refer to your +operating system's documentation for fcntl() and open(). + +In addition, the Fcntl module now provides these constants for use +with the Perl operator flock(): + + LOCK_SH LOCK_EX LOCK_NB LOCK_UN + +These constants are defined in all environments (because where there is +no flock() system call, Perl emulates it). However, for historical +reasons, these constants are not exported unless they are explicitly +requested with the ":flock" tag (e.g. C<use Fcntl ':flock'>). + +=head2 IO + +The IO module provides a simple mechanism to load all of the IO modules at one +go. Currently this includes: + + IO::Handle + IO::Seekable + IO::File + IO::Pipe + IO::Socket + +For more information on any of these modules, please see its +respective documentation. + +=head2 Math::Complex + +The Math::Complex module has been totally rewritten, and now supports +more operations. These are overloaded: + + + - * / ** <=> neg ~ abs sqrt exp log sin cos atan2 "" (stringify) + +And these functions are now exported: + + pi i Re Im arg + log10 logn ln cbrt root + tan + csc sec cot + asin acos atan + acsc asec acot + sinh cosh tanh + csch sech coth + asinh acosh atanh + acsch asech acoth + cplx cplxe + +=head2 Math::Trig + +This new module provides a simpler interface to parts of Math::Complex for +those who need trigonometric functions only for real numbers. + +=head2 DB_File + +There have been quite a few changes made to DB_File. Here are a few of +the highlights: + +=over + +=item * + +Fixed a handful of bugs. 
+ +=item * + +By public demand, added support for the standard hash function exists(). + +=item * + +Made it compatible with Berkeley DB 1.86. + +=item * + +Made negative subscripts work with RECNO interface. + +=item * + +Changed the default flags from O_RDWR to O_CREAT|O_RDWR and the default +mode from 0640 to 0666. + +=item * + +Made DB_File automatically import the open() constants (O_RDWR, +O_CREAT etc.) from Fcntl, if available. + +=item * + +Updated documentation. + +=back + +Refer to the HISTORY section in DB_File.pm for a complete list of +changes. Everything after DB_File 1.01 has been added since 5.003. + +=head2 Net::Ping + +Major rewrite - support added for both udp echo and real icmp pings. + +=head2 Object-oriented overrides for builtin operators + +Many of the Perl builtins returning lists now have +object-oriented overrides. These are: + + File::stat + Net::hostent + Net::netent + Net::protoent + Net::servent + Time::gmtime + Time::localtime + User::grent + User::pwent + +For example, you can now say + + use File::stat; + use User::pwent; + $his = (stat($filename)->st_uid == pwent($whoever)->pw_uid); + +=head1 Utility Changes + +=head2 pod2html + +=over + +=item Sends converted HTML to standard output + +The I<pod2html> utility included with Perl 5.004 is entirely new. +By default, it sends the converted HTML to its standard output, +instead of writing it to a file like Perl 5.003's I<pod2html> did. +Use the B<--outfile=FILENAME> option to write to a file. + +=back + +=head2 xsubpp + +=over + +=item C<void> XSUBs now default to returning nothing + +Due to a documentation/implementation bug in previous versions of +Perl, XSUBs with a return type of C<void> have actually been +returning one value. Usually that value was the GV for the XSUB, +but sometimes it was some already freed or reused value, which would +sometimes lead to program failure. + +In Perl 5.004, if an XSUB is declared as returning C<void>, it +actually returns no value, i.e. 
an empty list (though there is a +backward-compatibility exception; see below). If your XSUB really +does return an SV, you should give it a return type of C<SV *>. + +For backward compatibility, I<xsubpp> tries to guess whether a +C<void> XSUB is really C<void> or if it wants to return an C<SV *>. +It does so by examining the text of the XSUB: if I<xsubpp> finds +what looks like an assignment to C<ST(0)>, it assumes that the +XSUB's return type is really C<SV *>. + +=back + +=head1 C Language API Changes + +=over + +=item C<gv_fetchmethod> and C<perl_call_sv> + +The C<gv_fetchmethod> function finds a method for an object, just like +in Perl 5.003. The GV it returns may be a method cache entry. +However, in Perl 5.004, method cache entries are not visible to users; +therefore, they can no longer be passed directly to C<perl_call_sv>. +Instead, you should use the C<GvCV> macro on the GV to extract its CV, +and pass the CV to C<perl_call_sv>. + +The most likely symptom of passing the result of C<gv_fetchmethod> to +C<perl_call_sv> is Perl's producing an "Undefined subroutine called" +error on the I<second> call to a given method (since there is no cache +on the first call). + +=item C<perl_eval_pv> + +A new function handy for eval'ing strings of Perl code inside C code. +This function returns the value from the eval statement, which can +be used instead of fetching globals from the symbol table. See +L<perlguts>, L<perlembed> and L<perlcall> for details and examples. + +=item Extended API for manipulating hashes + +Internal handling of hash keys has changed. The old hashtable API is +still fully supported, and will likely remain so. The additions to the +API allow passing keys as C<SV*>s, so that C<tied> hashes can be given +real scalars as keys rather than plain strings (nontied hashes still +can only use strings as keys). New extensions must use the new hash +access functions and macros if they wish to use C<SV*> keys. 
These +additions also make it feasible to manipulate C<HE*>s (hash entries), +which can be more efficient. See L<perlguts> for details. + +=back + +=head1 Documentation Changes + +Many of the base and library pods were updated. These +new pods are included in section 1: + +=over + +=item L<perldelta> + +This document. + +=item L<perlfaq> + +Frequently asked questions. + +=item L<perllocale> + +Locale support (internationalization and localization). + +=item L<perltoot> + +Tutorial on Perl OO programming. + +=item L<perlapio> + +Perl internal IO abstraction interface. + +=item L<perlmodlib> + +Perl module library and recommended practice for module creation. +Extracted from L<perlmod> (which is much smaller as a result). + +=item L<perldebug> + +Although not new, this has been massively updated. + +=item L<perlsec> + +Although not new, this has been massively updated. + +=back + +=head1 New Diagnostics + +Several new conditions will trigger warnings that were +silent before. Some only affect certain platforms. +The following new warnings and errors outline these. +These messages are classified as follows (listed in +increasing order of desperation): + + (W) A warning (optional). + (D) A deprecation (optional). + (S) A severe warning (mandatory). + (F) A fatal error (trappable). + (P) An internal error you should never see (trappable). + (X) A very fatal error (nontrappable). + (A) An alien error message (not generated by Perl). + +=over + +=item "my" variable %s masks earlier declaration in same scope + +(S) A lexical variable has been redeclared in the same scope, effectively +eliminating all access to the previous instance. This is almost always +a typographical error. Note that the earlier variable will still exist +until the end of the scope or until all closure referents to it are +destroyed. 
+ +=item %s argument is not a HASH element or slice + +(F) The argument to delete() must be either a hash element, such as + + $foo{$bar} + $ref->[12]->{"susie"} + +or a hash slice, such as + + @foo{$bar, $baz, $xyzzy} + @{$ref->[12]}{"susie", "queue"} + +=item Allocation too large: %lx + +(X) You can't allocate more than 64K on an MS-DOS machine. + +=item Allocation too large + +(F) You can't allocate more than 2^31+"small amount" bytes. + +=item Applying %s to %s will act on scalar(%s) + +(W) The pattern match (//), substitution (s///), and translation (tr///) +operators work on scalar values. If you apply one of them to an array +or a hash, it will convert the array or hash to a scalar value -- the +length of an array, or the population info of a hash -- and then work on +that scalar value. This is probably not what you meant to do. See +L<perlfunc/grep> and L<perlfunc/map> for alternatives. + +=item Attempt to free nonexistent shared string + +(P) Perl maintains a reference counted internal table of strings to +optimize the storage and access of hash keys and other strings. This +indicates someone tried to decrement the reference count of a string +that can no longer be found in the table. + +=item Attempt to use reference as lvalue in substr + +(W) You supplied a reference as the first argument to substr() used +as an lvalue, which is pretty strange. Perhaps you forgot to +dereference it first. See L<perlfunc/substr>. + +=item Can't redefine active sort subroutine %s + +(F) Perl optimizes the internal handling of sort subroutines and keeps +pointers into them. You tried to redefine one such sort subroutine when it +was currently active, which is not allowed. If you really want to do +this, you should write C<sort { &func } @x> instead of C<sort func @x>. + +=item Can't use bareword ("%s") as %s ref while "strict refs" in use + +(F) Only hard references are allowed by "strict refs". Symbolic references +are disallowed. See L<perlref>. 
+ +=item Cannot resolve method `%s' overloading `%s' in package `%s' + +(P) Internal error trying to resolve overloading specified by a method +name (as opposed to a subroutine reference). + +=item Constant subroutine %s redefined + +(S) You redefined a subroutine which had previously been eligible for +inlining. See L<perlsub/"Constant Functions"> for commentary and +workarounds. + +=item Constant subroutine %s undefined + +(S) You undefined a subroutine which had previously been eligible for +inlining. See L<perlsub/"Constant Functions"> for commentary and +workarounds. + +=item Copy method did not return a reference + +(F) The method which overloads "=" is buggy. See L<overload/Copy Constructor>. + +=item Died + +(F) You passed die() an empty string (the equivalent of C<die "">) or +you called it with no args and both C<$@> and C<$_> were empty. + +=item Exiting pseudo-block via %s + +(W) You are exiting a rather special block construct (like a sort block or +subroutine) by unconventional means, such as a goto, or a loop control +statement. See L<perlfunc/sort>. + +=item Identifier too long + +(F) Perl limits identifiers (names for variables, functions, etc.) to +252 characters for simple names, somewhat more for compound names (like +C<$A::B>). You've exceeded Perl's limits. Future versions of Perl are +likely to eliminate these arbitrary limitations. + +=item Illegal character %s (carriage return) + +(F) A carriage return character was found in the input. This is an +error, and not a warning, because carriage return characters can break +multi-line strings, including here documents (e.g., C<print E<lt>E<lt>EOF;>). + +=item Illegal switch in PERL5OPT: %s + +(X) The PERL5OPT environment variable may only be used to set the +following switches: B<-[DIMUdmw]>. + +=item Integer overflow in hex number + +(S) The literal hex number you have specified is too big for your +architecture. On a 32-bit architecture the largest hex literal is +0xFFFFFFFF. 
+ +=item Integer overflow in octal number + +(S) The literal octal number you have specified is too big for your +architecture. On a 32-bit architecture the largest octal literal is +037777777777. + +=item internal error: glob failed + +(P) Something went wrong with the external program(s) used for C<glob> +and C<E<lt>*.cE<gt>>. This may mean that your csh (C shell) is +broken. If so, you should change all of the csh-related variables in +config.sh: If you have tcsh, make the variables refer to it as if it +were csh (e.g. C<full_csh='/usr/bin/tcsh'>); otherwise, make them all +empty (except that C<d_csh> should be C<'undef'>) so that Perl will +think csh is missing. In either case, after editing config.sh, run +C<./Configure -S> and rebuild Perl. + +=item Invalid conversion in %s: "%s" + +(W) Perl does not understand the given format conversion. +See L<perlfunc/sprintf>. + +=item Invalid type in pack: '%s' + +(F) The given character is not a valid pack type. See L<perlfunc/pack>. + +=item Invalid type in unpack: '%s' + +(F) The given character is not a valid unpack type. See L<perlfunc/unpack>. + +=item Name "%s::%s" used only once: possible typo + +(W) Typographical errors often show up as unique variable names. +If you had a good reason for having a unique name, then just mention +it again somehow to suppress the message (the C<use vars> pragma is +provided for just this purpose). + +=item Null picture in formline + +(F) The first argument to formline must be a valid format picture +specification. It was found to be empty, which probably means you +supplied it an uninitialized value. See L<perlform>. + +=item Offset outside string + +(F) You tried to do a read/write/send/recv operation with an offset +pointing outside the buffer. This is difficult to imagine. +The sole exception to this is that C<sysread()>ing past the buffer +will extend the buffer and zero pad the new area. + +=item Out of memory! 
+ +(X|F) The malloc() function returned 0, indicating there was insufficient +remaining memory (or virtual memory) to satisfy the request. + +The request was judged to be small, so the possibility to trap it +depends on the way Perl was compiled. By default it is not trappable. +However, if compiled for this, Perl may use the contents of C<$^M> as +an emergency pool after die()ing with this message. In this case the +error is trappable I<once>. + +=item Out of memory during request for %s + +(F) The malloc() function returned 0, indicating there was insufficient +remaining memory (or virtual memory) to satisfy the request. However, +the request was judged large enough (compile-time default is 64K), so +a possibility to shut down by trapping this error is granted. + +=item panic: frexp + +(P) The library function frexp() failed, making printf("%f") impossible. + +=item Possible attempt to put comments in qw() list + +(W) qw() lists contain items separated by whitespace; as with literal +strings, comment characters are not ignored, but are instead treated +as literal data. (You may have used different delimiters than the +parentheses shown here; braces are also frequently +used.) + +You probably wrote something like this: + + @list = qw( + a # a comment + b # another comment + ); + +when you should have written this: + + @list = qw( + a + b + ); + +If you really want comments, build your list the +old-fashioned way, with quotes and commas: + + @list = ( + 'a', # a comment + 'b', # another comment + ); + +=item Possible attempt to separate words with commas + +(W) qw() lists contain items separated by whitespace; therefore commas +aren't needed to separate the items. (You may have used different +delimiters than the exclamation marks shown here; braces are also frequently +used.) + +You probably wrote something like this: + + qw! a, b, c !; + +which puts literal commas into some of the list items. 
Write it without +commas if you don't want them to appear in your data: + + qw! a b c !; + +=item Scalar value @%s{%s} better written as $%s{%s} + +(W) You've used a hash slice (indicated by @) to select a single element of +a hash. Generally it's better to ask for a scalar value (indicated by $). +The difference is that C<$foo{&bar}> always behaves like a scalar, both when +assigning to it and when evaluating its argument, while C<@foo{&bar}> behaves +like a list when you assign to it, and provides a list context to its +subscript, which can do weird things if you're expecting only one subscript. + +=item Stub found while resolving method `%s' overloading `%s' in package `%s' + +(P) Overloading resolution over @ISA tree may be broken by importing stubs. +Stubs should never be implicitly created, but explicit calls to C<can> +may break this. + +=item Too late for "B<-T>" option + +(X) The #! line (or local equivalent) in a Perl script contains the +B<-T> option, but Perl was not invoked with B<-T> in its argument +list. This is an error because, by the time Perl discovers a B<-T> in +a script, it's too late to properly taint everything from the +environment. So Perl gives up. + +=item untie attempted while %d inner references still exist + +(W) A copy of the object returned from C<tie> (or C<tied>) was still +valid when C<untie> was called. + +=item Unrecognized character %s + +(F) The Perl parser has no idea what to do with the specified character +in your Perl script (or eval). Perhaps you tried to run a compressed +script, a binary program, or a directory as a Perl program. + +=item Unsupported function fork + +(F) Your version of the executable does not support forking. + +Note that under some systems, like OS/2, there may be different flavors of +Perl executables, some of which may support fork, some not. Try changing +the name you call Perl by to C<perl_>, C<perl__>, and so on. 
+ +=item Use of "$$<digit>" to mean "${$}<digit>" is deprecated + +(D) Perl versions before 5.004 misinterpreted any type marker followed +by "$" and a digit. For example, "$$0" was incorrectly taken to mean +"${$}0" instead of "${$0}". This bug is (mostly) fixed in Perl 5.004. + +However, the developers of Perl 5.004 could not fix this bug completely, +because at least two widely-used modules depend on the old meaning of +"$$0" in a string. So Perl 5.004 still interprets "$$<digit>" in the +old (broken) way inside strings; but it generates this message as a +warning. And in Perl 5.005, this special treatment will cease. + +=item Value of %s can be "0"; test with defined() + +(W) In a conditional expression, you used <HANDLE>, <*> (glob), C<each()>, +or C<readdir()> as a boolean value. Each of these constructs can return a +value of "0"; that would make the conditional expression false, which is +probably not what you intended. When using these constructs in conditional +expressions, test their values with the C<defined> operator. + +=item Variable "%s" may be unavailable + +(W) An inner (nested) I<anonymous> subroutine is inside a I<named> +subroutine, and outside that is another subroutine; and the anonymous +(innermost) subroutine is referencing a lexical variable defined in +the outermost subroutine. For example: + + sub outermost { my $a; sub middle { sub { $a } } } + +If the anonymous subroutine is called or referenced (directly or +indirectly) from the outermost subroutine, it will share the variable +as you would expect. But if the anonymous subroutine is called or +referenced when the outermost subroutine is not active, it will see +the value of the shared variable as it was before and during the +*first* call to the outermost subroutine, which is probably not what +you want. + +In these circumstances, it is usually best to make the middle +subroutine anonymous, using the C<sub {}> syntax. 
Perl has specific +support for shared variables in nested anonymous subroutines; a named +subroutine in between interferes with this feature. + +=item Variable "%s" will not stay shared + +(W) An inner (nested) I<named> subroutine is referencing a lexical +variable defined in an outer subroutine. + +When the inner subroutine is called, it will probably see the value of +the outer subroutine's variable as it was before and during the +*first* call to the outer subroutine; in this case, after the first +call to the outer subroutine is complete, the inner and outer +subroutines will no longer share a common value for the variable. In +other words, the variable will no longer be shared. + +Furthermore, if the outer subroutine is anonymous and references a +lexical variable outside itself, then the outer and inner subroutines +will I<never> share the given variable. + +This problem can usually be solved by making the inner subroutine +anonymous, using the C<sub {}> syntax. When inner anonymous subs that +reference variables in outer subroutines are called or referenced, +they are automatically rebound to the current values of such +variables. + +=item Warning: something's wrong + +(W) You passed warn() an empty string (the equivalent of C<warn "">) or +you called it with no args and C<$_> was empty. + +=item Ill-formed logical name |%s| in prime_env_iter + +(W) A warning peculiar to VMS. A logical name was encountered when preparing +to iterate over %ENV which violates the syntactic rules governing logical +names. Since it cannot be translated normally, it is skipped, and will not +appear in %ENV. This may be a benign occurrence, as some software packages +might directly modify logical name tables and introduce nonstandard names, +or it may indicate that a logical name table has been corrupted. + +=item Got an error from DosAllocMem + +(P) An error peculiar to OS/2. Most probably you're using an obsolete +version of Perl, and this should not happen anyway. 
+ +=item Malformed PERLLIB_PREFIX + +(F) An error peculiar to OS/2. PERLLIB_PREFIX should be of the form + + prefix1;prefix2 + +or + + prefix1 prefix2 + +with nonempty prefix1 and prefix2. If C<prefix1> is indeed a prefix +of a builtin library search path, prefix2 is substituted. The error +may appear if components are not found, or are too long. See +"PERLLIB_PREFIX" in F<README.os2>. + +=item PERL_SH_DIR too long + +(F) An error peculiar to OS/2. PERL_SH_DIR is the directory to find the +C<sh>-shell in. See "PERL_SH_DIR" in F<README.os2>. + +=item Process terminated by SIG%s + +(W) This is a standard message issued by OS/2 applications, while *nix +applications die in silence. It is considered a feature of the OS/2 +port. One can easily disable this with appropriate signal handlers; see +L<perlipc/"Signals">. See also "Process terminated by SIGTERM/SIGINT" +in F<README.os2>. + +=back + +=head1 BUGS + +If you find what you think is a bug, you might check the headers of +recently posted articles in the comp.lang.perl.misc newsgroup. +There may also be information at http://www.perl.com/perl/, the Perl +Home Page. + +If you believe you have an unreported bug, please run the B<perlbug> +program included with your release. Make sure you trim your bug down +to a tiny but sufficient test case. Your bug report, along with the +output of C<perl -V>, will be sent off to <F<perlbug@perl.com>> to be +analysed by the Perl porting team. + +=head1 SEE ALSO + +The F<Changes> file for exhaustive details on what changed. + +The F<INSTALL> file for how to build Perl. This file has been +significantly updated for 5.004, so even veteran users should +look through it. + +The F<README> file for general stuff. + +The F<Copying> file for copyright information. + +=head1 HISTORY + +Constructed by Tom Christiansen, grabbing material with permission +from innumerable contributors, with kibitzing by more than a few Perl +porters. 
+ +Last update: Wed May 14 11:14:09 EDT 1997 diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 38edda1982..ea33f50f9f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -12,17 +12,17 @@ desperation): (S) A severe warning (mandatory). (F) A fatal error (trappable). (P) An internal error you should never see (trappable). - (X) A very fatal error (non-trappable). + (X) A very fatal error (nontrappable). (A) An alien error message (not generated by Perl). Optional warnings are enabled by using the B<-w> switch. Warnings may -be captured by setting C<$^Q> to a reference to a routine that will be -called on each warning instead of printing it. See L<perlvar>. +be captured by setting C<$SIG{__WARN__}> to a reference to a routine that +will be called on each warning instead of printing it. See L<perlvar>. Trappable errors may be trapped using the eval operator. See L<perlfunc/eval>. Some of these messages are generic. Spots that vary are denoted with a %s, -just as in a printf format. Note that some message start with a %s! +just as in a printf format. Note that some messages start with a %s! The symbols C<"%-?@> sort before the letters, while C<[> and C<\> sort after. =over 4 @@ -33,6 +33,14 @@ The symbols C<"%-?@> sort before the letters, while C<[> and C<\> sort after. to try to declare one with a package qualifier on the front. Use local() if you want to localize a package variable. +=item "my" variable %s masks earlier declaration in same scope + +(S) A lexical variable has been redeclared in the same scope, effectively +eliminating all access to the previous instance. This is almost always +a typographical error. Note that the earlier variable will still exist +until the end of the scope or until all closure referents to it are +destroyed. + =item "no" not allowed in expression (F) The "no" keyword is recognized and executed at compile time, and returns @@ -45,23 +53,35 @@ no useful value. See L<perlmod>. 
=item % may only be used in unpack -(F) You can't pack a string by supplying a checksum, since the +(F) You can't pack a string by supplying a checksum, because the checksumming process loses information, and you can't go the other way. See L<perlfunc/unpack>. =item %s (...) interpreted as function (W) You've run afoul of the rule that says that any list operator followed -by parentheses turns into a function, with all the list operators arguments -found inside the parens. See L<perlop/Terms and List Operators (Leftward)>. +by parentheses turns into a function, with all the list operator's arguments +found inside the parentheses. See L<perlop/Terms and List Operators (Leftward)>. =item %s argument is not a HASH element -(F) The argument to delete() or exists() must be a hash element, such as +(F) The argument to exists() must be a hash element, such as + + $foo{$bar} + $ref->[12]->{"susie"} + +=item %s argument is not a HASH element or slice + +(F) The argument to delete() must be either a hash element, such as $foo{$bar} $ref->[12]->{"susie"} +or a hash slice, such as + + @foo{$bar, $baz, $xyzzy} + @{$ref->[12]}{"susie", "queue"} + =item %s did not return a true value (F) A required (or used) file must return a true value to indicate that @@ -76,11 +96,11 @@ sees what it knows to be a term when it was expecting to see an operator, it gives you this warning. Usually it indicates that an operator or delimiter was omitted, such as a semicolon. -=item %s had compilation errors. +=item %s had compilation errors (F) The final summary message when a C<perl -c> fails. -=item %s has too many errors. +=item %s has too many errors (F) The parser has given up trying to parse the program after 10 errors. Further error messages would likely be uninformative. @@ -99,28 +119,28 @@ before it could possibly have been used. (F) The final summary message when a C<perl -c> succeeds. -=item %s: Command not found. 
+=item %s: Command not found (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. -=item %s: Expression syntax. +=item %s: Expression syntax (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. -=item %s: Undefined variable. +=item %s: Undefined variable (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. =item %s: not found -(A) You've accidentally run your script through the Bourne shell -instead of Perl. Check the <#!> line, or manually feed your script +(A) You've accidentally run your script through the Bourne shell +instead of Perl. Check the #! line, or manually feed your script into Perl yourself. =item B<-P> not allowed for setuid/setgid script @@ -144,7 +164,7 @@ if you meant it literally. See L<perlre>. =item @ outside of string -(F) You had a pack template that specified an absolution position outside +(F) You had a pack template that specified an absolute position outside the string being unpacked. See L<perlfunc/pack>. =item accept() on closed fd @@ -154,7 +174,20 @@ the return value of your socket() call? See L<perlfunc/accept>. =item Allocation too large: %lx -(F) You can't allocate more than 64K on an MSDOS machine. +(X) You can't allocate more than 64K on an MS-DOS machine. + +=item Allocation too large + +(F) You can't allocate more than 2^31+"small amount" bytes. + +=item Applying %s to %s will act on scalar(%s) + +(W) The pattern match (//), substitution (s///), and translation (tr///) +operators work on scalar values. 
If you apply one of them to an array +or a hash, it will convert the array or hash to a scalar value -- the +length of an array, or the population info of a hash -- and then work on +that scalar value. This is probably not what you meant to do. See +L<perlfunc/grep> and L<perlfunc/map> for alternatives. =item Arg too short for msgsnd @@ -164,14 +197,16 @@ the return value of your socket() call? See L<perlfunc/accept>. (W)(S) You said something that may not be interpreted the way you thought. Normally it's pretty easy to disambiguate it by supplying -a missing quote, operator, paren pair or declaration. +a missing quote, operator, parenthesis pair or declaration. =item Args must match #! line (F) The setuid emulator requires that the arguments Perl was invoked -with match the arguments specified on the #! line. +with match the arguments specified on the #! line. Since some systems +impose a one-argument limit on the #! line, try combining switches; +for example, turn C<-w -U> into C<-wU>. -=item Argument "%s" isn't numeric +=item Argument "%s" isn't numeric%s (W) The indicated string was fed as an argument to an operator that expected a numeric value instead. If you're fortunate the message @@ -202,6 +237,13 @@ know which context to supply to the right side. be garbage collected on exit. An SV was discovered to be outside any of those arenas. +=item Attempt to free nonexistent shared string + +(P) Perl maintains a reference counted internal table of strings to +optimize the storage and access of hash keys and other strings. This +indicates someone tried to decrement the reference count of a string +that can no longer be found in the table. 
+ =item Attempt to free temp prematurely (W) Mortalized values are supposed to be freed by the free_tmps() @@ -223,17 +265,19 @@ could indicate that SvREFCNT_dec() was called too many times, or that SvREFCNT_inc() was called too few times, or that the SV was mortalized when it shouldn't have been, or that memory has been corrupted. +=item Attempt to use reference as lvalue in substr + +(W) You supplied a reference as the first argument to substr() used +as an lvalue, which is pretty strange. Perhaps you forgot to +dereference it first. See L<perlfunc/substr>. + =item Bad arg length for %s, is %d, should be %d (F) You passed a buffer of the wrong size to one of msgctl(), semctl() or -shmctl(). In C parlance, the correct sized are, respectively, -S<sizeof(struct msqid_ds *)>, S<sizeof(struct semid_ds *)> and +shmctl(). In C parlance, the correct sizes are, respectively, +S<sizeof(struct msqid_ds *)>, S<sizeof(struct semid_ds *)>, and S<sizeof(struct shmid_ds *)>. -=item Bad associative array - -(P) One of the internal hash routines was passed a null HV pointer. - =item Bad filehandle: %s (F) A symbol was passed to something wanting a filehandle, but the symbol @@ -243,7 +287,17 @@ did it in another package. =item Bad free() ignored (S) An internal routine called free() on something that had never been -malloc()ed in the first place. +malloc()ed in the first place. Mandatory, but can be disabled by +setting environment variable C<PERL_BADFREE> to 1. + +This message can be quite often seen with DB_File on systems with +"hard" dynamic linking, like C<AIX> and C<OS/2>. It is a bug of +C<Berkeley DB> which is left unnoticed if C<DB> uses I<forgiving> +system malloc(). + +=item Bad hash + +(P) One of the internal hash routines was passed a null HV pointer. =item Bad name after %s:: @@ -274,17 +328,31 @@ wasn't a symbol table entry. (P) An internal request asked to add a hash entry to something that wasn't a symbol table entry. 
-=item Badly places ()'s +=item Badly placed ()'s (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. + +=item Bareword "%s" not allowed while "strict subs" in use + +(F) With "strict subs" in use, a bareword is only allowed as a +subroutine identifier, in curly braces or to the left of the "=>" symbol. +Perhaps you need to predeclare a subroutine? =item BEGIN failed--compilation aborted (F) An untrapped exception was raised while executing a BEGIN subroutine. Compilation stops immediately and the interpreter is exited. +=item BEGIN not safe after errors--compilation aborted + +(F) Perl found a C<BEGIN {}> subroutine (or a C<use> directive, which +implies a C<BEGIN {}>) after one or more compilation errors had +already occurred. Since the intended environment for the C<BEGIN {}> +could not be guaranteed (due to the errors), and since subsequent code +likely depends on its correct operation, Perl just gave up. + =item bind() on closed fd (W) You tried to do a bind on a closed socket. Did you forget to check @@ -299,30 +367,37 @@ the return value of your socket() call? See L<perlfunc/bind>. (F) A subroutine invoked from an external package via perl_call_sv() exited by calling exit. +=item Can't "goto" outside a block + +(F) A "goto" statement was executed to jump out of what might look +like a block, except that it isn't a proper block. This usually +occurs if you tried to jump out of a sort() block or subroutine, which +is a no-no. See L<perlfunc/goto>. + =item Can't "last" outside a block (F) A "last" statement was executed to break out of the current block, except that there's this itty bitty problem called there isn't a current block. Note that an "if" or "else" block doesn't count as a -"loopish" block. 
You can usually double the curlies to get the same -effect though, since the inner curlies will be considered a block -that loops once. See L<perlfunc/last>. +"loopish" block, as doesn't a block given to sort(). You can usually double +the curlies to get the same effect though, because the inner curlies +will be considered a block that loops once. See L<perlfunc/last>. =item Can't "next" outside a block (F) A "next" statement was executed to reiterate the current block, but there isn't a current block. Note that an "if" or "else" block doesn't -count as a "loopish" block. You can usually double the curlies to get -the same effect though, since the inner curlies will be considered a block -that loops once. See L<perlfunc/last>. +count as a "loopish" block, as doesn't a block given to sort(). You can +usually double the curlies to get the same effect though, because the inner +curlies will be considered a block that loops once. See L<perlfunc/next>. =item Can't "redo" outside a block (F) A "redo" statement was executed to restart the current block, but there isn't a current block. Note that an "if" or "else" block doesn't -count as a "loopish" block. You can usually double the curlies to get -the same effect though, since the inner curlies will be considered a block -that loops once. See L<perlfunc/last>. +count as a "loopish" block, as doesn't a block given to sort(). You can +usually double the curlies to get the same effect though, because the inner +curlies will be considered a block that loops once. See L<perlfunc/redo>. =item Can't bless non-reference value @@ -331,7 +406,7 @@ encapsulation of objects. See L<perlobj>. =item Can't break at that line -(S) A warning intended for while running within the debugger, indicating +(S) A warning intended to only be printed while running within the debugger, indicating the line number specified wasn't the location of a statement that could be stopped at. @@ -343,7 +418,7 @@ in it, let alone methods. See L<perlobj>. 
=item Can't call method "%s" on unblessed reference -(F) A method call must know what package it's supposed to run in. It +(F) A method call must know in what package it's supposed to run. It ordinarily finds this out from the object reference you supply, but you didn't supply an object reference in this case. A reference isn't an object reference until it has been blessed. See L<perlobj>. @@ -367,7 +442,7 @@ that you can chdir to, possibly because it doesn't exist. =item Can't coerce %s to integer in %s (F) Certain types of SVs, in particular real symbol table entries -(type GLOB), can't be forced to stop being what they are. So you can't +(typeglobs), can't be forced to stop being what they are. So you can't say things like: *foo += 1; @@ -382,12 +457,12 @@ but then $foo no longer contains a glob. =item Can't coerce %s to number in %s (F) Certain types of SVs, in particular real symbol table entries -(type GLOB), can't be forced to stop being what they are. +(typeglobs), can't be forced to stop being what they are. =item Can't coerce %s to string in %s (F) Certain types of SVs, in particular real symbol table entries -(type GLOB), can't be forced to stop being what they are. +(typeglobs), can't be forced to stop being what they are. =item Can't create pipe mailbox @@ -396,7 +471,7 @@ or other plumbing problems. =item Can't declare %s in my -(F) Only scalar, array and hash variables may be declared as lexical variables. +(F) Only scalar, array, and hash variables may be declared as lexical variables. They must have ordinary identifiers as names. =item Can't do inplace edit on %s: %s @@ -405,11 +480,11 @@ They must have ordinary identifiers as names. =item Can't do inplace edit without backup -(F) You're on a system such as MSDOS that gets confused if you try reading -from a deleted (but still opened) file. You have to say B<-i>C<.bak>, or some +(F) You're on a system such as MS-DOS that gets confused if you try reading +from a deleted (but still opened) file. 
You have to say C<-i.bak>, or some such. -=item Can't do inplace edit: %s > 14 characters +=item Can't do inplace edit: %s E<gt> 14 characters (S) There isn't enough room in the filename to make a backup name for the file. @@ -441,7 +516,7 @@ your sysadmin why he and/or she removed it. (F) This machine doesn't have either waitpid() or wait4(), so only waitpid() without flags is emulated. -=item Can't do {n,m} with n > m +=item Can't do {n,m} with n E<gt> m (F) Minima must be less than or equal to maxima. If you really want your regexp to match something 0 times, just put {0}. See L<perlre>. @@ -453,7 +528,7 @@ For example, it'd be kind of silly to put a B<-x> on the #! line. =item Can't exec "%s": %s -(W) An system(), exec() or piped open call could not execute the named +(W) An system(), exec(), or piped open call could not execute the named program for the indicated reason. Typical reasons include: the permissions were wrong on the file, the file wasn't found in C<$ENV{PATH}>, the executable in question was compiled for another architecture, or the @@ -479,7 +554,7 @@ for us to go to. See L<perlfunc/goto>. =item Can't find string terminator %s anywhere before EOF (F) Perl strings can stretch over multiple lines. This message means that -the closing delimiter was omitted. Since bracketed quotes count nesting +the closing delimiter was omitted. Because bracketed quotes count nesting levels, the following is missing its final parenthesis: print q(The character '(' starts a side comment.) @@ -498,7 +573,7 @@ assumes that the stat buffer contains all the necessary information, and passes it, instead of the filespec, to the access checking routine. It will try to retrieve the filespec using the device name and FID present in the stat buffer, but this works only if you haven't made a subsequent call to the CRTL stat() -routine, since the device name is overwritten with each call. If this warning +routine, because the device name is overwritten with each call. 
If this warning appears, the name lookup failed, and the access checking routine gave up and returned FALSE, just to be conservative. (Note: The access checking routine knows about the Perl C<stat> operator and file tests, so you shouldn't ever @@ -519,7 +594,7 @@ mailbox buffers to be, and didn't get an answer. (F) The deeply magical "goto subroutine" call can only replace one subroutine call for another. It can't manufacture one out of whole cloth. In general -you should only be calling it out of an AUTOLOAD routine anyway. See +you should be calling it out of only an AUTOLOAD routine anyway. See L<perlfunc/goto>. =item Can't localize a reference @@ -531,7 +606,7 @@ do a local. =item Can't localize lexical variable %s -(F) You used local on a variable name that was previous declared as a +(F) You used local on a variable name that was previously declared as a lexical variable using "my". This is not allowed. If you want to localize a package variable of the same name, qualify it with the package name. @@ -539,16 +614,16 @@ package name. =item Can't locate %s in @INC (F) You said to do (or require, or use) a file that couldn't be found -in any of the libraries mentioned in @INC. Perhaps you need to set -the PERL5LIB environment variable to say where the extra library is, -or maybe the script needs to add the library name to @INC. Or maybe +in any of the libraries mentioned in @INC. Perhaps you need to set the +PERL5LIB or PERL5OPT environment variable to say where the extra library +is, or maybe the script needs to add the library name to @INC. Or maybe you just misspelled the name of the file. See L<perlfunc/require>. =item Can't locate object method "%s" via package "%s" (F) You called a method correctly, and it correctly indicated a package functioning as a class, but that package doesn't define that particular -method, nor does any of it's base classes. See L<perlobj>. +method, nor does any of its base classes. See L<perlobj>. 
=item Can't locate package %s for @%s::ISA @@ -563,16 +638,16 @@ a B<-e> switch. Maybe your /tmp partition is full, or clobbered. =item Can't modify %s in %s (F) You aren't allowed to assign to the item indicated, or otherwise try to -change it, such as with an autoincrement. +change it, such as with an auto-increment. -=item Can't modify non-existent substring +=item Can't modify nonexistent substring (P) The internal routine that does assignment to a substr() was handed a NULL. -=item Can't msgrcv to readonly var +=item Can't msgrcv to read-only var -(F) The target of a msgrcv must be modifiable in order to be used as a receive +(F) The target of a msgrcv must be modifiable to be used as a receive buffer. =item Can't open %s: %s @@ -584,25 +659,25 @@ Usually this is because you don't have read permission for the file. (W) You tried to say C<open(CMD, "|cmd|")>, which is not supported. You can try any of several modules in the Perl library to do this, such as -"open2.pl". Alternately, direct the pipe's output to a file using ">", +IPC::Open2. Alternately, direct the pipe's output to a file using "E<gt>", and then read it in under a different file handle. =item Can't open error file %s as stderr (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after '2>' or '2>>' on the command line for -writing. +couldn't open the file specified after '2E<gt>' or '2E<gt>E<gt>' on the +command line for writing. =item Can't open input file %s as stdin (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after '<' on the command line for reading. +couldn't open the file specified after 'E<lt>' on the command line for reading. =item Can't open output file %s as stdout (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after '>' or '>>' on the command line for -writing. 
+couldn't open the file specified after 'E<gt>' or 'E<gt>E<gt>' on the command +line for writing. =item Can't open output pipe (name: %s) @@ -613,6 +688,13 @@ couldn't open the pipe into which to send data destined for stdout. (F) The script you specified can't be opened for the indicated reason. +=item Can't redefine active sort subroutine %s + +(F) Perl optimizes the internal handling of sort subroutines and keeps +pointers into them. You tried to redefine one such sort subroutine when it +was currently active, which is not allowed. If you really want to do +this, you should write C<sort { &func } @x> instead of C<sort func @x>. + =item Can't rename %s to %s: %s, skipping file (S) The rename done by the B<-i> switch failed for some reason, probably because @@ -645,7 +727,7 @@ of suidperl. =item Can't take log of %g -(F) Logarithms are only defined on positive real numbers. +(F) Logarithms are defined on only positive real numbers. =item Can't take sqrt of %g @@ -680,7 +762,7 @@ code calling sv_upgrade. =item Can't use "my %s" in sort comparison (F) The global variables $a and $b are reserved for sort comparisons. -You mentioned $a or $b in the same line as the <=> or cmp operator, +You mentioned $a or $b in the same line as the E<lt>=E<gt> or cmp operator, and the variable had earlier been declared as a lexical variable. Either qualify the sort variable with the package name, or rename the lexical variable. @@ -699,10 +781,15 @@ test the type of the reference, if need be. (W) In an ordinary expression, backslash is a unary operator that creates a reference to its argument. The use of backslash to indicate a backreference -to a matched substring is only valid as part of a regular expression pattern. +to a matched substring is valid only as part of a regular expression pattern. Trying to do this in ordinary Perl code produces a value that prints out looking like SCALAR(0xdecaf). Use the $1 form instead. 
+=item Can't use bareword ("%s") as %s ref while "strict refs" in use + +(F) Only hard references are allowed by "strict refs". Symbolic references +are disallowed. See L<perlref>. + =item Can't use string ("%s") as %s ref while "strict refs" in use (F) Only hard references are allowed by "strict refs". Symbolic references @@ -711,17 +798,12 @@ are disallowed. See L<perlref>. =item Can't use an undefined value as %s reference (F) A value used as either a hard reference or a symbolic reference must -be a defined value. This helps to de-lurk some insidious errors. - -=item Can't use delimiter brackets within expression - -(F) The ${name} construct is for disambiguating identifiers in strings, not -in ordinary code. +be a defined value. This helps to delurk some insidious errors. =item Can't use global %s in "my" (F) You tried to declare a magical variable as a lexical variable. This is -not allowed, because the magic can only be tied to one location (namely +not allowed, because the magic can be tied to only one location (namely the global variable) and it would be incredibly confusing to have variables in your program that looked like magical variables but weren't. @@ -737,7 +819,7 @@ didn't look like an array reference, or anything else subscriptable. (F) The write routine failed for some reason while trying to process a B<-e> switch. Maybe your /tmp partition is full, or clobbered. -=item Can't x= to readonly value +=item Can't x= to read-only value (F) You tried to repeat a constant value (often the undefined value) with an assignment operator, which implies modifying the value itself. @@ -745,9 +827,15 @@ Perhaps you need to copy the value to a temporary, and repeat that. =item Cannot open temporary file -(F) The create routine failed for some reaon while trying to process +(F) The create routine failed for some reason while trying to process a B<-e> switch. Maybe your /tmp partition is full, or clobbered.
+=item Cannot resolve method `%s' overloading `%s' in package `%s' + +(F|P) Error resolving overloading specified by a method name (as +opposed to a subroutine reference): no such method callable via the +package. If method name is C<???>, this is an internal error. + =item chmod: mode argument is missing initial 0 (W) A novice will sometimes say @@ -757,15 +845,37 @@ a B<-e> switch. Maybe your /tmp partition is full, or clobbered. not realizing that 777 will be interpreted as a decimal number, equivalent to 01411. Octal constants are introduced with a leading 0 in Perl, as in C. -=item Close on unopened file <%s> +=item Close on unopened file E<lt>%sE<gt> (W) You tried to close a filehandle that was never opened. +=item Compilation failed in require + +(F) Perl could not compile a file specified in a C<require> statement. +Perl uses this generic message when none of the errors that it encountered +were severe enough to halt compilation immediately. + =item connect() on closed fd (W) You tried to do a connect on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/connect>. +=item Constant subroutine %s redefined + +(S) You redefined a subroutine which had previously been eligible for +inlining. See L<perlsub/"Constant Functions"> for commentary and +workarounds. + +=item Constant subroutine %s undefined + +(S) You undefined a subroutine which had previously been eligible for +inlining. See L<perlsub/"Constant Functions"> for commentary and +workarounds. + +=item Copy method did not return a reference + +(F) The method which overloads "=" is buggy. See L<overload/Copy Constructor>. + =item Corrupt malloc ptr 0x%lx at 0x%lx (P) The malloc package that comes with Perl had an internal failure. @@ -787,6 +897,12 @@ times than it has returned. This probably indicates an infinite recursion, unless you're writing strange benchmark programs, in which case it indicates something else. 
+=item Delimiter for here document is too long + +(F) In a here document construct like C<E<lt>E<lt>FOO>, the label +C<FOO> is too long for Perl to handle. You have to be seriously +twisted to write code that triggers this error. + =item Did you mean &%s instead? (W) You probably referred to an imported subroutine &FOO as $FOO or some such. @@ -796,6 +912,11 @@ case it indicates something else. (W) You probably said %hash{$key} when you meant $hash{$key} or @hash{@keys}. On the other hand, maybe you just meant %hash and got carried away. +=item Died + +(F) You passed die() an empty string (the equivalent of C<die "">) or +you called it with no args and both C<$@> and C<$_> were empty. + =item Do you need to predeclare %s? (S) This is an educated guess made in conjunction with the message "%s @@ -835,29 +956,42 @@ The interpreter is immediately exited. =item Error converting file specification %s -(F) An error peculiar to VMS. Since Perl may have to deal with file +(F) An error peculiar to VMS. Because Perl may have to deal with file specifications in either VMS or Unix syntax, it converts them to a single form when it must operate on them directly. Either you've passed an invalid file specification to Perl, or you've found a case the conversion routines don't handle. Drat. -=item Execution of %s aborted due to compilation errors. +=item Excessively long <> operator + +(F) The contents of a <> operator may not exceed the maximum size of a +Perl identifier. If you're just trying to glob a long list of +filenames, try using the glob() operator, or put the filenames into a +variable and glob that. + +=item Execution of %s aborted due to compilation errors (F) The final summary message when a Perl compilation fails. =item Exiting eval via %s -(W) You are exiting an eval by unconventional means, such as a +(W) You are exiting an eval by unconventional means, such as a goto, or a loop control statement. 
+=item Exiting pseudo-block via %s + +(W) You are exiting a rather special block construct (like a sort block or +subroutine) by unconventional means, such as a goto, or a loop control +statement. See L<perlfunc/sort>. + =item Exiting subroutine via %s -(W) You are exiting a subroutine by unconventional means, such as a +(W) You are exiting a subroutine by unconventional means, such as a goto, or a loop control statement. =item Exiting substitution via %s -(W) You are exiting a substitution by unconventional means, such as a +(W) You are exiting a substitution by unconventional means, such as a return, a goto, or a loop control statement. =item Fatal VMS error at %s, line %d @@ -878,19 +1012,21 @@ PDP-11 or something? You need to do an open() or a socket() call, or call a constructor from the FileHandle package. -=item Filehandle %s opened only for input +=item Filehandle %s opened for only input (W) You tried to write on a read-only filehandle. If you intended it to be a read-write filehandle, you needed to open it with -"+<" or "+>" or "+>>" instead of with "<" or nothing. If you only -intended to write the file, use ">" or ">>". See L<perlfunc/open>. +"+E<lt>" or "+E<gt>" or "+E<gt>E<gt>" instead of with "E<lt>" or nothing. If +you intended only to write the file, use "E<gt>" or "E<gt>E<gt>". See +L<perlfunc/open>. -=item Filehandle only opened for input +=item Filehandle opened for only input (W) You tried to write on a read-only filehandle. If you intended it to be a read-write filehandle, you needed to open it with -"+<" or "+>" or "+>>" instead of with "<" or nothing. If you only -intended to write the file, use ">" or ">>". See L<perlfunc/open>. +"+E<lt>" or "+E<gt>" or "+E<gt>E<gt>" instead of with "E<lt>" or nothing. If +you intended only to write the file, use "E<gt>" or "E<gt>E<gt>". See +L<perlfunc/open>. =item Final $ should be \$ or $name @@ -962,8 +1098,8 @@ the line, and you really meant a "less than". 
=item Global symbol "%s" requires explicit package name -(F) You've said "use strict vars", which indicates that all variables must -either be lexically scoped (using "my"), or explicitly qualified to +(F) You've said "use strict vars", which indicates that all variables +must either be lexically scoped (using "my"), or explicitly qualified to say which package the global variable is in (using "::"). =item goto must have label @@ -982,11 +1118,40 @@ an emergency basis to prevent a core dump. (D) Really old Perl let you omit the % on hash names in some spots. This is now heavily deprecated. -=item Identifier "%s::%s" used only once: possible typo +=item Identifier too long + +(F) Perl limits identifiers (names for variables, functions, etc.) to +about 250 characters for simple names, and somewhat more for compound +names (like C<$A::B>). You've exceeded Perl's limits. Future +versions of Perl are likely to eliminate these arbitrary limitations. + +=item Ill-formed logical name |%s| in prime_env_iter + +(W) A warning peculiar to VMS. A logical name was encountered when preparing +to iterate over %ENV which violates the syntactic rules governing logical +names. Because it cannot be translated normally, it is skipped, and will not +appear in %ENV. This may be a benign occurrence, as some software packages +might directly modify logical name tables and introduce nonstandard names, +or it may indicate that a logical name table has been corrupted. + +=item Illegal character %s (carriage return) + +(F) A carriage return character was found in the input. This is an +error, and not a warning, because carriage return characters can break +multi-line strings, including here documents (e.g., C<print E<lt>E<lt>EOF;>). -(W) Typographical errors often show up as unique identifiers. If you -had a good reason for having a unique identifier, then just mention it -again somehow to suppress the message. 
+Under Unix, this error is usually caused by executing Perl code -- +either the main program, a module, or an eval'd string -- that was +transferred over a network connection from a non-Unix system without +properly converting the text file format. + +Under systems that use something other than '\n' to delimit lines of +text, this error can also be caused by reading Perl code from a file +handle that is in binary mode (as set by the C<binmode> operator). + +In either case, the Perl code in question will probably need to be +converted with something like C<s/\x0D\x0A?/\n/g> before it can be +executed. =item Illegal division by zero @@ -1007,9 +1172,24 @@ don't take to this kindly. (W) You may have tried to use an 8 or 9 in a octal number. Interpretation of the octal number stopped before the 8 or 9. +=item Illegal switch in PERL5OPT: %s + +(X) The PERL5OPT environment variable may only be used to set the +following switches: B<-[DIMUdmw]>. + +=item In string, @%s now must be written as \@%s + +(F) It used to be that Perl would try to guess whether you wanted an +array interpolated or a literal @. It did this when the string was first +used at runtime. Now strings are parsed at compile time, and ambiguous +instances of @ must be disambiguated, either by prepending a backslash to +indicate a literal, or by declaring (or using) the array within the +program before the string (lexically). (Someday it will simply assume +that an unbackslashed @ interpolates an array.) + =item Insecure dependency in %s -(F) You tried to do something that the tainting mechanism didn't like. +(F) You tried to do something that the tainting mechanism didn't like. The tainting mechanism is turned on when you're running setuid or setgid, or when you specify B<-T> to turn it on explicitly. The tainting mechanism labels all data that's derived directly or indirectly from the user, @@ -1020,21 +1200,33 @@ for more information. 
=item Insecure directory in %s (F) You can't use system(), exec(), or a piped open in a setuid or setgid -script if $ENV{PATH} contains a directory that is writable by the world. +script if C<$ENV{PATH}> contains a directory that is writable by the world. See L<perlsec>. =item Insecure PATH (F) You can't use system(), exec(), or a piped open in a setuid or -setgid script if $ENV{PATH} is derived from data supplied (or +setgid script if C<$ENV{PATH}> is derived from data supplied (or potentially supplied) by the user. The script must set the path to a known value, using trustworthy data. See L<perlsec>. +=item Integer overflow in hex number + +(S) The literal hex number you have specified is too big for your +architecture. On a 32-bit architecture the largest hex literal is +0xFFFFFFFF. + +=item Integer overflow in octal number + +(S) The literal octal number you have specified is too big for your +architecture. On a 32-bit architecture the largest octal literal is +037777777777. + =item Internal inconsistency in tracking vforks (S) A warning peculiar to VMS. Perl keeps track of the number -of times you've called C<fork> and C<exec>, in order to determine -whether the current call to C<exec> should be affect the current +of times you've called C<fork> and C<exec>, to determine +whether the current call to C<exec> should affect the current script or a subprocess (see L<perlvms/exec>). Somehow, this count has become scrambled, so Perl is making a guess and treating this C<exec> as a request to terminate the Perl script @@ -1044,6 +1236,17 @@ and execute the specified command. (P) Something went badly wrong in the regular expression parser. +=item internal error: glob failed + +(P) Something went wrong with the external program(s) used for C<glob> +and C<E<lt>*.cE<gt>>. This may mean that your csh (C shell) is +broken. If so, you should change all of the csh-related variables in +config.sh: If you have tcsh, make the variables refer to it as if it +were csh (e.g. 
C<full_csh='/usr/bin/tcsh'>); otherwise, make them all +empty (except that C<d_csh> should be C<'undef'>) so that Perl will +think csh is missing. In either case, after editing config.sh, run +C<./Configure -S> and rebuild Perl. + =item internal urp in regexp at /%s/ (P) Something went badly awry in the regular expression parser. @@ -1053,6 +1256,19 @@ and execute the specified command. (F) The range specified in a character class had a minimum character greater than the maximum character. See L<perlre>. +=item Invalid conversion in %s: "%s" + +(W) Perl does not understand the given format conversion. +See L<perlfunc/sprintf>. + +=item Invalid type in pack: '%s' + +(F) The given character is not a valid pack type. See L<perlfunc/pack>. + +=item Invalid type in unpack: '%s' + +(F) The given character is not a valid unpack type. See L<perlfunc/unpack>. + =item ioctl is not implemented (F) Your machine apparently doesn't implement ioctl(), which is pretty @@ -1085,20 +1301,10 @@ L<perlfunc/last>. (W) You tried to do a listen on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/listen>. -=item Literal @%s now requires backslash - -(F) It used to be that Perl would try to guess whether you wanted an -array interpolated or a literal @. It did this when the string was -first used at runtime. Now strings are parsed at compile time, and -ambiguous instances of @ must be disambiguated, either by putting a -backslash to indicate a literal, or by declaring (or using) the array -within the program before the string (lexically). (Someday it will simply -assume that an unbackslashed @ interpolates an array.) - =item Method for operation %s not found in package %s during blessing (F) An attempt was made to specify an entry in an overloading table that -doesn't somehow point to a valid method. See L<perlovl>. +doesn't resolve to a valid subroutine. See L<overload>. 
=item Might be a runaway multi-line %s string starting on line %d @@ -1112,8 +1318,8 @@ ended earlier on the current line. =item Missing $ on loop variable -(F) Apparently you've been programming in csh too much. Variables are always -mentioned with the $ in Perl, unlike in the shells, where it can vary from +(F) Apparently you've been programming in B<csh> too much. Variables are always +mentioned with the $ in Perl, unlike in the shells, where it can vary from one line to the next. =item Missing comma after first argument to %s function @@ -1141,7 +1347,7 @@ the previous line just because you saw this message. =item Modification of a read-only value attempted (F) You tried, directly or indirectly, to change the value of a -constant. You didn't, of course, try "2 = 1", since the compiler +constant. You didn't, of course, try "2 = 1", because the compiler catches that. But an easy way to do the same thing is: sub mod { $_[0] = 1 } @@ -1149,13 +1355,13 @@ catches that. But an easy way to do the same thing is: Another way is to assign to a substr() that's off the end of the string. -=item Modification of non-creatable array value attempted, subscript %d +=item Modification of noncreatable array value attempted, subscript %d (F) You tried to make an array value spring into existence, and the subscript was probably negative, even counting from end of the array backwards. -=item Modification of non-creatable hash value attempted, subscript "%s" +=item Modification of noncreatable hash value attempted, subscript "%s" (F) You tried to make a hash value spring into existence, and it couldn't be created for some peculiar reason. @@ -1170,8 +1376,15 @@ be created for some peculiar reason. =item Multidimensional syntax %s not supported -(W) Multidimensional arrays aren't written like $foo[1,2,3]. They're written -like $foo[1][2][3], as in C. +(W) Multidimensional arrays aren't written like C<$foo[1,2,3]>. They're written +like C<$foo[1][2][3]>, as in C. 
+ +=item Name "%s::%s" used only once: possible typo + +(W) Typographical errors often show up as unique variable names. +If you had a good reason for having a unique name, then just mention +it again somehow to suppress the message. The C<use vars> pragma is +provided for just this purpose. =item Negative length @@ -1180,10 +1393,10 @@ that is less than 0. This is difficult to imagine. =item nested *?+ in regexp -(F) You can't quantify a quantifier without intervening parens. So +(F) You can't quantify a quantifier without intervening parentheses. So things like ** or +* or ?* are illegal. -Note, however, that the minimal matching quantifiers, *?, +? and ?? appear +Note, however, that the minimal matching quantifiers, C<*?>, C<+?>, and C<??> appear to be nested quantifiers, but aren't. See L<perlre>. =item No #! line @@ -1208,10 +1421,22 @@ See L<perlsec>. allowed to have a comma between that and the following arguments. Otherwise it'd be just another one of the arguments. +One possible cause for this is that you expected to have imported a +constant to your name space with B<use> or B<import> while no such +importing took place, it may for example be that your operating system +does not support that particular constant. Hopefully you did use an +explicit import list for the constants you expect to see, please see +L<perlfunc/use> and L<perlfunc/import>. While an explicit import list +would probably have caught this error earlier it naturally does not +remedy the fact that your operating system still does not support that +constant. Maybe you have a typo in the constants of the symbol import +list of B<use> or B<import> or in the constant name at the line where +this error was triggered? + =item No command into which to pipe on command line (F) An error peculiar to VMS. 
Perl handles its own command line redirection, -and found a '|' at the end of the command line, so it doesn't know whither you +and found a '|' at the end of the command line, so it doesn't know where you want to pipe the output from this command. =item No DB::DB routine defined @@ -1226,7 +1451,7 @@ right. =item No dbm on this machine (P) This is counted as an internal error, because every machine should -supply dbm nowadays, since Perl comes with SDBM. See L<SDBM_File>. +supply dbm nowadays, because Perl comes with SDBM. See L<SDBM_File>. =item No DBsub routine @@ -1235,29 +1460,29 @@ but for some reason the perl5db.pl file (or some facsimile thereof) didn't define a DB::sub routine to be called at the beginning of each ordinary subroutine call. -=item No error file after 2> or 2>> on command line +=item No error file after 2E<gt> or 2E<gt>E<gt> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a '2>' or a '2>>' on the command line, but can't find the name of the -file to which to write data destined for stderr. +and found a '2E<gt>' or a '2E<gt>E<gt>' on the command line, but can't find +the name of the file to which to write data destined for stderr. -=item No input file after < on command line +=item No input file after E<lt> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a '<' on the command line, but can't find the name of the file from -which to read data for stdin. +and found a 'E<lt>' on the command line, but can't find the name of the file +from which to read data for stdin. -=item No output file after > on command line +=item No output file after E<gt> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a lone '>' at the end of the command line, so it doesn't know whither -you wanted to redirect stdout. 
+and found a lone 'E<gt>' at the end of the command line, so it doesn't know +where you wanted to redirect stdout. -=item No output file after > or >> on command line +=item No output file after E<gt> or E<gt>E<gt> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a '>' or a '>>' on the command line, but can't find the name of the -file to which to write data destined for stdout. +and found a 'E<gt>' or a 'E<gt>E<gt>' on the command line, but can't find the +name of the file to which to write data destined for stdout. =item No Perl script found in input @@ -1304,7 +1529,7 @@ format, but this indicates you did, and that it didn't exist. =item Not a GLOB reference -(F) Perl was trying to evaluate a reference to a "type glob" (that is, +(F) Perl was trying to evaluate a reference to a "typeglob" (that is, a symbol table entry that looks like C<*foo>), but found a reference to something else instead. You can use the ref() function to find out what kind of ref it really was. See L<perlref>. @@ -1334,10 +1559,10 @@ subroutine), but found a reference to something else instead. You can use the ref() function to find out what kind of ref it really was. See also L<perlref>. -=item Not a subroutine reference in %OVERLOAD +=item Not a subroutine reference in overload table (F) An attempt was made to specify an entry in an overloading table that -doesn't somehow point to a valid subroutine. See L<perlovl>. +doesn't somehow point to a valid subroutine. See L<overload>. =item Not an ARRAY reference @@ -1356,9 +1581,15 @@ See L<perlform>. =item Null filename used -(F) You can't require the null filename, especially since on many machines +(F) You can't require the null filename, especially because on many machines that means the current directory! See L<perlfunc/require>. +=item Null picture in formline + +(F) The first argument to formline must be a valid format picture +specification. 
It was found to be empty, which probably means you +supplied it an uninitialized value. See L<perlform>. + =item NULL OP IN RUN (P) Some internal routine called run() with a null opcode pointer. @@ -1369,16 +1600,30 @@ that means the current directory! See L<perlfunc/require>. =item NULL regexp argument -(P) The internal pattern matching routines blew it bigtime. +(P) The internal pattern matching routines blew it big time. =item NULL regexp parameter (P) The internal pattern matching routines are out of their gourd. +=item Number too long + +(F) Perl limits the representation of decimal numbers in programs to +about 250 characters. You've exceeded that length. Future versions of +Perl are likely to eliminate this arbitrary limitation. In the meantime, +try using scientific notation (e.g. "1e6" instead of "1_000_000"). + =item Odd number of elements in hash list (S) You specified an odd number of elements to a hash list, which is odd, -since hash lists come in key/value pairs. +because hash lists come in key/value pairs. + +=item Offset outside string + +(F) You tried to do a read/write/send/recv operation with an offset +pointing outside the buffer. This is difficult to imagine. +The sole exception to this is that C<sysread()>ing past the buffer +will extend the buffer and zero pad the new area. =item oops: oopsAV @@ -1388,10 +1633,13 @@ since hash lists come in key/value pairs. (S) An internal warning that the grammar is screwed up. -=item Operation `%s' %s: no method found, +=item Operation `%s': no method found,%s -(F) An attempt was made to use an entry in an overloading table that -somehow no longer points to a valid method. See L<perlovl>. +(F) An attempt was made to perform an overloaded operation for which +no handler was defined. While some handlers can be autogenerated in +terms of other handlers, there is no default handler for any +operation, unless the C<fallback> overloading key is specified to be +true. See L<overload>.
=item Operator or semicolon missing before %s @@ -1408,9 +1656,22 @@ but realloc() wouldn't give it more memory, virtual or otherwise. =item Out of memory! -(X) The malloc() function returned 0, indicating there was insufficient +(X|F) The malloc() function returned 0, indicating there was insufficient remaining memory (or virtual memory) to satisfy the request. +The request was judged to be small, so the possibility to trap it +depends on the way perl was compiled. By default it is not trappable. +However, if compiled for this, Perl may use the contents of C<$^M> as +an emergency pool after die()ing with this message. In this case the +error is trappable I<once>. + +=item Out of memory during request for %s + +(F) The malloc() function returned 0, indicating there was insufficient +remaining memory (or virtual memory) to satisfy the request. However, +the request was judged large enough (compile-time default is 64K), so +a possibility to shut down by trapping this error is granted. + =item page overflow (W) A single call to write() produced more lines than can fit on a page. @@ -1450,6 +1711,10 @@ it wasn't an eval context. (P) The internal do_trans() routine was called with invalid operational data. +=item panic: frexp + +(P) The library function frexp() failed, making printf("%f") impossible. + =item panic: goto (P) We popped the context stack to a context with the specified label, @@ -1470,7 +1735,7 @@ it wasn't a block context. =item panic: leave_scope clearsv -(P) A writable lexical variable became readonly somehow within the scope. +(P) A writable lexical variable became read-only somehow within the scope. =item panic: leave_scope inconsistency @@ -1556,7 +1821,7 @@ was string. (P) The lexer got into a bad state while processing a case modifier. -=item Parens missing around "%s" list +=item Parentheses missing around "%s" list (W) You said something like @@ -1589,6 +1854,52 @@ perspective, it's probably not what you intended.
(F) Your C compiler uses POSIX getpgrp(), which takes no argument, unlike the BSD version, which takes a pid. +=item Possible attempt to put comments in qw() list + +(W) qw() lists contain items separated by whitespace; as with literal +strings, comment characters are not ignored, but are instead treated +as literal data. (You may have used different delimiters than the +parentheses shown here; braces are also frequently +used.) + +You probably wrote something like this: + + @list = qw( + a # a comment + b # another comment + ); + +when you should have written this: + + @list = qw( + a + b + ); + +If you really want comments, build your list the +old-fashioned way, with quotes and commas: + + @list = ( + 'a', # a comment + 'b', # another comment + ); + +=item Possible attempt to separate words with commas + +(W) qw() lists contain items separated by whitespace; therefore commas +aren't needed to separate the items. (You may have used different +delimiters than the exclamation marks shown here; braces are also frequently +used.) + +You probably wrote something like this: + + qw! a, b, c !; + +which puts literal commas into some of the list items. Write it without +commas if you don't want them to appear in your data: + + qw! a b c !; + =item Possible memory corruption: %s overflowed 3rd argument (F) An ioctl() or fcntl() returned more than Perl was bargaining for. @@ -1606,9 +1917,10 @@ is now misinterpreted as open(FOO || die); -because of the strict regularization of Perl 5's grammar into unary and -list operators. (The old open was a little of both.) You must put -parens around the filehandle, or use the new "or" operator instead of "||". +because of the strict regularization of Perl 5's grammar into unary +and list operators. (The old open was a little of both.) You must +put parentheses around the filehandle, or use the new "or" operator +instead of "||". =item print on closed filehandle %s @@ -1622,25 +1934,25 @@ Check your logic flow.
=item Probable precedence problem on %s -(W) The compiler found a bare word where it expected a conditional, +(W) The compiler found a bareword where it expected a conditional, which often indicates that an || or && was parsed as part of the last argument of the previous construct, for example: open FOO || die; -=item Prototype mismatch: (%s) vs (%s) +=item Prototype mismatch: %s vs %s -(S) The subroutine being defined had a predeclared (forward) declaration -with a different function prototype. +(S) The subroutine being declared or defined had previously been declared +or defined with a different function prototype. -=item Read on closed filehandle <%s> +=item Read on closed filehandle E<lt>%sE<gt> (W) The filehandle you're reading from got itself closed sometime before now. Check your logic flow. =item Reallocation too large: %lx -(F) You can't allocate more than 64K on an MSDOS machine. +(F) You can't allocate more than 64K on an MS-DOS machine. =item Recompile perl with B<-D>DEBUGGING to use B<-D> switch @@ -1669,7 +1981,7 @@ expression compiler gave it. =item regexp too big -(F) The current implementation of regular expression uses shorts as +(F) The current implementation of regular expressions uses shorts as address offsets within a string. Unfortunately this means that if the regular expression compiles to longer than 32767, it'll blow up. Usually when you want a regular expression this big, there is a better @@ -1690,21 +2002,35 @@ shifting or popping (for array variables). See L<perlform>. =item Scalar value @%s[%s] better written as $%s[%s] -(W) You've used an array slice (indicated by @) to select a single value of +(W) You've used an array slice (indicated by @) to select a single element of an array. Generally it's better to ask for a scalar value (indicated by $). 
-The difference is that $foo[&bar] always behaves like a scalar, both when -assigning to it and when evaluating its argument, while @foo[&bar] behaves +The difference is that C<$foo[&bar]> always behaves like a scalar, both when +assigning to it and when evaluating its argument, while C<@foo[&bar]> behaves like a list when you assign to it, and provides a list context to its -subscript, which can do weird things if you're only expecting one subscript. +subscript, which can do weird things if you're expecting only one subscript. On the other hand, if you were actually hoping to treat the array -element as a list, you need to look into how references work, since +element as a list, you need to look into how references work, because +Perl will not magically convert between scalars and lists for you. See +L<perlref>. + +=item Scalar value @%s{%s} better written as $%s{%s} + +(W) You've used a hash slice (indicated by @) to select a single element of +a hash. Generally it's better to ask for a scalar value (indicated by $). +The difference is that C<$foo{&bar}> always behaves like a scalar, both when +assigning to it and when evaluating its argument, while C<@foo{&bar}> behaves +like a list when you assign to it, and provides a list context to its +subscript, which can do weird things if you're expecting only one subscript. + +On the other hand, if you were actually hoping to treat the hash +element as a list, you need to look into how references work, because Perl will not magically convert between scalars and lists for you. See L<perlref>. =item Script is not setuid/setgid in suidperl -(F) Oddly, the suidperl program was invoked on a script with its setuid +(F) Oddly, the suidperl program was invoked on a script without a setuid or setgid bit set. This doesn't make much sense. =item Search pattern not terminated @@ -1712,10 +2038,10 @@ or setgid bit set. This doesn't make much sense. (F) The lexer couldn't find the final delimiter of a // or m{} construct. 
Remember that bracketing delimiters count nesting level. -=item seek() on unopened file +=item %sseek() on unopened file -(W) You tried to use the seek() function on a filehandle that was either -never opened or has been closed since. +(W) You tried to use the seek() or sysseek() function on a filehandle that +was either never opened or has since been closed. =item select not implemented @@ -1743,7 +2069,7 @@ Check your logic flow. =item Sequence (?#... not terminated (F) A regular expression comment must be terminated by a closing -parenthesis. Embedded parens aren't allowed. See L<perlre>. +parenthesis. Embedded parentheses aren't allowed. See L<perlre>. =item Sequence (?%s...) not implemented @@ -1757,34 +2083,44 @@ See L<perlre>. =item Server error -Also known as "500 Server error". This is a CGI error, not a Perl -error. You need to make sure your script is executable, is accessible -by the user CGI is running the script under (which is probably not -the user account you tested it under), does not rely on any environment -variables (like PATH) from the user it isn't running under, and isn't -in a location where the CGI server can't find it, basically, more or less. +Also known as "500 Server error". + +B<This is a CGI error, not a Perl error>. + +You need to make sure your script is executable, is accessible by the user +CGI is running the script under (which is probably not the user account you +tested it under), does not rely on any environment variables (like PATH) +from the user it isn't running under, and isn't in a location where the CGI +server can't find it, basically, more or less. 
Please see the following +for more information: + + http://www.perl.com/perl/faq/idiots-guide.html + http://www.perl.com/perl/faq/perl-cgi-faq.html + ftp://rtfm.mit.edu/pub/usenet/news.answers/www/cgi-faq + http://hoohoo.ncsa.uiuc.edu/cgi/interface.html + http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html =item setegid() not implemented -(F) You tried to assign to $), and your operating system doesn't support +(F) You tried to assign to C<$)>, and your operating system doesn't support the setegid() system call (or equivalent), or at least Configure didn't think so. =item seteuid() not implemented -(F) You tried to assign to $>, and your operating system doesn't support +(F) You tried to assign to C<$E<gt>>, and your operating system doesn't support the seteuid() system call (or equivalent), or at least Configure didn't think so. =item setrgid() not implemented -(F) You tried to assign to $(, and your operating system doesn't support +(F) You tried to assign to C<$(>, and your operating system doesn't support the setrgid() system call (or equivalent), or at least Configure didn't think so. =item setruid() not implemented -(F) You tried to assign to $<, and your operating system doesn't support +(F) You tried to assign to C<$E<lt>>, and your operating system doesn't support the setruid() system call (or equivalent), or at least Configure didn't think so. @@ -1801,7 +2137,7 @@ because the world might have written on it already. (W) You tried to do a shutdown on a closed socket. Seems a bit superfluous. -=item SIG%s handler "%s" not defined. +=item SIG%s handler "%s" not defined (W) The signal handler named in %SIG doesn't, in fact, exist. Perhaps you put it into the wrong package? @@ -1828,10 +2164,10 @@ or less than one element. See L<perlfunc/sort>. more times than there are characters of input, which is what happened.) See L<perlfunc/split>.
-=item Stat on unopened file <%s> +=item Stat on unopened file E<lt>%sE<gt> (W) You tried to use the stat() function (or an equivalent file test) -on a filehandle that was either never opened or has been closed since. +on a filehandle that was either never opened or has since been closed. =item Statement unlikely to be reached @@ -1841,6 +2177,12 @@ there was a failure. You probably wanted to use system() instead, which does return. To suppress this warning, put the exec() in a block by itself. +=item Stub found while resolving method `%s' overloading `%s' in package `%s' + +(P) Overloading resolution over @ISA tree may be broken by importation stubs. +Stubs should never be implicitly created, but explicit calls to C<can> +may break this. + =item Subroutine %s redefined (W) You redefined a subroutine. To suppress this warning, say @@ -1854,8 +2196,8 @@ by itself. (P) The substitution was looping infinitely. (Obviously, a substitution shouldn't iterate more times than there are characters of -input, which is what happened.) See the discussion of substitution in -L<perlop/"Quote and Quotelike Operators">. +input, which is what happened.) See the discussion of substitution in +L<perlop/"Quote and Quote-like Operators">. =item Substitution pattern not terminated @@ -1873,7 +2215,7 @@ construct. Remember that bracketing delimiters count nesting level. That is, the absolute value of the offset was larger than the length of the string. See L<perlfunc/substr>. -=item suidperl is no longer needed since... +=item suidperl is no longer needed since %s (F) Your Perl was compiled with B<-D>SETUID_SCRIPTS_ARE_SECURE_NOW, but a version of the setuid emulator somehow got run anyway. @@ -1893,7 +2235,7 @@ Often there will be another error message associated with the syntax error giving more information. (Sometimes it helps to turn on B<-w>.) The error message itself often tells you where it was in the line when it decided to give up.
Sometimes the actual error is several tokens -before this, since Perl is good at understanding random input. +before this, because Perl is good at understanding random input. Occasionally the line number may be misleading, and once in a blue moon the only way to figure out what's triggering the error is to call C<perl -c> repeatedly, chopping away half the program each time to see @@ -1901,13 +2243,13 @@ if the error went away. Sort of the cybernetic version of S<20 questions>. =item syntax error at line %d: `%s' unexpected -(A) You've accidentally run your script through the Bourne shell -instead of Perl. Check the <#!> line, or manually feed your script +(A) You've accidentally run your script through the Bourne shell +instead of Perl. Check the #! line, or manually feed your script into Perl yourself. =item System V IPC is not implemented on this machine -(F) You tried to do something with a function beginning with "sem", "shm" +(F) You tried to do something with a function beginning with "sem", "shm", or "msg". See L<perlfunc/semctl>, for example. =item Syswrite on closed filehandle @@ -1915,20 +2257,25 @@ or "msg". See L<perlfunc/semctl>, for example. (W) The filehandle you're writing to got itself closed sometime before now. Check your logic flow. +=item Target of goto is too deeply nested + +(F) You tried to use C<goto> to reach a label that was too deeply +nested for Perl to reach. Perl is doing you a favor by refusing. + =item tell() on unopened file (W) You tried to use the tell() function on a filehandle that was either -never opened or has been closed since. +never opened or has since been closed. -=item Test on unopened file <%s> +=item Test on unopened file E<lt>%sE<gt> (W) You tried to invoke a file test operator on a filehandle that isn't open. Check your logic. See also L<perlfunc/-X>. =item That use of $[ is unsupported -(F) Assignment to $[ is now strictly circumscribed, and interpreted as -a compiler directive. 
You may only say one of +(F) Assignment to C<$[> is now strictly circumscribed, and interpreted as +a compiler directive. You may say only one of $[ = 0; $[ = 1; @@ -1945,11 +2292,11 @@ out from under another module inadvertently. See L<perlvar/$[>. The function indicated isn't implemented on this architecture, according to the probings of Configure. -=item The crypt() function is unimplemented due to excessive paranoia. +=item The crypt() function is unimplemented due to excessive paranoia (F) Configure couldn't find the crypt() function on your machine, probably because your vendor didn't supply it, probably because they -think the U.S. Govermnment thinks it's a secret, or at least that they +think the U.S. Government thinks it's a secret, or at least that they will continue to pretend that it is. And if you quote me on that, I will deny it. @@ -1969,17 +2316,39 @@ you're not running on Unix. (F) There has to be at least one argument to syscall() to specify the system call to call, silly dilly. +=item Too late for "B<-T>" option + +(X) The #! line (or local equivalent) in a Perl script contains the +B<-T> option, but Perl was not invoked with B<-T> in its command line. +This is an error because, by the time Perl discovers a B<-T> in a +script, it's too late to properly taint everything from the environment. +So Perl gives up. + +If the Perl script is being executed as a command using the #! +mechanism (or its local equivalent), this error can usually be fixed +by editing the #! line so that the B<-T> option is a part of Perl's +first argument: e.g. change C<perl -n -T> to C<perl -T -n>. + +If the Perl script is being executed as C<perl scriptname>, then the +B<-T> option must appear on the command line: C<perl -T scriptname>. + +=item Too late for "-%s" option + +(X) The #! line (or local equivalent) in a Perl script contains the +B<-M> or B<-m> option. This is an error because B<-M> and B<-m> options +are not intended for use inside scripts. 
Use the C<use> pragma instead. + =item Too many ('s =item Too many )'s (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. =item Too many args to syscall -(F) Perl only supports a maximum of 14 args to syscall(). +(F) Perl supports a maximum of only 14 args to syscall(). =item Too many arguments for %s @@ -2008,13 +2377,13 @@ Configure knows about. =item Type of arg %d to %s must be %s (not %s) (F) This function requires the argument in that position to be of a -certain type. Arrays must be @NAME or @{EXPR}. Hashes must be -%NAME or %{EXPR}. No implicit dereferencing is allowed--use the +certain type. Arrays must be @NAME or C<@{EXPR}>. Hashes must be +%NAME or C<%{EXPR}>. No implicit dereferencing is allowed--use the {EXPR} forms as an explicit dereference. See L<perlref>. =item umask: argument is missing initial 0 -(W) A umask of 222 is incorrect. It should be 0222, since octal literals +(W) A umask of 222 is incorrect. It should be 0222, because octal literals always start with 0 in Perl, as in C. =item Unable to create sub named "%s" @@ -2078,13 +2447,13 @@ representative, who probably put it there in the first place. =item Unknown BYTEORDER -(F) There are no byteswapping functions for a machine with this byte order. +(F) There are no byte-swapping functions for a machine with this byte order. =item unmatched () in regexp (F) Unbackslashed parentheses must always be balanced in regular expressions. If you're a vi user, the % key is valuable for finding -the matching paren. See L<perlre>. +the matching parenthesis. See L<perlre>. =item Unmatched right bracket @@ -2101,14 +2470,15 @@ See L<perlre>. =item Unquoted string "%s" may clash with future reserved word -(W) You used a bare word that might someday be claimed as a reserved word. 
+(W) You used a bareword that might someday be claimed as a reserved word. It's best to put such a word in quotes, or capitalize it somehow, or insert an underbar into it. You might also declare it as a subroutine. -=item Unrecognized character \%03o ignored +=item Unrecognized character %s -(S) A garbage character was found in the input, and ignored, in case it's -a weird control character on an EBCDIC machine, or some such. +(F) The Perl parser has no idea what to do with the specified character +in your Perl script (or eval). Perhaps you tried to run a compressed +script, a binary program, or a directory as a Perl program. =item Unrecognized signal name "%s" @@ -2125,12 +2495,20 @@ supplying the bad switch on your behalf.) (W) A file operation was attempted on a filename, and that operation failed, PROBABLY because the filename contained a newline, PROBABLY -because you forgot to chop() or chomp() it off. See L<perlfunc/chop>. +because you forgot to chop() or chomp() it off. See L<perlfunc/chomp>. =item Unsupported directory function "%s" called (F) Your machine doesn't support opendir() and readdir(). +=item Unsupported function fork + +(F) Your version of executable does not support forking. + +Note that under some systems, like OS/2, there may be different flavors of +Perl executables, some of which may support fork, some not. Try changing +the name you call Perl by to C<perl_>, C<perl__>, and so on. + =item Unsupported function %s (F) This machines doesn't implement the indicated function, apparently. @@ -2141,40 +2519,46 @@ At least, Configure doesn't think so. (F) Your machine doesn't support the Berkeley socket mechanism, or at least that's what Configure thought. -=item Unterminated <> operator +=item Unterminated E<lt>E<gt> operator (F) The lexer saw a left angle bracket in a place where it was expecting a term, so it's looking for the corresponding right angle bracket, and not finding it. 
Chances are you left some needed parentheses out earlier in the line, and you really meant a "less than". +=item Use of "$$<digit>" to mean "${$}<digit>" is deprecated + +(D) Perl versions before 5.004 misinterpreted any type marker followed +by "$" and a digit. For example, "$$0" was incorrectly taken to mean +"${$}0" instead of "${$0}". This bug is (mostly) fixed in Perl 5.004. + +However, the developers of Perl 5.004 could not fix this bug completely, +because at least two widely-used modules depend on the old meaning of +"$$0" in a string. So Perl 5.004 still interprets "$$<digit>" in the +old (broken) way inside strings; but it generates this message as a +warning. And in Perl 5.005, this special treatment will cease. + =item Use of $# is deprecated -(D) This was an ill-advised attempt to emulate a poorly defined awk feature. +(D) This was an ill-advised attempt to emulate a poorly defined B<awk> feature. Use an explicit printf() or sprintf() instead. =item Use of $* is deprecated -(D) This variable magically turned on multiline pattern matching, both for +(D) This variable magically turned on multi-line pattern matching, both for you and for any luckless subroutine that you happen to call. You should use the new C<//m> and C<//s> modifiers now to do that without the dangerous action-at-a-distance effects of C<$*>. =item Use of %s in printf format not supported -(F) You attempted to use a feature of printf that is accessible only -from C. This usually means there's a better way to do it in Perl. - -=item Use of %s is deprecated - -(D) The construct indicated is no longer recommended for use, generally -because there's a better way to do it, and also because the old way has -bad side effects. +(F) You attempted to use a feature of printf that is accessible only +from C. This usually means there's a better way to do it in Perl.
-=item Use of bare << to mean <<"" is deprecated +=item Use of bare E<lt>E<lt> to mean E<lt>E<lt>"" is deprecated (D) You are now encouraged to use the explicitly quoted form if you -wish to use a blank line as the terminator of the here-document. +wish to use an empty line as the terminator of the here-document. =item Use of implicit split to @_ is deprecated @@ -2182,6 +2566,30 @@ wish to use a blank line as the terminator of the here-document. subroutine's argument list, so it's better if you assign the results of a split() explicitly to an array (or list). +=item Use of inherited AUTOLOAD for non-method %s() is deprecated + +(D) As an (ahem) accidental feature, C<AUTOLOAD> subroutines are looked +up as methods (using the C<@ISA> hierarchy) even when the subroutines to +be autoloaded were called as plain functions (e.g. C<Foo::bar()>), not +as methods (e.g. C<Foo-E<gt>bar()> or C<$obj-E<gt>bar()>). + +This bug will be rectified in Perl 5.005, which will use method lookup +only for methods' C<AUTOLOAD>s. However, there is a significant base +of existing code that may be using the old behavior. So, as an +interim step, Perl 5.004 issues an optional warning when non-methods +use inherited C<AUTOLOAD>s. + +The simple rule is: Inheritance will not work when autoloading +non-methods. The simple fix for old code is: In any module that used to +depend on inheriting C<AUTOLOAD> for non-methods from a base class named +C<BaseClass>, execute C<*AUTOLOAD = \&BaseClass::AUTOLOAD> during startup. + +=item Use of %s is deprecated + +(D) The construct indicated is no longer recommended for use, generally +because there's a better way to do it, and also because the old way has +bad side effects. + =item Use of uninitialized value (W) An undefined value was used as if it were already defined. It was @@ -2219,7 +2627,20 @@ a scalar context, the comma is treated like C's comma operator, which throws away the left argument, which is not what you want. See L<perlref> for more on this.
-=item Variable "%s" is not exported +=item untie attempted while %d inner references still exist + +(W) A copy of the object returned from C<tie> (or C<tied>) was still +valid when C<untie> was called. + +=item Value of %s can be "0"; test with defined() + +(W) In a conditional expression, you used <HANDLE>, <*> (glob), C<each()>, +or C<readdir()> as a boolean value. Each of these constructs can return a +value of "0"; that would make the conditional expression false, which is +probably not what you intended. When using these constructs in conditional +expressions, test their values with the C<defined> operator. + +=item Variable "%s" is not imported%s (F) While "use strict" in effect, you referred to a global variable that you apparently thought was imported from another module, because @@ -2227,18 +2648,67 @@ something else of the same name (usually a subroutine) is exported by that module. It usually means you put the wrong funny character on the front of your variable. -=item Variable syntax. +=item Variable "%s" may be unavailable + +(W) An inner (nested) I<anonymous> subroutine is inside a I<named> +subroutine, and outside that is another subroutine; and the anonymous +(innermost) subroutine is referencing a lexical variable defined in +the outermost subroutine. For example: + + sub outermost { my $a; sub middle { sub { $a } } } + +If the anonymous subroutine is called or referenced (directly or +indirectly) from the outermost subroutine, it will share the variable +as you would expect. But if the anonymous subroutine is called or +referenced when the outermost subroutine is not active, it will see +the value of the shared variable as it was before and during the +*first* call to the outermost subroutine, which is probably not what +you want. + +In these circumstances, it is usually best to make the middle +subroutine anonymous, using the C<sub {}> syntax. 
Perl has specific +support for shared variables in nested anonymous subroutines; a named +subroutine in between interferes with this feature. + +=item Variable "%s" will not stay shared + +(W) An inner (nested) I<named> subroutine is referencing a lexical +variable defined in an outer subroutine. + +When the inner subroutine is called, it will probably see the value of +the outer subroutine's variable as it was before and during the +*first* call to the outer subroutine; in this case, after the first +call to the outer subroutine is complete, the inner and outer +subroutines will no longer share a common value for the variable. In +other words, the variable will no longer be shared. + +Furthermore, if the outer subroutine is anonymous and references a +lexical variable outside itself, then the outer and inner subroutines +will I<never> share the given variable. + +This problem can usually be solved by making the inner subroutine +anonymous, using the C<sub {}> syntax. When inner anonymous subs that +reference variables in outer subroutines are called or referenced, +they are automatically rebound to the current values of such +variables. + +=item Variable syntax (A) You've accidentally run your script through B<csh> instead -of Perl. Check the <#!> line, or manually feed your script -into Perl yourself. +of Perl. Check the #! line, or manually feed your script into +Perl yourself. -=item Warning: unable to close filehandle %s properly. +=item Warning: something's wrong -(S) The implicit close() done by an open() got an error indication on the -close(0. This usually indicates your filesystem ran out of disk space. +(W) You passed warn() an empty string (the equivalent of C<warn "">) or +you called it with no args and C<$_> was empty. -=item Warning: Use of "%s" without parens is ambiguous +=item Warning: unable to close filehandle %s properly + +(S) The implicit close() done by an open() got an error indication on the +close(). 
This usually indicates your file system ran out of disk space. + +=item Warning: Use of "%s" without parentheses is ambiguous (S) You wrote a unary operator followed by something that looks like a binary operator that could also have been interpreted as a term or @@ -2255,7 +2725,7 @@ but in actual fact, you got rand(+5); -So put in parens to say what you really mean. +So put in parentheses to say what you really mean. =item Write on closed filehandle @@ -2288,7 +2758,7 @@ Use a filename instead. =item YOU HAVEN'T DISABLED SET-ID SCRIPTS IN THE KERNEL YET! -(F) And you probably never will, since you probably don't have the +(F) And you probably never will, because you probably don't have the sources to your kernel, and your vendor probably doesn't give a rip about what you want. Your best bet is to use the wrapsuid script in the eg directory to put a setuid C wrapper around your script. @@ -2309,18 +2779,18 @@ See L<perlfunc/getsockopt>. =item \1 better written as $1 (W) Outside of patterns, backreferences live on as variables. The use -of backslashes is grandfathered on the righthand side of a +of backslashes is grandfathered on the right-hand side of a substitution, but stylistically it's better to use the variable form because other Perl programmers will expect it, and it works better if there are more than 9 backreferences. -=item '|' and '<' may not both be specified on command line +=item '|' and 'E<lt>' may not both be specified on command line (F) An error peculiar to VMS. Perl does its own command line redirection, and found that STDIN was a pipe, and that you also tried to redirect STDIN using -'<'. Only one STDIN stream to a customer, please. +'E<lt>'. Only one STDIN stream to a customer, please. -=item '|' and '>' may not both be specified on command line +=item '|' and 'E<gt>' may not both be specified on command line (F) An error peculiar to VMS. 
Perl does its own command line redirection, and thinks you tried to redirect stdout both to a file and into a pipe to another @@ -2335,5 +2805,38 @@ streams, such as } close OUT; +=item Got an error from DosAllocMem + +(P) An error peculiar to OS/2. Most probably you're using an obsolete +version of Perl, and this should not happen anyway. + +=item Malformed PERLLIB_PREFIX + +(F) An error peculiar to OS/2. PERLLIB_PREFIX should be of the form + + prefix1;prefix2 + +or + + prefix1 prefix2 + +with nonempty prefix1 and prefix2. If C<prefix1> is indeed a prefix +of a builtin library search path, prefix2 is substituted. The error +may appear if components are not found, or are too long. See +"PERLLIB_PREFIX" in F<README.os2>. + +=item PERL_SH_DIR too long + +(F) An error peculiar to OS/2. PERL_SH_DIR is the directory to find the +C<sh>-shell in. See "PERL_SH_DIR" in F<README.os2>. + +=item Process terminated by SIG%s + +(W) This is a standard message issued by OS/2 applications, while *nix +applications die in silence. It is considered a feature of the OS/2 +port. One can easily disable this by appropriate sighandlers, see +L<perlipc/"Signals">. See also "Process terminated by SIGTERM/SIGINT" +in F<README.os2>. + =back diff --git a/pod/perldsc.pod b/pod/perldsc.pod index 7e18e7405c..48750dd5de 100644 --- a/pod/perldsc.pod +++ b/pod/perldsc.pod @@ -21,7 +21,7 @@ with three dimensions! for $x (1 .. 10) { for $y (1 .. 10) { for $z (1 .. 10) { - $LoL[$x][$y][$z] = + $LoL[$x][$y][$z] = $x ** $y + $z; } } @@ -30,25 +30,25 @@ with three dimensions! Alas, however simple this may appear, underneath it's a much more elaborate construct than meets the eye! -How do you print it out? Why can't you just say C<print @LoL>? How do +How do you print it out? Why can't you say just C<print @LoL>? How do you sort it? How can you pass it to a function or get one of these back from a function? Is it an object? Can you save it to disk to read back later?
How do you access whole rows or columns of that matrix? Do -all the values have to be numeric? +all the values have to be numeric? As you see, it's quite easy to become confused. While some small portion of the blame for this can be attributed to the reference-based implementation, it's really more due to a lack of existing documentation with examples designed for the beginner. -This document is meant to be a detailed but understandable treatment of -the many different sorts of data structures you might want to develop. It should -also serve as a cookbook of examples. That way, when you need to create one of these -complex data structures, you can just pinch, pilfer, or purloin -a drop-in example from here. +This document is meant to be a detailed but understandable treatment of the +many different sorts of data structures you might want to develop. It +should also serve as a cookbook of examples. That way, when you need to +create one of these complex data structures, you can just pinch, pilfer, or +purloin a drop-in example from here. Let's look at each of these possible constructs in detail. There are separate -documents on each of the following: +sections on each of the following: =over 5 @@ -62,36 +62,32 @@ documents on each of the following: =item * more elaborate constructs -=item * recursive and self-referential data structures - -=item * objects - =back But for now, let's look at some of the general issues common to all -of these types of data structures. +of these types of data structures. =head1 REFERENCES The most important thing to understand about all data structures in Perl -- including multidimensional arrays--is that even though they might appear otherwise, Perl C<@ARRAY>s and C<%HASH>es are all internally -one-dimensional. They can only hold scalar values (meaning a string, +one-dimensional. They can hold only scalar values (meaning a string, number, or a reference). 
They cannot directly contain other arrays or hashes, but instead contain I<references> to other arrays or hashes. -You can't use a reference to a array or hash in quite the same way that -you would a real array or hash. For C or C++ programmers unused to distinguishing -between arrays and pointers to the same, this can be confusing. If so, -just think of it as the difference between a structure and a pointer to a -structure. +You can't use a reference to an array or hash in quite the same way that you +would a real array or hash. For C or C++ programmers unused to +distinguishing between arrays and pointers to the same, this can be +confusing. If so, just think of it as the difference between a structure +and a pointer to a structure. You can (and should) read more about references in the perlref(1) man page. Briefly, references are rather like pointers that know what they point to. (Objects are also a kind of reference, but we won't be needing -them right away--if ever.) That means that when you have something that -looks to you like an access to two-or-more-dimensional array and/or hash, -that what's really going on is that in all these cases, the base type is +them right away--if ever.) This means that when you have something which +looks to you like an access to a two-or-more-dimensional array and/or hash, +what's really going on is that the base type is merely a one-dimensional entity that contains references to the next level. It's just that you can I<use> it as though it were a two-dimensional one. This is actually the way almost all C @@ -102,7 +98,7 @@ multidimensional arrays work as well. 
$hash{string}[7] # hash of arrays $hash{string}{'another string'} # hash of hashes -Now, because the top level only contains references, if you try to print +Now, because the top level contains only references, if you try to print out your array in with a simple print() function, you'll get something that doesn't look very nice, like this: @@ -130,7 +126,7 @@ of a nested array: for $i (1..10) { @list = somefunc($i); $LoL[$i] = @list; # WRONG! - } + } That's just the simple case of assigning a list to a scalar and getting its element count. If that's what you really and truly want, then you @@ -138,8 +134,8 @@ might do well to consider being a tad more explicit about it, like this: for $i (1..10) { @list = somefunc($i); - $counts[$i] = scalar @list; - } + $counts[$i] = scalar @list; + } Here's the case of taking a reference to the same memory location again and again: @@ -147,9 +143,9 @@ again and again: for $i (1..10) { @list = somefunc($i); $LoL[$i] = \@list; # WRONG! - } + } -So, just what's the big problem with that? It looks right, doesn't it? +So, what's the big problem with that? It looks right, doesn't it? After all, I just told you that you need an array of references, so by golly, you've made me one! @@ -164,29 +160,29 @@ the following C program: rp = getpwnam("root"); dp = getpwnam("daemon"); - printf("daemon name is %s\nroot name is %s\n", + printf("daemon name is %s\nroot name is %s\n", dp->pw_name, rp->pw_name); } Which will print daemon name is daemon - root name is daemon + root name is daemon The problem is that both C<rp> and C<dp> are pointers to the same location in memory! In C, you'd have to remember to malloc() yourself some new memory. In Perl, you'll want to use the array constructor C<[]> or the hash constructor C<{}> instead. 
Here's the right way to do the preceding -broken code fragments +broken code fragments: for $i (1..10) { @list = somefunc($i); $LoL[$i] = [ @list ]; - } + } The square brackets make a reference to a new array with a I<copy> of what's in @list at the time of the assignment. This is what -you want. +you want. Note that this will produce something similar, but it's much harder to read: @@ -194,7 +190,7 @@ much harder to read: for $i (1..10) { @list = 0 .. $i; @{$LoL[$i]} = @list; - } + } Is it the same? Well, maybe so--and maybe not. The subtle difference is that when you assign something in square brackets, you know for sure @@ -218,9 +214,9 @@ something is "interesting", that rather than meaning "intriguing", they're disturbingly more apt to mean that it's "annoying", "difficult", or both? :-) -So just remember to always use the array or hash constructors with C<[]> +So just remember always to use the array or hash constructors with C<[]> or C<{}>, and you'll be fine, although it's not always optimally -efficient. +efficient. Surprisingly, the following dangerous-looking construct will actually work out fine: @@ -228,7 +224,7 @@ actually work out fine: for $i (1..10) { my @list = somefunc($i); $LoL[$i] = \@list; - } + } That's because my() is more of a run-time statement than it is a compile-time declaration I<per se>. 
This means that the my() variable is @@ -251,7 +247,7 @@ In summary: @{ $LoL[$i] } = @list; # way too tricky for most programmers -=head1 CAVEAT ON PRECEDENCE +=head1 CAVEAT ON PRECEDENCE Speaking of things like C<@{$LoL[$i]}>, the following are actually the same thing: @@ -290,29 +286,24 @@ this: my $listref = [ [ "fred", "barney", "pebbles", "bambam", "dino", ], [ "homer", "bart", "marge", "maggie", ], - [ "george", "jane", "alroy", "judy", ], + [ "george", "jane", "elroy", "judy", ], ]; print $listref[2][2]; The compiler would immediately flag that as an error I<at compile time>, because you were accidentally accessing C<@listref>, an undeclared -variable, and it would thereby remind you to instead write: +variable, and it would thereby remind you to write instead: print $listref->[2][2] =head1 DEBUGGING -The standard Perl debugger in 5.001 doesn't do a very nice job of -printing out complex data structures. However, the perl5db that -Ilya Zakharevich E<lt>F<ilya@math.ohio-state.edu>E<gt> -wrote, which is accessible at - - ftp://ftp.perl.com/pub/perl/ext/perl5db-kit-0.9.tar.gz - -has several new features, including command line editing as well -as the C<x> command to dump out complex data structures. For example, -given the assignment to $LoL above, here's the debugger output: +Before version 5.002, the standard Perl debugger didn't do a very nice job of +printing out complex data structures. With 5.002 or above, the +debugger includes several new features, including command line editing as +well as the C<x> command to dump out complex data structures. For +example, given the assignment to $LoL above, here's the debugger output: DB<1> X $LoL $LoL = ARRAY(0x13b5a0) @@ -330,15 +321,15 @@ given the assignment to $LoL above, here's the debugger output: 2 ARRAY(0x13b540) 0 'george' 1 'jane' - 2 'alroy' + 2 'elroy' 3 'judy' -There's also a lower-case B<x> command which is nearly the same. +There's also a lowercase B<x> command which is nearly the same. 
=head1 CODE EXAMPLES -Presented with little comment (these will get their own man pages someday) -here are short code examples illustrating access of various +Presented with little comment (these will get their own manpages someday) +here are short code examples illustrating access of various types of data structures. =head1 LISTS OF LISTS @@ -356,18 +347,18 @@ types of data structures. # reading from file while ( <> ) { push @LoL, [ split ]; - + } # calling a function for $i ( 1 .. 10 ) { $LoL[$i] = [ somefunc($i) ]; - + } # using temp vars for $i ( 1 .. 10 ) { @tmp = somefunc($i); $LoL[$i] = [ @tmp ]; - + } # add to an existing row push @{ $LoL[0] }, "wilma", "betty"; @@ -383,28 +374,28 @@ types of data structures. # print the whole thing with refs for $aref ( @LoL ) { print "\t [ @$aref ],\n"; - + } # print the whole thing with indices for $i ( 0 .. $#LoL ) { print "\t [ @{$LoL[$i]} ],\n"; - + } # print the whole thing one at a time for $i ( 0 .. $#LoL ) { - for $j ( 0 .. $#{$LoL[$i]} ) { + for $j ( 0 .. $#{ $LoL[$i] } ) { print "elt $i $j is $LoL[$i][$j]\n"; } - + } =head1 HASHES OF LISTS =head2 Declaration of a HASH OF LISTS %HoL = ( - "flintstones" => [ "fred", "barney" ], - "jetsons" => [ "george", "jane", "elroy" ], - "simpsons" => [ "homer", "marge", "bart" ], + flintstones => [ "fred", "barney" ], + jetsons => [ "george", "jane", "elroy" ], + simpsons => [ "homer", "marge", "bart" ], ); =head2 Generation of a HASH OF LISTS @@ -414,7 +405,7 @@ types of data structures. while ( <> ) { next unless s/^(.*?):\s*//; $HoL{$1} = [ split ]; - + } # reading from file; more temps # flintstones: fred barney wilma dino @@ -422,18 +413,18 @@ types of data structures. 
($who, $rest) = split /:\s*/, $line, 2; @fields = split ' ', $rest; $HoL{$who} = [ @fields ]; - + } # calling a function that returns a list for $group ( "simpsons", "jetsons", "flintstones" ) { $HoL{$group} = [ get_family($group) ]; - + } # likewise, but using temps for $group ( "simpsons", "jetsons", "flintstones" ) { @members = get_family($group); $HoL{$group} = [ @members ]; - + } # append new members to an existing family push @{ $HoL{"flintstones"} }, "wilma", "betty"; @@ -449,24 +440,31 @@ types of data structures. # print the whole thing foreach $family ( keys %HoL ) { print "$family: @{ $HoL{$family} }\n" - + } # print the whole thing with indices foreach $family ( keys %HoL ) { print "family: "; - foreach $i ( 0 .. $#{ $HoL{$family} ) { + foreach $i ( 0 .. $#{ $HoL{$family} } ) { print " $i = $HoL{$family}[$i]"; } print "\n"; - + } # print the whole thing sorted by number of members - foreach $family ( sort { @{$HoL{$b}} <=> @{$HoL{$b}} } keys %HoL ) { + foreach $family ( sort { @{$HoL{$b}} <=> @{$HoL{$a}} } keys %HoL ) { print "$family: @{ $HoL{$family} }\n" + } # print the whole thing sorted by number of members and name - foreach $family ( sort { @{$HoL{$b}} <=> @{$HoL{$a}} } keys %HoL ) { + foreach $family ( sort { + @{$HoL{$b}} <=> @{$HoL{$a}} + || + $a cmp $b + } keys %HoL ) + { print "$family: ", join(", ", sort @{ $HoL{$family}), "\n"; + } =head1 LISTS OF HASHES @@ -474,8 +472,8 @@ types of data structures. @LoH = ( { - Lead => "fred", - Friend => "barney", + Lead => "fred", + Friend => "barney", }, { Lead => "george", @@ -500,6 +498,7 @@ types of data structures. $rec->{$key} = $value; } push @LoH, $rec; + } # reading from file @@ -507,30 +506,30 @@ types of data structures. 
# no temp while ( <> ) { push @LoH, { split /[\s+=]/ }; - + } # calling a function that returns a key,value list, like # "lead","fred","daughter","pebbles" - while ( %fields = getnextpairset() ) + while ( %fields = getnextpairset() ) { push @LoH, { %fields }; - + } # likewise, but using no temp vars while (<>) { push @LoH, { parsepairs($_) }; - + } # add key/value to an element - $LoH[0]{"pet"} = "dino"; - $LoH[2]{"pet"} = "santa's little helper"; + $LoH[0]{pet} = "dino"; + $LoH[2]{pet} = "santa's little helper"; =head2 Access and Printing of a LIST OF HASHES # one element - $LoH[0]{"lead"} = "fred"; + $LoH[0]{lead} = "fred"; # another element - $LoH[1]{"lead"} =~ s/(\w)/\u$1/; + $LoH[1]{lead} =~ s/(\w)/\u$1/; # print the whole thing with refs for $href ( @LoH ) { @@ -539,7 +538,7 @@ types of data structures. print "$role=$href->{$role} "; } print "}\n"; - + } # print the whole thing with indices for $i ( 0 .. $#LoH ) { @@ -548,33 +547,35 @@ types of data structures. print "$role=$LoH[$i]{$role} "; } print "}\n"; - + } # print the whole thing one at a time for $i ( 0 .. $#LoH ) { for $role ( keys %{ $LoH[$i] } ) { print "elt $i $role is $LoH[$i]{$role}\n"; } + } =head1 HASHES OF HASHES =head2 Declaration of a HASH OF HASHES %HoH = ( - "flintstones" => { - "lead" => "fred", - "pal" => "barney", + flintstones => { + lead => "fred", + pal => "barney", }, - "jetsons" => { - "lead" => "george", - "wife" => "jane", - "his boy"=> "elroy", - } - "simpsons" => { - "lead" => "homer", - "wife" => "marge", - "kid" => "bart", - ); + jetsons => { + lead => "george", + wife => "jane", + "his boy" => "elroy", + }, + simpsons => { + lead => "homer", + wife => "marge", + kid => "bart", + }, + ); =head2 Generation of a HASH OF HASHES @@ -599,81 +600,78 @@ types of data structures. 
($key, $value) = split /=/, $field; $rec->{$key} = $value; } - - - # calling a function that returns a key,value list, like - # "lead","fred","daughter","pebbles" - while ( %fields = getnextpairset() ) - push @a, { %fields }; - + } # calling a function that returns a key,value hash for $group ( "simpsons", "jetsons", "flintstones" ) { $HoH{$group} = { get_family($group) }; - + } # likewise, but using temps for $group ( "simpsons", "jetsons", "flintstones" ) { %members = get_family($group); $HoH{$group} = { %members }; - + } # append new members to an existing family %new_folks = ( - "wife" => "wilma", - "pet" => "dino"; + wife => "wilma", + pet => "dino"; ); + for $what (keys %new_folks) { $HoH{flintstones}{$what} = $new_folks{$what}; - + } =head2 Access and Printing of a HASH OF HASHES # one element - $HoH{"flintstones"}{"wife"} = "wilma"; + $HoH{flintstones}{wife} = "wilma"; # another element $HoH{simpsons}{lead} =~ s/(\w)/\u$1/; # print the whole thing foreach $family ( keys %HoH ) { - print "$family: "; - for $role ( keys %{ $HoH{$family} } { + print "$family: { "; + for $role ( keys %{ $HoH{$family} } ) { print "$role=$HoH{$family}{$role} "; } print "}\n"; - + } # print the whole thing somewhat sorted foreach $family ( sort keys %HoH ) { - print "$family: "; - for $role ( sort keys %{ $HoH{$family} } { + print "$family: { "; + for $role ( sort keys %{ $HoH{$family} } ) { print "$role=$HoH{$family}{$role} "; } print "}\n"; + } # print the whole thing sorted by number of members - foreach $family ( sort { keys %{$HoH{$b}} <=> keys %{$HoH{$b}} } keys %HoH ) { - print "$family: "; - for $role ( sort keys %{ $HoH{$family} } { + foreach $family ( sort { keys %{$HoH{$b}} <=> keys %{$HoH{$a}} } keys %HoH ) { + print "$family: { "; + for $role ( sort keys %{ $HoH{$family} } ) { print "$role=$HoH{$family}{$role} "; } print "}\n"; - + } # establish a sort order (rank) for each role $i = 0; for ( qw(lead wife son daughter pal pet) ) { $rank{$_} = ++$i } # now print the 
whole thing sorted by number of members - foreach $family ( sort { keys %{$HoH{$b}} <=> keys %{$HoH{$b}} } keys %HoH ) { - print "$family: "; + foreach $family ( sort { keys %{ $HoH{$b} } <=> keys %{ $HoH{$a} } } keys %HoH ) { + print "$family: { "; # and print these according to rank order - for $role ( sort { $rank{$a} <=> $rank{$b} keys %{ $HoH{$family} } { + for $role ( sort { $rank{$a} <=> $rank{$b} } keys %{ $HoH{$family} } ) { print "$role=$HoH{$family}{$role} "; } print "}\n"; + } =head1 MORE ELABORATE RECORDS @@ -684,48 +682,48 @@ Here's a sample showing how to create and use a record whose fields are of many different sorts: $rec = { - STRING => $string, - LIST => [ @old_values ], - LOOKUP => { %some_table }, - FUNC => \&some_function, - FANON => sub { $_[0] ** $_[1] }, - FH => \*STDOUT, + TEXT => $string, + SEQUENCE => [ @old_values ], + LOOKUP => { %some_table }, + THATCODE => \&some_function, + THISCODE => sub { $_[0] ** $_[1] }, + HANDLE => \*STDOUT, }; - print $rec->{STRING}; + print $rec->{TEXT}; print $rec->{LIST}[0]; - $last = pop @ { $rec->{LIST} }; + $last = pop @ { $rec->{SEQUENCE} }; print $rec->{LOOKUP}{"key"}; ($first_k, $first_v) = each %{ $rec->{LOOKUP} }; - $answer = &{ $rec->{FUNC} }($arg); - $answer = &{ $rec->{FANON} }($arg1, $arg2); + $answer = $rec->{THATCODE}->($arg); + $answer = $rec->{THISCODE}->($arg1, $arg2); # careful of extra block braces on fh ref - print { $rec->{FH} } "a string\n"; + print { $rec->{HANDLE} } "a string\n"; use FileHandle; - $rec->{FH}->autoflush(1); - $rec->{FH}->print(" a string\n"); + $rec->{HANDLE}->autoflush(1); + $rec->{HANDLE}->print(" a string\n"); =head2 Declaration of a HASH OF COMPLEX RECORDS %TV = ( - "flintstones" => { + flintstones => { series => "flintstones", - nights => [ qw(monday thursday friday) ]; + nights => [ qw(monday thursday friday) ], members => [ { name => "fred", role => "lead", age => 36, }, { name => "wilma", role => "wife", age => 31, }, - { name => "pebbles", role => "kid", 
age => 4, }, + { name => "pebbles", role => "kid", age => 4, }, ], }, - "jetsons" => { + jetsons => { series => "jetsons", - nights => [ qw(wednesday saturday) ]; + nights => [ qw(wednesday saturday) ], members => [ { name => "george", role => "lead", age => 41, }, { name => "jane", role => "wife", age => 39, }, @@ -733,9 +731,9 @@ many different sorts: ], }, - "simpsons" => { + simpsons => { series => "simpsons", - nights => [ qw(monday) ]; + nights => [ qw(monday) ], members => [ { name => "homer", role => "lead", age => 34, }, { name => "marge", role => "wife", age => 37, }, @@ -749,7 +747,7 @@ many different sorts: # reading from file # this is most easily done by having the file itself be # in the raw data format as shown above. perl is happy - # to parse complex datastructures if declared as data, so + # to parse complex data structures if declared as data, so # sometimes it's easiest to do that # here's a piece by piece build up @@ -759,7 +757,7 @@ many different sorts: @members = (); # assume this file in field=value syntax - while () { + while (<>) { %fields = split /[\s=]+/; push @members, { %fields }; } @@ -779,7 +777,7 @@ many different sorts: foreach $family (keys %TV) { $rec = $TV{$family}; # temp pointer @kids = (); - for $person ( @{$rec->{members}} ) { + for $person ( @{ $rec->{members} } ) { if ($person->{role} =~ /kid|son|daughter/) { push @kids, $person; } @@ -808,7 +806,7 @@ many different sorts: for $who ( @{ $TV{$family}{members} } ) { print " $who->{name} ($who->{role}), age $who->{age}\n"; } - print "it turns out that $TV{$family}{'lead'} has "; + print "it turns out that $TV{$family}{lead} has "; print scalar ( @{ $TV{$family}{kids} } ), " kids named "; print join (", ", map { $_->{name} } @{ $TV{$family}{kids} } ); print "\n"; @@ -820,18 +818,17 @@ You cannot easily tie a multilevel data structure (such as a hash of hashes) to a dbm file. 
The first problem is that all but GDBM and Berkeley DB have size limitations, but beyond that, you also have problems with how references are to be represented on disk. One experimental -module that does attempt to partially address this need is the MLDBM -module. Check your nearest CPAN site as described in L<perlmod> for +module that does partially attempt to address this need is the MLDBM +module. Check your nearest CPAN site as described in L<perlmodlib> for source code to MLDBM. =head1 SEE ALSO -L<perlref>, L<perllol>, L<perldata>, L<perlobj> +perlref(1), perllol(1), perldata(1), perlobj(1) =head1 AUTHOR -Tom Christiansen E<lt>F<tchrist@perl.com>E<gt> - -Last update: -Tue Dec 12 09:20:26 MST 1995 +Tom Christiansen <F<tchrist@perl.com>> +Last update: +Wed Oct 23 04:57:50 MET DST 1996 diff --git a/pod/perlembed.pod b/pod/perlembed.pod index 2f0e9c30fb..79783a7d30 100644 --- a/pod/perlembed.pod +++ b/pod/perlembed.pod @@ -10,24 +10,24 @@ Do you want to: =over 5 -=item B<Use C from Perl?> +=item B<Use C from Perl?> Read L<perlcall> and L<perlxs>. -=item B<Use a UNIX program from Perl?> +=item B<Use a Unix program from Perl?> -Read about backquotes and L<perlfunc/system> and L<perlfunc/exec>. +Read about back-quotes and about C<system> and C<exec> in L<perlfunc>. -=item B<Use Perl from Perl?> +=item B<Use Perl from Perl?> -Read about L<perlfunc/do> and L<perlfunc/eval> and L<perlmod/use> -and L<perlmod/require>. +Read about L<perlfunc/do> and L<perlfunc/eval> and L<perlfunc/require> +and L<perlfunc/use>. -=item B<Use C from C?> +=item B<Use C from C?> Rethink your design. -=item B<Use Perl from C?> +=item B<Use Perl from C?> Read on... @@ -37,7 +37,7 @@ Read on... 
L<Compiling your C program> -There's one example in each of the five sections: +There's one example in each of the eight sections: L<Adding a Perl interpreter to your C program> @@ -49,12 +49,22 @@ L<Performing Perl pattern matches and substitutions from your C program> L<Fiddling with the Perl stack from your C program> -This documentation is UNIX specific. +L<Maintaining a persistent interpreter> + +L<Maintaining multiple interpreter instances> + +L<Using Perl modules, which themselves use C libraries, from your C program> + +This documentation is Unix specific; if you have information about how +to embed Perl on other platforms, please send e-mail to <F<orwant@tpj.com>>. =head2 Compiling your C program -Every C program that uses Perl must link in the I<perl library>. +If you have trouble compiling the scripts in this documentation, +you're not alone. The cardinal rule: COMPILE THE PROGRAMS IN EXACTLY +THE SAME WAY THAT YOUR PERL WAS COMPILED. (Sorry for yelling.) +Also, every C program that uses Perl must link in the I<perl library>. What's that, you ask? Perl is itself written in C; the perl library is the collection of compiled C programs that were used to create your perl executable (I</usr/bin/perl> or equivalent). (Corollary: you @@ -63,13 +73,14 @@ your machine, or installed properly--that's why you shouldn't blithely copy Perl executables from machine to machine without also copying the I<lib> directory.) -Your C program will--usually--allocate, "run", and deallocate a -I<PerlInterpreter> object, which is defined in the perl library. +When you use Perl from C, your C program will--usually--allocate, +"run", and deallocate a I<PerlInterpreter> object, which is defined by +the perl library. 
If your copy of Perl is recent enough to contain this documentation -(5.002 or later), then the perl library (and I<EXTERN.h> and -I<perl.h>, which you'll also need) will -reside in a directory resembling this: +(version 5.002 or later), then the perl library (and I<EXTERN.h> and +I<perl.h>, which you'll also need) will reside in a directory +that looks like this: /usr/local/lib/perl5/your_architecture_here/CORE @@ -83,54 +94,89 @@ or maybe something like Execute this statement for a hint about where to find CORE: - perl -e 'use Config; print $Config{archlib}' + perl -MConfig -e 'print $Config{archlib}' + +Here's how you'd compile the example in the next section, +L<Adding a Perl interpreter to your C program>, on my Linux box: + + % gcc -O2 -Dbool=char -DHAS_BOOL -I/usr/local/include + -I/usr/local/lib/perl5/i586-linux/5.003/CORE + -L/usr/local/lib/perl5/i586-linux/5.003/CORE + -o interp interp.c -lperl -lm + +(That's all one line.) On my DEC Alpha running 5.003_05, the incantation +is a bit different: -Here's how you might compile the example in the next section, -L<Adding a Perl interpreter to your C program>, -on a DEC Alpha running the OSF operating system: + % cc -O2 -Olimit 2900 -DSTANDARD_C -I/usr/local/include + -I/usr/local/lib/perl5/alpha-dec_osf/5.00305/CORE + -L/usr/local/lib/perl5/alpha-dec_osf/5.00305/CORE -L/usr/local/lib + -D__LANGUAGE_C__ -D_NO_PROTO -o interp interp.c -lperl -lm - % cc -o interp interp.c -L/usr/local/lib/perl5/alpha-dec_osf/CORE - -I/usr/local/lib/perl5/alpha-dec_osf/CORE -lperl -lm +How can you figure out what to add? Assuming your Perl is post-5.001, +execute a C<perl -V> command and pay special attention to the "cc" and +"ccflags" information. -You'll have to choose the appropriate compiler (I<cc>, I<gcc>, et al.) and -library directory (I</usr/local/lib/...>) for your machine. 
If your -compiler complains that certain functions are undefined, or that it -can't locate I<-lperl>, then you need to change the path following the --L. If it complains that it can't find I<EXTERN.h> or I<perl.h>, you need -to change the path following the -I. +You'll have to choose the appropriate compiler (I<cc>, I<gcc>, et al.) for +your machine: C<perl -MConfig -e 'print $Config{cc}'> will tell you what +to use. + +You'll also have to choose the appropriate library directory +(I</usr/local/lib/...>) for your machine. If your compiler complains +that certain functions are undefined, or that it can't locate +I<-lperl>, then you need to change the path following the C<-L>. If it +complains that it can't find I<EXTERN.h> and I<perl.h>, you need to +change the path following the C<-I>. You may have to add extra libraries as well. Which ones? -Perhaps those printed by +Perhaps those printed by + + perl -MConfig -e 'print $Config{libs}' + +Provided your perl binary was properly configured and installed, the +B<ExtUtils::Embed> module will determine all of this information for +you: + + % cc -o interp interp.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + +If the B<ExtUtils::Embed> module isn't part of your Perl distribution, +you can retrieve it from +http://www.perl.com/perl/CPAN/modules/by-module/ExtUtils::Embed. (If +this documentation came from your Perl distribution, then you're +running 5.004 or better and you already have it.) - perl -e 'use Config; print $Config{libs}' +The B<ExtUtils::Embed> kit on CPAN also contains all source code for +the examples in this document, tests, additional examples and other +information you may find useful. =head2 Adding a Perl interpreter to your C program In a sense, perl (the C program) is a good example of embedding Perl (the language), so I'll demonstrate embedding with I<miniperlmain.c>, -from the source distribution. 
Here's a bastardized, non-portable version of -I<miniperlmain.c> containing the essentials of embedding: +from the source distribution. Here's a bastardized, nonportable +version of I<miniperlmain.c> containing the essentials of embedding: - #include <stdio.h> #include <EXTERN.h> /* from the Perl distribution */ #include <perl.h> /* from the Perl distribution */ - + static PerlInterpreter *my_perl; /*** The Perl interpreter ***/ - + int main(int argc, char **argv, char **env) { my_perl = perl_alloc(); perl_construct(my_perl); - perl_parse(my_perl, NULL, argc, argv, env); + perl_parse(my_perl, NULL, argc, argv, (char **)NULL); perl_run(my_perl); perl_destruct(my_perl); perl_free(my_perl); } +Notice that we don't use the C<env> pointer. Normally handed to +C<perl_parse> as its final argument, C<env> here is replaced by +C<NULL>, which means that the current environment will be used. + Now compile this program (I'll call it I<interp.c>) into an executable: - % cc -o interp interp.c -L/usr/local/lib/perl5/alpha-dec_osf/CORE - -I/usr/local/lib/perl5/alpha-dec_osf/CORE -lperl -lm + % cc -o interp interp.c `perl -MExtUtils::Embed -e ccopts -e ldopts` After a successful compilation, you'll be able to use I<interp> just like perl itself: @@ -149,122 +195,117 @@ or You can also read and execute Perl statements from a file while in the midst of your C program, by placing the filename in I<argv[1]> before -calling I<perl_run()>. +calling I<perl_run()>. =head2 Calling a Perl subroutine from your C program -To call individual Perl subroutines, you'll need to remove the call to -I<perl_run()> and replace it with a call to I<perl_call_argv()>. +To call individual Perl subroutines, you can use any of the B<perl_call_*> +functions documented in the L<perlcall> manpage. +In this example we'll use I<perl_call_argv>. That's shown below, in a program I'll call I<showtime.c>. 
- #include <stdio.h> #include <EXTERN.h> - #include <perl.h> - - static PerlInterpreter *my_perl; - + #include <perl.h> + + static PerlInterpreter *my_perl; + int main(int argc, char **argv, char **env) { + char *args[] = { NULL }; my_perl = perl_alloc(); perl_construct(my_perl); - - perl_parse(my_perl, NULL, argc, argv, env); - - /*** This replaces perl_run() ***/ - perl_call_argv("showtime", G_DISCARD | G_NOARGS, argv); + + perl_parse(my_perl, NULL, argc, argv, NULL); + + /*** skipping perl_run() ***/ + + perl_call_argv("showtime", G_DISCARD | G_NOARGS, args); + perl_destruct(my_perl); perl_free(my_perl); } where I<showtime> is a Perl subroutine that takes no arguments (that's the -I<G_NOARGS>) and for which I'll ignore the return value (that's the +I<G_NOARGS>) and for which I'll ignore the return value (that's the I<G_DISCARD>). Those flags, and others, are discussed in L<perlcall>. I'll define the I<showtime> subroutine in a file called I<showtime.pl>: print "I shan't be printed."; - + sub showtime { print time; } Simple enough. Now compile and run: - % cc -o showtime showtime.c -L/usr/local/lib/perl5/alpha-dec_osf/CORE - -I/usr/local/lib/perl5/alpha-dec_osf/CORE -lperl -lm - + % cc -o showtime showtime.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + % showtime showtime.pl 818284590 yielding the number of seconds that elapsed between January 1, 1970 -(the beginning of the UNIX epoch), and the moment I began writing this +(the beginning of the Unix epoch), and the moment I began writing this sentence. 
-If you want to pass some arguments to the Perl subroutine, or -you want to access the return value, you'll need to manipulate the -Perl stack, demonstrated in the last section of this document: -L<Fiddling with the Perl stack from your C program> +In this particular case we don't have to call I<perl_run>, but in +general it's considered good practice to ensure proper initialization +of library code, including execution of all object C<DESTROY> methods +and package C<END {}> blocks. -=head2 Evaluating a Perl statement from your C program +If you want to pass arguments to the Perl subroutine, you can add +strings to the C<NULL>-terminated C<args> list passed to +I<perl_call_argv>. For other data types, or to examine return values, +you'll need to manipulate the Perl stack. That's demonstrated in the +last section of this document: L<Fiddling with the Perl stack from +your C program>. -NOTE: This section, and the next, employ some very brittle techniques -for evaluting strings of Perl code. Perl 5.002 contains some nifty -features that enable A Better Way (such as with L<perlguts/perl_eval_sv>). -Look for updates to this document soon. +=head2 Evaluating a Perl statement from your C program -One way to evaluate a Perl string is to define a function (we'll call -ours I<perl_eval()>) that wraps around Perl's L<perlfunc/eval>. +Perl provides two API functions to evaluate pieces of Perl code. +These are L<perlguts/perl_eval_sv()> and L<perlguts/perl_eval_pv()>. -Arguably, this is the only routine you'll ever need to execute -snippets of Perl code from within your C program. Your string can be -as long as you wish; it can contain multiple statements; it can -use L<perlmod/require> or L<perlfunc/do> to include external Perl -files. +Arguably, these are the only routines you'll ever need to execute +snippets of Perl code from within your C program. 
Your code can be +as long as you wish; it can contain multiple statements; it can employ +L<perlfunc/use>, L<perlfunc/require> and L<perlfunc/do> to include +external Perl files. -Our I<perl_eval()> lets us evaluate individual Perl strings, and then -extract variables for coercion into C types. The following program, +I<perl_eval_pv()> lets us evaluate individual Perl strings, and then +extract variables for coercion into C types. The following program, I<string.c>, executes three Perl strings, extracting an C<int> from the first, a C<float> from the second, and a C<char *> from the third. - #include <stdio.h> #include <EXTERN.h> #include <perl.h> static PerlInterpreter *my_perl; - int perl_eval(char *string) - { - char *argv[2]; - argv[0] = string; - argv[1] = NULL; - perl_call_argv("_eval_", 0, argv); - } - main (int argc, char **argv, char **env) { - char *embedding[] = { "", "-e", "sub _eval_ { eval $_[0] }" }; - STRLEN length; + char *embedding[] = { "", "-e", "0" }; - my_perl = perl_alloc(); - perl_construct( my_perl ); + my_perl = perl_alloc(); + perl_construct( my_perl ); - perl_parse(my_perl, NULL, 3, embedding, env); + perl_parse(my_perl, NULL, 3, embedding, NULL); + perl_run(my_perl); - /** Treat $a as an integer **/ - perl_eval("$a = 3; $a **= 2"); - printf("a = %d\n", SvIV(perl_get_sv("a", FALSE))); + /** Treat $a as an integer **/ + perl_eval_pv("$a = 3; $a **= 2", TRUE); + printf("a = %d\n", SvIV(perl_get_sv("a", FALSE))); - /** Treat $a as a float **/ - perl_eval("$a = 3.14; $a **= 2"); - printf("a = %f\n", SvNV(perl_get_sv("a", FALSE))); + /** Treat $a as a float **/ + perl_eval_pv("$a = 3.14; $a **= 2", TRUE); + printf("a = %f\n", SvNV(perl_get_sv("a", FALSE))); - /** Treat $a as a string **/ - perl_eval("$a = 'rekcaH lreP rehtonA tsuJ'; $a = reverse($a); "); - printf("a = %s\n", SvPV(perl_get_sv("a", FALSE), length)); + /** Treat $a as a string **/ + perl_eval_pv("$a = 'rekcaH lreP rehtonA tsuJ'; $a = reverse($a);", TRUE); + printf("a = %s\n", 
SvPV(perl_get_sv("a", FALSE), na)); - perl_destruct(my_perl); - perl_free(my_perl); + perl_destruct(my_perl); + perl_free(my_perl); } All of those strange functions with I<sv> in their names help convert Perl scalars to C types. They're described in L<perlguts>. @@ -277,94 +318,96 @@ I<SvPV()> to create a string: a = 9.859600 a = Just Another Perl Hacker +In the example above, we've created a global variable to temporarily +store the computed value of our eval'd expression. It is also +possible and in most cases a better strategy to fetch the return value +from L<perl_eval_pv> instead. Example: + + ... + SV *val = perl_eval_pv("reverse 'rekcaH lreP rehtonA tsuJ'", TRUE); + printf("%s\n", SvPV(val,na)); + ... + +This way, we avoid namespace pollution by not creating global +variables and we've simplified our code as well. =head2 Performing Perl pattern matches and substitutions from your C program -Our I<perl_eval()> lets us evaluate strings of Perl code, so we can +The I<perl_eval_pv()> function lets us evaluate strings of Perl code, so we can define some functions that use it to "specialize" in matches and substitutions: I<match()>, I<substitute()>, and I<matches()>. - char match(char *string, char *pattern); + char match(char *string, char *pattern); -Given a string and a pattern (e.g. "m/clasp/" or "/\b\w*\b/", which in -your program might be represented as C<"/\\b\\w*\\b/">), +Given a string and a pattern (e.g., C<m/clasp/> or C</\b\w*\b/>, which +in your C program might appear as "/\\b\\w*\\b/"), match() returns 1 if the string matches the pattern and 0 otherwise. - int substitute(char *string[], char *pattern); -Given a pointer to a string and an "=~" operation (e.g. "s/bob/robert/g" or -"tr[A-Z][a-z]"), modifies the string according to the operation, -returning the number of substitutions made. 
+Given a pointer to a string and an C<=~> operation (e.g., +C<s/bob/robert/g> or C<tr[A-Z][a-z]>), substitute() modifies the string +according to the operation, returning the number of substitutions +made. int matches(char *string, char *pattern, char **matches[]); Given a string, a pattern, and a pointer to an empty array of strings, -evaluates C<$string =~ $pattern> in an array context, and fills in -I<matches> with the array elements (allocating memory as it does so), -returning the number of matches found. +matches() evaluates C<$string =~ $pattern> in an array context, and +fills in I<matches> with the array elements (allocating memory as it +does so), returning the number of matches found. -Here's a sample program, I<match.c>, that uses all three: +Here's a sample program, I<match.c>, that uses all three (long lines have +been wrapped here): - #include <stdio.h> #include <EXTERN.h> #include <perl.h> - + static PerlInterpreter *my_perl; - - int eval(char *string) - { - char *argv[2]; - argv[0] = string; - argv[1] = NULL; - perl_call_argv("_eval_", 0, argv); - } - + /** match(string, pattern) - ** - ** Used for matches in a scalar context. - ** - ** Returns 1 if the match was successful; 0 otherwise. - **/ - char match(char *string, char *pattern) + ** + ** Used for matches in a scalar context. + ** + ** Returns 1 if the match was successful; 0 otherwise. 
+ **/ + char match(char *string, char *pattern) { char *command; command = malloc(sizeof(char) * strlen(string) + strlen(pattern) + 37); - sprintf(command, "$string = '%s'; $return = $string =~ %s", - string, pattern); - perl_eval(command); + sprintf(command, "$string = '%s'; $return = $string =~ %s", + string, pattern); + perl_eval_pv(command, TRUE); free(command); return SvIV(perl_get_sv("return", FALSE)); } - /** substitute(string, pattern) - ** - ** Used for =~ operations that modify their left-hand side (s/// and tr///) - ** - ** Returns the number of successful matches, and - ** modifies the input string if there were any. - **/ - int substitute(char *string[], char *pattern) + ** + ** Used for =~ operations that modify their left-hand side (s/// and tr///) + ** + ** Returns the number of successful matches, and + ** modifies the input string if there were any. + **/ + int substitute(char *string[], char *pattern) { char *command; STRLEN length; command = malloc(sizeof(char) * strlen(*string) + strlen(pattern) + 35); - sprintf(command, "$string = '%s'; $ret = ($string =~ %s)", - *string, pattern); - perl_eval(command); + sprintf(command, "$string = '%s'; $ret = ($string =~ %s)", + *string, pattern); + perl_eval_pv(command, TRUE); free(command); *string = SvPV(perl_get_sv("string", FALSE), length); return SvIV(perl_get_sv("ret", FALSE)); } - /** matches(string, pattern, matches) - ** - ** Used for matches in an array context. - ** - ** Returns the number of matches, - ** and fills in **matches with the matching substrings (allocates memory!) - **/ - int matches(char *string, char *pattern, char **matches[]) + ** + ** Used for matches in an array context. + ** + ** Returns the number of matches, + ** and fills in **matches with the matching substrings (allocates memory!) 
+ **/ + int matches(char *string, char *pattern, char **match_list[]) { char *command; SV *current_match; @@ -372,93 +415,93 @@ Here's a sample program, I<match.c>, that uses all three: I32 num_matches; STRLEN length; int i; - command = malloc(sizeof(char) * strlen(string) + strlen(pattern) + 38); - sprintf(command, "$string = '%s'; @array = ($string =~ %s)", - string, pattern); - perl_eval(command); + sprintf(command, "$string = '%s'; @array = ($string =~ %s)", + string, pattern); + perl_eval_pv(command, TRUE); free(command); array = perl_get_av("array", FALSE); num_matches = av_len(array) + 1; /** assume $[ is 0 **/ - *matches = (char **) malloc(sizeof(char *) * num_matches); - for (i = 0; i <= num_matches; i++) { + *match_list = (char **) malloc(sizeof(char *) * num_matches); + for (i = 0; i <= num_matches; i++) { current_match = av_shift(array); - (*matches)[i] = SvPV(current_match, length); + (*match_list)[i] = SvPV(current_match, length); } return num_matches; } - main (int argc, char **argv, char **env) { - char *embedding[] = { "", "-e", "sub _eval_ { eval $_[0] }" }; - char *text, **matches; + char *embedding[] = { "", "-e", "0" }; + char *text, **match_list; int num_matches, i; int j; - my_perl = perl_alloc(); perl_construct( my_perl ); - - perl_parse(my_perl, NULL, 3, embedding, env); - + perl_parse(my_perl, NULL, 3, embedding, NULL); + perl_run(my_perl); + text = (char *) malloc(sizeof(char) * 486); /** A long string follows! **/ - sprintf(text, "%s", "When he is at a convenience store and the bill comes to some amount like 76 cents, Maynard is aware that there is something he *should* do, something that will enable him to get back a quarter, but he has no idea *what*. He fumbles through his red squeezey changepurse and gives the boy three extra pennies with his dollar, hoping that he might luck into the correct amount. The boy gives him back two of his own pennies and then the big shiny quarter that is his prize. 
-RICHH"); - - if (perl_match(text, "m/quarter/")) /** Does text contain 'quarter'? **/ - printf("perl_match: Text contains the word 'quarter'.\n\n"); - else - printf("perl_match: Text doesn't contain the word 'quarter'.\n\n"); - - if (perl_match(text, "m/eighth/")) /** Does text contain 'eighth'? **/ - printf("perl_match: Text contains the word 'eighth'.\n\n"); - else - printf("perl_match: Text doesn't contain the word 'eighth'.\n\n"); - - /** Match all occurrences of /wi../ **/ - num_matches = perl_matches(text, "m/(wi..)/g", &matches); - - printf("perl_matches: m/(wi..)/g found %d matches...\n", num_matches); - for (i = 0; i < num_matches; i++) - printf("match: %s\n", matches[i]); + sprintf(text, "%s", "When he is at a convenience store and the bill \ + comes to some amount like 76 cents, Maynard is aware that there is \ + something he *should* do, something that will enable him to get back \ + a quarter, but he has no idea *what*. He fumbles through his red \ + squeezey changepurse and gives the boy three extra pennies with his \ + dollar, hoping that he might luck into the correct amount. The boy \ + gives him back two of his own pennies and then the big shiny quarter \ + that is his prize. -RICHH"); + if (match(text, "m/quarter/")) /** Does text contain 'quarter'? **/ + printf("match: Text contains the word 'quarter'.\n\n"); + else + printf("match: Text doesn't contain the word 'quarter'.\n\n"); + if (match(text, "m/eighth/")) /** Does text contain 'eighth'? 
**/ + printf("match: Text contains the word 'eighth'.\n\n"); + else + printf("match: Text doesn't contain the word 'eighth'.\n\n"); + /** Match all occurrences of /wi../ **/ + num_matches = matches(text, "m/(wi..)/g", &match_list); + printf("matches: m/(wi..)/g found %d matches...\n", num_matches); + for (i = 0; i < num_matches; i++) + printf("match: %s\n", match_list[i]); printf("\n"); for (i = 0; i < num_matches; i++) { - free(matches[i]); + free(match_list[i]); } - free(matches); - - /** Remove all vowels from text **/ - num_matches = perl_substitute(&text, "s/[aeiou]//gi"); + free(match_list); + /** Remove all vowels from text **/ + num_matches = substitute(&text, "s/[aeiou]//gi"); if (num_matches) { - printf("perl_substitute: s/[aeiou]//gi...%d substitutions made.\n", - num_matches); + printf("substitute: s/[aeiou]//gi...%d substitutions made.\n", + num_matches); printf("Now text is: %s\n\n", text); } - - /** Attempt a substitution - if (!perl_substitute(&text, "s/Perl/C/")) { - printf("perl_substitute: s/Perl/C...No substitution made.\n\n"); + /** Attempt a substitution **/ + if (!substitute(&text, "s/Perl/C/")) { + printf("substitute: s/Perl/C...No substitution made.\n\n"); } - free(text); - perl_destruct(my_perl); perl_free(my_perl); } -which produces the output +which produces the output (again, long lines have been wrapped here) - perl_match: Text contains the word 'quarter'. - - perl_match: Text doesn't contain the word 'eighth'. - - perl_matches: m/(wi..)/g found 2 matches... + match: Text contains the word 'quarter'. + + match: Text doesn't contain the word 'eighth'. + + matches: m/(wi..)/g found 2 matches... match: will match: with - - perl_substitute: s/[aeiou]//gi...139 substitutions made. - Now text is: Whn h s t cnvnnc str nd th bll cms t sm mnt lk 76 cnts, Mynrd s wr tht thr s smthng h *shld* d, smthng tht wll nbl hm t gt bck qrtr, bt h hs n d *wht*. 
H fmbls thrgh hs rd sqzy chngprs nd gvs th by thr xtr pnns wth hs dllr, hpng tht h mght lck nt th crrct mnt. Th by gvs hm bck tw f hs wn pnns nd thn th bg shny qrtr tht s hs prz. -RCHH - - perl_substitute: s/Perl/C...No substitution made. - + + substitute: s/[aeiou]//gi...139 substitutions made. + Now text is: Whn h s t cnvnnc str nd th bll cms t sm mnt lk 76 cnts, + Mynrd s wr tht thr s smthng h *shld* d, smthng tht wll nbl hm t gt bck + qrtr, bt h hs n d *wht*. H fmbls thrgh hs rd sqzy chngprs nd gvs th by + thr xtr pnns wth hs dllr, hpng tht h mght lck nt th crrct mnt. Th by gvs + hm bck tw f hs wn pnns nd thn th bg shny qrtr tht s hs prz. -RCHH + + substitute: s/Perl/C...No substitution made. + =head2 Fiddling with the Perl stack from your C program When trying to explain stacks, most computer science textbooks mumble @@ -467,7 +510,7 @@ thing you pushed on the stack is the first thing you pop off. That'll do for our purposes: your C program will push some arguments onto "the Perl stack", shut its eyes while some magic happens, and then pop the results--the return value of your Perl subroutine--off the stack. - + First you'll need to know how to convert between C types and Perl types, with newSViv() and sv_setnv() and newAV() and all their friends. They're described in L<perlguts>. @@ -475,11 +518,11 @@ friends. They're described in L<perlguts>. Then you'll need to know how to manipulate the Perl stack. That's described in L<perlcall>. -Once you've understood those, embedding Perl in C is easy. +Once you've understood those, embedding Perl in C is easy. -Since C has no built-in function for integer exponentiation, let's +Because C has no builtin function for integer exponentiation, let's make Perl's ** operator available to it (this is less useful than it -sounds, since Perl implements ** with C's I<pow()> function). First +sounds, because Perl implements ** with C's I<pow()> function). 
First I'll create a stub exponentiation function in I<power.pl>: sub expo { @@ -492,12 +535,11 @@ I<PerlPower()> that contains all the perlguts necessary to push the two arguments into I<expo()> and to pop the return value out. Take a deep breath... - #include <stdio.h> #include <EXTERN.h> #include <perl.h> - + static PerlInterpreter *my_perl; - + static void PerlPower(int a, int b) { @@ -512,54 +554,428 @@ deep breath... SPAGAIN; /* refresh stack pointer */ /* pop the return value from stack */ printf ("%d to the %dth power is %d.\n", a, b, POPi); - PUTBACK; + PUTBACK; FREETMPS; /* free that return value */ LEAVE; /* ...and the XPUSHed "mortal" args.*/ } - - int main (int argc, char **argv, char **env) + + int main (int argc, char **argv, char **env) { char *my_argv[2]; - + my_perl = perl_alloc(); perl_construct( my_perl ); - + my_argv[1] = (char *) malloc(10); sprintf(my_argv[1], "power.pl"); - - perl_parse(my_perl, NULL, argc, my_argv, env); - + + perl_parse(my_perl, NULL, argc, my_argv, NULL); + perl_run(my_perl); + PerlPower(3, 4); /*** Compute 3 ** 4 ***/ - + perl_destruct(my_perl); perl_free(my_perl); } - + Compile and run: - % cc -o power power.c -L/usr/local/lib/perl5/alpha-dec_osf/CORE - -I/usr/local/lib/perl5/alpha-dec_osf/CORE -lperl -lm - - % power + % cc -o power power.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + + % power 3 to the 4th power is 81. +=head2 Maintaining a persistent interpreter + +When developing interactive and/or potentially long-running +applications, it's a good idea to maintain a persistent interpreter +rather than allocating and constructing a new interpreter multiple +times. The major reason is speed: since Perl will only be loaded into +memory once. + +However, you have to be more cautious with namespace and variable +scoping when using a persistent interpreter. In previous examples +we've been using global variables in the default package C<main>. 
We +knew exactly what code would be run, and assumed we could avoid +variable collisions and outrageous symbol table growth. + +Let's say your application is a server that will occasionally run Perl +code from some arbitrary file. Your server has no way of knowing what +code it's going to run. Very dangerous. + +If the file is pulled in by C<perl_parse()>, compiled into a newly +constructed interpreter, and subsequently cleaned out with +C<perl_destruct()> afterwards, you're shielded from most namespace +troubles. + +One way to avoid namespace collisions in this scenario is to translate +the filename into a guaranteed-unique package name, and then compile +the code into that package using L<perlfunc/eval>. In the example +below, each file will only be compiled once. Or, the application +might choose to clean out the symbol table associated with the file +after it's no longer needed. Using L<perlcall/perl_call_argv>, we'll +call the subroutine C<Embed::Persistent::eval_file> which lives in the +file C<persistent.pl> and pass the filename and boolean cleanup/cache +flag as arguments. + +Note that the process will continue to grow for each file that it +uses. In addition, there might be C<AUTOLOAD>ed subroutines and other +conditions that cause Perl's symbol table to grow. You might want to +add some logic that keeps track of the process size, or restarts +itself after a certain number of requests, to ensure that memory +consumption is minimized. You'll also want to scope your variables +with L<perlfunc/my> whenever possible. + + + package Embed::Persistent; + #persistent.pl + + use strict; + use vars '%Cache'; + + sub valid_package_name { + my($string) = @_; + $string =~ s/([^A-Za-z0-9\/])/sprintf("_%2x",unpack("C",$1))/eg; + # second pass only for words starting with a digit + $string =~ s|/(\d)|sprintf("/_%2x",unpack("C",$1))|eg; + + # Dress it up as a real package name + $string =~ s|/|::|g; + return "Embed" .
$string; + } + + #borrowed from Safe.pm + sub delete_package { + my $pkg = shift; + my ($stem, $leaf); + + no strict 'refs'; + $pkg = "main::$pkg\::"; # expand to full symbol table name + ($stem, $leaf) = $pkg =~ m/(.*::)(\w+::)$/; + + my $stem_symtab = *{$stem}{HASH}; + + delete $stem_symtab->{$leaf}; + } + + sub eval_file { + my($filename, $delete) = @_; + my $package = valid_package_name($filename); + my $mtime = -M $filename; + if(defined $Cache{$package}{mtime} + && + $Cache{$package}{mtime} <= $mtime) + { + # we have compiled this subroutine already, + # it has not been updated on disk, nothing left to do + print STDERR "already compiled $package->handler\n"; + } + else { + local *FH; + open FH, $filename or die "open '$filename' $!"; + local($/) = undef; + my $sub = <FH>; + close FH; + + #wrap the code into a subroutine inside our unique package + my $eval = qq{package $package; sub handler { $sub; }}; + { + # hide our variables within this block + my($filename,$mtime,$package,$sub); + eval $eval; + } + die $@ if $@; + + #cache it unless we're cleaning out each time + $Cache{$package}{mtime} = $mtime unless $delete; + } + + eval {$package->handler;}; + die $@ if $@; + + delete_package($package) if $delete; + + #take a look if you want + #print Devel::Symdump->rnew($package)->as_string, $/; + } + + 1; + + __END__ + + /* persistent.c */ + #include <EXTERN.h> + #include <perl.h> + + /* 1 = clean out filename's symbol table after each request, 0 = don't */ + #ifndef DO_CLEAN + #define DO_CLEAN 0 + #endif + + static PerlInterpreter *perl = NULL; + + int + main(int argc, char **argv, char **env) + { + char *embedding[] = { "", "persistent.pl" }; + char *args[] = { "", DO_CLEAN, NULL }; + char filename [1024]; + int exitstatus = 0; + + if((perl = perl_alloc()) == NULL) { + fprintf(stderr, "no memory!"); + exit(1); + } + perl_construct(perl); + + exitstatus = perl_parse(perl, NULL, 2, embedding, NULL); + + if(!exitstatus) { + exitstatus = perl_run(perl); + + 
while(printf("Enter file name: ") && gets(filename)) { + + /* call the subroutine, passing it the filename as an argument */ + args[0] = filename; + perl_call_argv("Embed::Persistent::eval_file", + G_DISCARD | G_EVAL, args); + + /* check $@ */ + if(SvTRUE(GvSV(errgv))) + fprintf(stderr, "eval error: %s\n", SvPV(GvSV(errgv),na)); + } + } + + perl_destruct_level = 0; + perl_destruct(perl); + perl_free(perl); + exit(exitstatus); + } + +Now compile: + + % cc -o persistent persistent.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + +Here's an example script file: + + #test.pl + my $string = "hello"; + foo($string); + + sub foo { + print "foo says: @_\n"; + } + +Now run: + + % persistent + Enter file name: test.pl + foo says: hello + Enter file name: test.pl + already compiled Embed::test_2epl->handler + foo says: hello + Enter file name: ^C + +=head2 Maintaining multiple interpreter instances + +Some rare applications will need to create more than one interpreter +during a session. Such an application might sporadically decide to +release any resources associated with the interpreter. + +The program must take care to ensure that this takes place I<before> +the next interpreter is constructed. By default, the global variable +C<perl_destruct_level> is set to C<0>, since extra cleaning isn't +needed when a program has only one interpreter. + +Setting C<perl_destruct_level> to C<1> makes everything squeaky clean: + + perl_destruct_level = 1; + + while(1) { + ... + /* reset global variables here with perl_destruct_level = 1 */ + perl_construct(my_perl); + ... + /* clean and reset _everything_ during perl_destruct */ + perl_destruct(my_perl); + perl_free(my_perl); + ... + /* let's go do it again! */ + } + +When I<perl_destruct()> is called, the interpreter's syntax parse tree +and symbol tables are cleaned up, and global variables are reset. + +Now suppose we have more than one interpreter instance running at the +same time.
This is feasible, but only if you used the +C<-DMULTIPLICITY> flag when building Perl. By default, that sets +C<perl_destruct_level> to C<1>. + +Let's give it a try: + + + #include <EXTERN.h> + #include <perl.h> + + /* we're going to embed two interpreters */ + /* we're going to embed two interpreters */ + + #define SAY_HELLO "-e", "print qq(Hi, I'm $^X\n)" + + int main(int argc, char **argv, char **env) + { + PerlInterpreter + *one_perl = perl_alloc(), + *two_perl = perl_alloc(); + char *one_args[] = { "one_perl", SAY_HELLO }; + char *two_args[] = { "two_perl", SAY_HELLO }; + + perl_construct(one_perl); + perl_construct(two_perl); + + perl_parse(one_perl, NULL, 3, one_args, (char **)NULL); + perl_parse(two_perl, NULL, 3, two_args, (char **)NULL); + + perl_run(one_perl); + perl_run(two_perl); + + perl_destruct(one_perl); + perl_destruct(two_perl); + + perl_free(one_perl); + perl_free(two_perl); + } + + +Compile as usual: + + % cc -o multiplicity multiplicity.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + +Run it, Run it: + + % multiplicity + Hi, I'm one_perl + Hi, I'm two_perl + +=head2 Using Perl modules, which themselves use C libraries, from your C program + +If you've played with the examples above and tried to embed a script +that I<use()>s a Perl module (such as I<Socket>) which itself uses a C or C++ library, +this probably happened: + + + Can't load module Socket, dynamic loading not available in this perl. + (You may need to build a new perl executable which either supports + dynamic loading or has the Socket module statically linked into it.) + + +What's wrong? + +Your interpreter doesn't know how to communicate with these extensions +on its own. A little glue will help. Up until now you've been +calling I<perl_parse()>, handing it NULL for the second argument: + + perl_parse(my_perl, NULL, argc, my_argv, NULL); + +That's where the glue code can be inserted to create the initial contact between +Perl and linked C/C++ routines. 
Let's take a look at some pieces of I<perlmain.c> +to see how Perl does this: + + + #ifdef __cplusplus + # define EXTERN_C extern "C" + #else + # define EXTERN_C extern + #endif + + static void xs_init _((void)); + + EXTERN_C void boot_DynaLoader _((CV* cv)); + EXTERN_C void boot_Socket _((CV* cv)); + + + EXTERN_C void + xs_init() + { + char *file = __FILE__; + /* DynaLoader is a special case */ + newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file); + newXS("Socket::bootstrap", boot_Socket, file); + } + +Simply put: for each extension linked with your Perl executable +(determined during its initial configuration on your +computer or when adding a new extension), +a Perl subroutine is created to incorporate the extension's +routines. Normally, that subroutine is named +I<Module::bootstrap()> and is invoked when you say I<use Module>. In +turn, this hooks into an XSUB, I<boot_Module>, which creates a Perl +counterpart for each of the extension's XSUBs. Don't worry about this +part; leave that to the I<xsubpp> and extension authors. If your +extension is dynamically loaded, DynaLoader creates I<Module::bootstrap()> +for you on the fly. In fact, if you have a working DynaLoader then there +is rarely any need to link in any other extensions statically. + + +Once you have this code, slap it into the second argument of I<perl_parse()>: + + + perl_parse(my_perl, xs_init, argc, my_argv, NULL); + + +Then compile: + + % cc -o interp interp.c `perl -MExtUtils::Embed -e ccopts -e ldopts` + + % interp + use Socket; + use SomeDynamicallyLoadedModule; + + print "Now I can use extensions!\n" + +B<ExtUtils::Embed> can also automate writing the I<xs_init> glue code. + + % perl -MExtUtils::Embed -e xsinit -- -o perlxsi.c + % cc -c perlxsi.c `perl -MExtUtils::Embed -e ccopts` + % cc -c interp.c `perl -MExtUtils::Embed -e ccopts` + % cc -o interp perlxsi.o interp.o `perl -MExtUtils::Embed -e ldopts` + +Consult L<perlxs> and L<perlguts> for more details.
+ + =head1 MORAL You can sometimes I<write faster code> in C, but -you can always I<write code faster> in Perl. Since you can use +you can always I<write code faster> in Perl. Because you can use each from the other, combine them as you wish. =head1 AUTHOR -Jon Orwant F<E<lt>orwant@media.mit.eduE<gt>>, with contributions from -Tim Bunce, Tom Christiansen, Dov Grobgeld, and Ilya Zakharevich. +Jon Orwant <F<orwant@tpj.com>> and Doug MacEachern <F<dougm@osf.org>>, +with small contributions from Tim Bunce, Tom Christiansen, Hallvard Furuseth, +Dov Grobgeld, and Ilya Zakharevich. + +Check out Doug's article on embedding in Volume 1, Issue 4 of The Perl +Journal. Info about TPJ is available from http://tpj.com. -December 18, 1995 +April 14, 1997 -Some of this material is excerpted from my book: I<Perl 5 Interactive>, -Waite Group Press, 1996 (ISBN 1-57169-064-6) and appears +Some of this material is excerpted from Jon Orwant's book: I<Perl 5 +Interactive>, Waite Group Press, 1996 (ISBN 1-57169-064-6) and appears courtesy of Waite Group Press. +=head1 COPYRIGHT + +Copyright (C) 1995, 1996, 1997 Doug MacEachern and Jon Orwant. All +Rights Reserved. + +Although destined for release with the standard Perl distribution, +this document is not public domain, nor is any of Perl and its +documentation. Permission is granted to freely distribute verbatim +copies of this document provided that no modifications outside of +formatting be made, and that this notice remain intact. You are +permitted and encouraged to use its code and derivatives thereof in +your own source code for fun or for profit as you see fit.
diff --git a/pod/perlfaq.pod b/pod/perlfaq.pod new file mode 100644 index 0000000000..2213a0f2f0 --- /dev/null +++ b/pod/perlfaq.pod @@ -0,0 +1,174 @@ +=head1 NAME + +perlfaq - frequently asked questions about Perl ($Date: 1997/04/24 22:46:06 $) + +=head1 DESCRIPTION + +This document is structured into the following sections: + +=over + +=item perlfaq: Structural overview of the FAQ. + +This document. + +=item L<perlfaq1>: General Questions About Perl + +Very general, high-level information about Perl. + +=item L<perlfaq2>: Obtaining and Learning about Perl + +Where to find source and documentation to Perl, support and training, +and related matters. + +=item L<perlfaq3>: Programming Tools + +Programmer tools and programming support. + +=item L<perlfaq4>: Data Manipulation + +Manipulating numbers, dates, strings, arrays, hashes, and +miscellaneous data issues. + +=item L<perlfaq5>: Files and Formats + +I/O and the "f" issues: filehandles, flushing, formats and footers. + +=item L<perlfaq6>: Regexps + +Pattern matching and regular expressions. + +=item L<perlfaq7>: General Perl Language Issues + +General Perl language issues that don't clearly fit into any of the +other sections. + +=item L<perlfaq8>: System Interaction + +Interprocess communication (IPC), control over the user-interface +(keyboard, screen and pointing devices). + +=item L<perlfaq9>: Networking + +Networking, the Internet, and a few on the web. + +=back + +=head2 Where to get this document + +This document is posted regularly to comp.lang.perl.announce and +several other related newsgroups. It is available in a variety of +formats from CPAN in the /CPAN/doc/FAQs/FAQ/ directory, or on the web +at http://www.perl.com/perl/faq/ . + +=head2 How to contribute to this document + +You may mail corrections, additions, and suggestions to +perlfaq-suggestions@perl.com . Mail sent to the old perlfaq alias +will merely cause the FAQ to be sent to you. 
+ +=head2 What will happen if you mail your Perl programming problems to the authors + +Your questions will probably go unread, unless they're suggestions of +new questions to add to the FAQ, in which case they should have gone +to the perlfaq-suggestions@perl.com instead. + +You should have read section 2 of this faq. There you would have +learned that comp.lang.perl.misc is the appropriate place to go for +free advice. If your question is really important and you require a +prompt and correct answer, you should hire a consultant. + +=head1 Credits + +When I first began the Perl FAQ in the late 80s, I never realized it +would have grown to over a hundred pages, nor that Perl would ever become +so popular and widespread. This document could not have been written +without the tremendous help provided by Larry Wall and the rest of the +Perl Porters. + +=head1 Author and Copyright Information + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. + +=head2 Noncommercial Reproduction + +Permission is granted to distribute this document, in part or in full, +via electronic means or printed copy providing that (1) that all credits +and copyright notices be retained, (2) that no charges beyond reproduction +be involved, and (3) that a reasonable attempt be made to use the most +current version available. + +Furthermore, you may include this document in any distribution of the +full Perl source or binaries, in its verbatim documentation, or on a +complete dump of the CPAN archive, providing that the three stipulations +given above continue to be met. + +=head2 Commercial Reproduction + +Requests for all other distribution rights, including the incorporation +in part or in full of this text or its code into commercial products +such as but not limited to books, magazine articles, or CD-ROMs, must +be made to perlfaq-legal@perl.com. 
Any commercial use of any portion +of this document without prior written authorization by its authors +will be subject to appropriate action. + +=head2 Disclaimer + +This information is offered in good faith and in the hope that it may +be of use, but is not guaranteed to be correct, up to date, or suitable +for any particular purpose whatsoever. The authors accept no liability +in respect of this information or its use. + +=head1 Changes + +=over 4 + +=item 24/April/97 + +Style and whitespace changes from Chip, new question on reading one +character at a time from a terminal using POSIX from Tom. + +=item 23/April/97 + +Added http://www.oasis.leo.org/perl/ to L<perlfaq2>. Style fix to +L<perlfaq3>. Added floating point precision, fixed complex number +arithmetic, cross-references, caveat for Text::Wrap, alternative +answer for initial capitalizing, fixed incorrect regexp, added example +of Tie::IxHash to L<perlfaq4>. Added example of passing and storing +filehandles, added commify to L<perlfaq5>. Restored variable suicide, +and added mass commenting to L<perlfaq7>. Added Net::Telnet, fixed +backticks, added reader/writer pair to telnet question, added FindBin, +grouped module questions together in L<perlfaq8>. Expanded caveats +for the simple URL extractor, gave LWP example, added CGI security +question, expanded on the email address answer in L<perlfaq9>. + +=item 25/March/97 + +Added more info to the binary distribution section of L<perlfaq2>. +Added Net::Telnet to L<perlfaq6>. Fixed typos in L<perlfaq8>. Added +mail sending example to L<perlfaq9>. Added Merlyn's columns to +L<perlfaq2>. + +=item 18/March/97 + +Added the DATE to the NAME section, indicating which sections have +changed. + +Mentioned SIGPIPE and L<perlipc> in the forking open answer in +L<perlfaq8>. + +Fixed description of a regular expression in L<perlfaq4>. + +=item 17/March/97 Version + +Various typos fixed throughout. + +Added new question on Perl BNF on L<perlfaq7>. 
+ +=item Initial Release: 11/March/97 + +This is the initial release of version 3 of the FAQ; consequently there +have been no changes since its initial release. + +=back diff --git a/pod/perlfaq1.pod b/pod/perlfaq1.pod new file mode 100644 index 0000000000..a9a5fd4858 --- /dev/null +++ b/pod/perlfaq1.pod @@ -0,0 +1,249 @@ +=head1 NAME + +perlfaq1 - General Questions About Perl ($Revision: 1.12 $, $Date: 1997/04/24 22:43:34 $) + +=head1 DESCRIPTION + +This section of the FAQ answers very general, high-level questions +about Perl. + +=head2 What is Perl? + +Perl is a high-level programming language with an eclectic heritage +written by Larry Wall and a cast of thousands. It derives from the +ubiquitous C programming language and to a lesser extent from sed, +awk, the Unix shell, and at least a dozen other tools and languages. +Perl's process, file, and text manipulation facilities make it +particularly well-suited for tasks involving quick prototyping, system +utilities, software tools, system management tasks, database access, +graphical programming, networking, and world wide web programming. +These strengths make it especially popular with system administrators +and CGI script authors, but mathematicians, geneticists, journalists, +and even managers also use Perl. Maybe you should, too. + +=head2 Who supports Perl? Who develops it? Why is it free? + +The original culture of the pre-populist Internet and the deeply-held +beliefs of Perl's author, Larry Wall, gave rise to the free and open +distribution policy of perl. Perl is supported by its users. The +core, the standard Perl library, the optional modules, and the +documentation you're reading now were all written by volunteers. See +the personal note at the end of the README file in the perl source +distribution for more details. 
+ +In particular, the core development team (known as the Perl +Porters) are a rag-tag band of highly altruistic individuals +committed to producing better software for free than you +could hope to purchase for money. You may snoop on pending +developments via news://genetics.upenn.edu/perl.porters-gw/ and +http://www.frii.com/~gnat/perl/porters/summary.html. + +While the GNU project includes Perl in its distributions, there's no +such thing as "GNU Perl". Perl is not produced nor maintained by the +Free Software Foundation. Perl's licensing terms are also more open +than GNU software's tend to be. + +You can get commercial support of Perl if you wish, although for most +users the informal support will more than suffice. See the answer to +"Where can I buy a commercial version of perl?" for more information. + +=head2 Which version of Perl should I use? + +You should definitely use version 5. Version 4 is old, limited, and +no longer maintained; its last patch (4.036) was in 1992. The most +recent production release is 5.004. Further references to the Perl +language in this document refer to this production release unless +otherwise specified. There may be one or more official bug fixes for +5.004 by the time you read this, and also perhaps some experimental +versions on the way to the next release. + +=head2 What are perl4 and perl5? + +Perl4 and perl5 are informal names for different versions of the Perl +programming language. It's easier to say "perl5" than it is to say +"the 5(.004) release of Perl", but some people have interpreted this +to mean there's a language called "perl5", which isn't the case. +Perl5 is merely the popular name for the fifth major release (October 1994), +while perl4 was the fourth major release (March 1991). There was also a +perl1 (in January 1988), a perl2 (June 1988), and a perl3 (October 1989). + +The 5.0 release is, essentially, a complete rewrite of the perl source +code from the ground up. 
It has been modularized, object-oriented, +tweaked, trimmed, and optimized until it almost doesn't look like the +old code. However, the interface is mostly the same, and compatibility +with previous releases is very high. + +To avoid the "what language is perl5?" confusion, some people prefer to +simply use "perl" to refer to the latest version of perl and avoid using +"perl5" altogether. It's not really that big a deal, though. + +=head2 How stable is Perl? + +Production releases, which incorporate bug fixes and new functionality, +are widely tested before release. Since the 5.000 release, we have +averaged only about one production release per year. + +Larry and the Perl development team occasionally make changes to the +internal core of the language, but all possible efforts are made toward +backward compatibility. While not quite all perl4 scripts run flawlessly +under perl5, an update to perl should nearly never invalidate a program +written for an earlier version of perl (barring accidental bug fixes +and the rare new keyword). + +=head2 Is Perl difficult to learn? + +Perl is easy to start learning -- and easy to keep learning. It looks +like most programming languages you're likely to have had experience +with, so if you've ever written a C program, an awk script, a shell +script, or even an Excel macro, you're already part way there. + +Most tasks only require a small subset of the Perl language. One of +the guiding mottos for Perl development is "there's more than one way +to do it" (TMTOWTDI, sometimes pronounced "tim toady"). Perl's +learning curve is therefore shallow (easy to learn) and long (there's +a whole lot you can do if you really want). + +Finally, Perl is (frequently) an interpreted language. This means +that you can write your programs and test them without an intermediate +compilation step, allowing you to experiment and test/debug quickly +and easily. This ease of experimentation flattens the learning curve +even more. 
+ +Things that make Perl easier to learn: Unix experience, almost any kind +of programming experience, an understanding of regular expressions, and +the ability to understand other people's code. If there's something you +need to do, then it's probably already been done, and a working example is +usually available for free. Don't forget the new perl modules, either. +They're discussed in Part 3 of this FAQ, along with the CPAN, which is +discussed in Part 2. + +=head2 How does Perl compare with other languages like Java, Python, REXX, Scheme, or Tcl? + +Favorably in some areas, unfavorably in others. Precisely which areas +are good and bad is often a personal choice, so asking this question +on Usenet runs a strong risk of starting an unproductive Holy War. + +Probably the best thing to do is try to write equivalent code to do a +set of tasks. These languages have their own newsgroups in which you +can learn about (but hopefully not argue about) them. + +=head2 Can I do [task] in Perl? + +Perl is flexible and extensible enough for you to use on almost any +task, from one-line file-processing tasks to complex systems. For +many people, Perl serves as a great replacement for shell scripting. +For others, it serves as a convenient, high-level replacement for most +of what they'd program in low-level languages like C or C++. It's +ultimately up to you (and possibly your management ...) which tasks +you'll use Perl for and which you won't. + +If you have a library that provides an API, you can make any component +of it available as just another Perl function or variable using a Perl +extension written in C or C++ and dynamically linked into your main +perl interpreter. You can also go the other direction, and write your +main program in C or C++, and then link in some Perl code on the fly, +to create a powerful application. 
+ +That said, there will always be small, focused, special-purpose +languages dedicated to a specific problem domain that are simply more +convenient for certain kinds of problems. Perl tries to be all things +to all people, but nothing special to anyone. Examples of specialized +languages that come to mind include prolog and matlab. + +=head2 When shouldn't I program in Perl? + +When your manager forbids it -- but do consider replacing them :-). + +Actually, one good reason is when you already have an existing +application written in another language that's all done (and done +well), or you have an application language specifically designed for a +certain task (e.g. prolog, make). + +For various reasons, Perl is probably not well-suited for real-time +embedded systems, low-level operating systems development work like +device drivers or context-switching code, complex multithreaded +shared-memory applications, or extremely large applications. You'll +notice that perl is not itself written in Perl. + +The new native-code compiler for Perl may reduce the limitations given +in the previous statement to some degree, but understand that Perl +remains fundamentally a dynamically typed language, and not a +statically typed one. You certainly won't be chastised if you don't +trust nuclear-plant or brain-surgery monitoring code to it. And +Larry will sleep easier, too -- Wall Street programs +notwithstanding. :-) + +=head2 What's the difference between "perl" and "Perl"? + +One bit. Oh, you weren't talking ASCII? :-) Larry now uses "Perl" to +signify the language proper and "perl" the implementation of it, +i.e. the current interpreter. Hence Tom's quip that "Nothing but perl +can parse Perl." You may or may not choose to follow this usage. For +example, parallelism means "awk and perl" and "Python and Perl" look +ok, while "awk and Perl" and "Python and perl" do not. + +=head2 Is it a Perl program or a Perl script? + +It doesn't matter. 
+ +In "standard terminology" a I<program> has been compiled to physical +machine code once, and can then be run multiple times, whereas a +I<script> must be translated by a program each time it's used. Perl +programs, however, are usually neither strictly compiled nor strictly +interpreted. They can be compiled to a byte code form (something of a +Perl virtual machine) or to completely different languages, like C or +assembly language. You can't tell just by looking whether the source +is destined for a pure interpreter, a parse-tree interpreter, a byte +code interpreter, or a native-code compiler, so it's hard to give a +definitive answer here. + +=head2 What is a JAPH? + +These are the "just another perl hacker" signatures that some people +sign their postings with. About 100 of the earlier ones are +available from http://www.perl.com/CPAN/misc/japh . + +=head2 Where can I get a list of Larry Wall witticisms? + +Over a hundred quips by Larry, from postings of his or source code, +can be found at http://www.perl.com/CPAN/misc/lwall-quotes . + +=head2 How can I convince my sysadmin/supervisor/employees to use version (5/5.004/Perl instead of some other language)? + +If your manager or employees are wary of unsupported software, or +software which doesn't officially ship with your Operating System, you +might try to appeal to their self-interest. If programmers can be +more productive using and utilizing Perl constructs, functionality, +simplicity, and power, then the typical manager/supervisor/employee +may be persuaded. Regarding using Perl in general, it's also +sometimes helpful to point out that delivery times may be reduced +using Perl, as compared to other languages. + +If you have a project which has a bottleneck, especially in terms of +translation, or testing, Perl almost certainly will provide a viable, +and quick solution. 
In conjunction with any persuasion effort, you +should not fail to point out that Perl is used, quite extensively, and +with extremely reliable and valuable results, at many large computer +software and/or hardware companies throughout the world. In fact, +many Unix vendors now ship Perl by default, and support is usually +just a news-posting away, if you can't find the answer in the +I<comprehensive> documentation, including this FAQ. + +If you face reluctance to upgrading from an older version of perl, +then point out that version 4 is utterly unmaintained and unsupported +by the Perl Development Team. Another big sell for Perl5 is the large +number of modules and extensions which greatly reduce development time +for any given task. Also mention that the difference between version +4 and version 5 of Perl is like the difference between awk and C++. +(Well, ok, maybe not quite that distinct, but you get the idea.) If +you want support and a reasonable guarantee that what you're +developing will continue to work in the future, then you have to run +the supported version. That probably means running the 5.004 release, +although 5.003 isn't that bad (it's just one year and one release +behind). Several important bugs were fixed from the 5.000 through +5.002 versions, though, so try upgrading past them if possible. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. diff --git a/pod/perlfaq2.pod b/pod/perlfaq2.pod new file mode 100644 index 0000000000..8a954da64e --- /dev/null +++ b/pod/perlfaq2.pod @@ -0,0 +1,443 @@ +=head1 NAME + +perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.16 $, $Date: 1997/04/23 18:04:09 $) + +=head1 DESCRIPTION + +This section of the FAQ answers questions about where to find +source and documentation for Perl, support and training, and +related matters. + +=head2 What machines support Perl? Where do I get it? 
+ +The standard release of Perl (the one maintained by the perl +development team) is distributed only in source code form. You can +find this at http://www.perl.com/CPAN/src/latest.tar.gz, which is a +gzipped archive in POSIX tar format. This source builds with no +porting whatsoever on most Unix systems (Perl's native environment), +as well as Plan 9, VMS, QNX, OS/2, and the Amiga. + +Although it's rumored that the (imminent) 5.004 release may build +on Windows NT, this is yet to be proven. Binary distributions +for 32-bit Microsoft systems and for Apple systems can be found in the +http://www.perl.com/CPAN/ports/ directory. Because these are not part of +the standard distribution, they may and in fact do differ from the base +Perl port in a variety of ways. You'll have to check their respective +release notes to see just what the differences are. These differences +can be either positive (e.g. extensions for the features of the particular +platform that are not supported in the source release of perl) or negative +(e.g. might be based upon a less current source release of perl). + +A useful FAQ for Win32 Perl users is +http://www.endcontsw.com/people/evangelo/Perl_for_Win32_FAQ.html + +=head2 How can I get a binary version of Perl? + +If you don't have a C compiler because for whatever reasons your +vendor did not include one with your system, the best thing to do is +grab a binary version of gcc from the net and use that to compile perl +with. CPAN only has binaries for systems that are terribly hard to +get free compilers for, not for Unix systems. + +Your first stop should be http://www.perl.com/CPAN/ports to see what +information is already available. A simple installation guide for +MS-DOS is available at http://www.cs.ruu.nl/~piet/perl5dos.html , and +similarly for Windows 3.1 at http://www.cs.ruu.nl/~piet/perlwin3.html +. + +=head2 I don't have a C compiler on my system. How can I compile perl? 
+ +Since you don't have a C compiler, you're doomed and your vendor +should be sacrificed to the Sun gods. But that doesn't help you. + +What you need to do is get a binary version of gcc for your system +first. Consult the Usenet FAQs for your operating system for +information on where to get such a binary version. + +=head2 I copied the Perl binary from one machine to another, but scripts don't work. + +That's probably because you forgot libraries, or library paths differ. +You really should build the whole distribution on the machine it will +eventually live on, and then type C<make install>. Most other +approaches are doomed to failure. + +One simple way to check that things are in the right place is to print out +the hard-coded @INC which perl is looking for. + + perl -e 'print join("\n",@INC)' + +If this command lists any paths which don't exist on your system, then you +may need to move the appropriate libraries to these locations, or create +symlinks, aliases, or shortcuts appropriately. + +You might also want to check out L<perlfaq8/"How do I keep my own +module/library directory?">. + +=head2 I grabbed the sources and tried to compile but gdbm/dynamic loading/malloc/linking/... failed. How do I make it work? + +Read the F<INSTALL> file, which is part of the source distribution. +It describes in detail how to cope with most idiosyncrasies that the +Configure script can't work around for any given system or +architecture. + +=head2 What modules and extensions are available for Perl? What is CPAN? What does CPAN/src/... mean? + +CPAN stands for Comprehensive Perl Archive Network, a huge archive +replicated on dozens of machines all over the world. CPAN contains +source code, non-native ports, documentation, scripts, and many +third-party modules and extensions, designed for everything from +commercial database interfaces to keyboard/screen control to web +walking and CGI scripts. 
The master machine for CPAN is +ftp://ftp.funet.fi/pub/languages/perl/CPAN/, but you can use the +address http://www.perl.com/CPAN/CPAN.html to fetch a copy from a +"site near you". See http://www.perl.com/CPAN (without a slash at the +end) for how this process works. + +CPAN/path/... is a naming convention for files available on CPAN +sites. CPAN indicates the base directory of a CPAN mirror, and the +rest of the path is the path from that directory to the file. For +instance, if you're using ftp://ftp.funet.fi/pub/languages/perl/CPAN +as your CPAN site, the file CPAN/misc/japh is downloadable as +ftp://ftp.funet.fi/pub/languages/perl/CPAN/misc/japh . + +Considering that there are hundreds of existing modules in the +archive, one probably exists to do nearly anything you can think of. +Current categories under CPAN/modules/by-category/ include perl core +modules; development support; operating system interfaces; networking, +devices, and interprocess communication; data type utilities; database +interfaces; user interfaces; interfaces to other languages; filenames, +file systems, and file locking; internationalization and locale; world +wide web support; server and daemon utilities; archiving and +compression; image manipulation; mail and news; control flow +utilities; filehandle and I/O; Microsoft Windows modules; and +miscellaneous modules. + +=head2 Is there an ISO or ANSI certified version of Perl? + +Certainly not. Larry expects that he'll be certified before Perl is. + +=head2 Where can I get information on Perl? + +The complete Perl documentation is available with the perl +distribution. If you have perl installed locally, you probably have +the documentation installed as well: type C<man perl> if you're on a +system resembling Unix. This will lead you to other important man +pages. If you're not on a Unix system, access to the documentation +will be different; for example, it might be only in HTML format. 
But +all proper perl installations have fully-accessible documentation. + +You might also try C<perldoc perl> in case your system doesn't +have a proper man command, or it's been misinstalled. If that doesn't +work, try looking in /usr/local/lib/perl5/pod for documentation. + +If all else fails, consult the CPAN/doc directory, which contains the +complete documentation in various formats, including native pod, +troff, html, and plain text. There's also a web page at +http://www.perl.com/perl/info/documentation.html that might help. + +It's also worth noting that there's a PDF version of the complete +documentation for perl available in the CPAN/authors/id/BMIDD +directory. + +Many good books have been written about Perl -- see the section below +for more details. + +=head2 What are the Perl newsgroups on USENET? Where do I post questions? + +The now defunct comp.lang.perl newsgroup has been superseded by the +following groups: + + comp.lang.perl.announce Moderated announcement group + comp.lang.perl.misc Very busy group about Perl in general + comp.lang.perl.modules Use and development of Perl modules + comp.lang.perl.tk Using Tk (and X) from Perl + + comp.infosystems.www.authoring.cgi Writing CGI scripts for the Web. + +There is also a USENET gateway to the mailing list used by the crack +Perl development team (perl5-porters) at +news://genetics.upenn.edu/perl.porters-gw/ . + +=head2 Where should I post source code? + +You should post source code to whichever group is most appropriate, +but feel free to cross-post to comp.lang.perl.misc. If you want to +cross-post to alt.sources, please make sure it follows their posting +standards, including setting the Followup-To header line to NOT +include alt.sources; see their FAQ for details. + +=head2 Perl Books + +A number of books on Perl and/or CGI programming are available. A few of +these are good, some are ok, but many aren't worth your money. 
Tom +Christiansen maintains a list of these books, some with extensive +reviews, at http://www.perl.com/perl/critiques/index.html. + +The incontestably definitive reference book on Perl, written by the +creator of Perl and his apostles, is now in its second edition and +fourth printing. + + Programming Perl (the "Camel Book"): + Authors: Larry Wall, Tom Christiansen, and Randal Schwartz + ISBN 1-56592-149-6 (English) + ISBN 4-89052-384-7 (Japanese) + (French and German translations in progress) + +Note that O'Reilly books are color-coded: turquoise (some would call +it teal) covers indicate perl5 coverage, while magenta (some would +call it pink) covers indicate perl4 only. Check the cover color +before you buy! + +What follows is a list of the books that the FAQ authors found personally +useful. Your mileage may (but, we hope, probably won't) vary. + +If you're already a hard-core systems programmer, then the Camel Book +just might suffice for you to learn Perl from. But if you're not, +check out the "Llama Book". It currently doesn't cover perl5, but the +2nd edition is nearly done and should be out by summer 97: + + Learning Perl (the Llama Book): + Author: Randal Schwartz, with intro by Larry Wall + ISBN 1-56592-042-2 (English) + ISBN 4-89502-678-1 (Japanese) + ISBN 2-84177-005-2 (French) + ISBN 3-930673-08-8 (German) + +Another stand-out book in the turquoise O'Reilly Perl line is the "Hip +Owls" book. It covers regular expressions inside and out, with quite a +bit devoted exclusively to Perl: + + Mastering Regular Expressions (the Cute Owls Book): + Author: Jeffrey Friedl + ISBN 1-56592-257-3 + +You can order any of these books from O'Reilly & Associates, +1-800-998-9938. Local/overseas is 1-707-829-0515. If you can locate +an O'Reilly order form, you can also fax to 1-707-829-0104. See +http://www.ora.com/ on the Web. + +Recommended Perl books that are not from O'Reilly are the following: + + Cross-Platform Perl, (for Unix and Windows NT) + Author: Eric F. 
Johnson + ISBN: 1-55851-483-X + + How to Set up and Maintain a World Wide Web Site, (2nd edition) + Author: Lincoln Stein, M.D., Ph.D. + ISBN: 0-201-63462-7 + + CGI Programming in C & Perl, + Author: Thomas Boutell + ISBN: 0-201-42219-0 + +Note that some of these address specific application areas (e.g. the +Web) and are not general-purpose programming books. + +=head2 Perl in Magazines + +The Perl Journal is the first and only magazine dedicated to Perl. +It is published (on paper, not online) quarterly by Jon Orwant +(orwant@tpj.com), editor. Subscription information is at http://tpj.com +or via email to subscriptions@tpj.com. + +Beyond this, two other magazines that frequently carry high-quality +articles on Perl are Web Techniques (see +http://www.webtechniques.com/) and Unix Review +(http://www.unixreview.com/). Randal Schwartz's Web Techniques +columns are available on the web at +http://www.stonehenge.com/merlyn/WebTechniques/ . + +=head2 Perl on the Net: FTP and WWW Access + +To get the best (and possibly cheapest) performance, pick a site from +the list below and use it to grab the complete list of mirror sites. +From there you can find the quickest site for you. Remember, the +following list is I<not> the complete list of CPAN mirrors. + + http://www.perl.com/CPAN (redirects to another mirror) + http://www.perl.org/CPAN + ftp://ftp.funet.fi/pub/languages/perl/CPAN/ + http://www.cs.ruu.nl/pub/PERL/CPAN/ + ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ + +http://www.oasis.leo.org/perl/ has, amongst other things, source to +versions 1 through 5 of Perl. + +=head2 What mailing lists are there for perl? + +Most of the major modules (tk, CGI, libwww-perl) have their own +mailing lists. Consult the documentation that came with the module for +subscription information. The following is a list of mailing lists +related to perl itself. + +If you subscribe to a mailing list, it behooves you to know how to +unsubscribe from it. 
Strident pleas to the list itself to get you off +will not be favorably received. + +=over 4 + +=item MacPerl + +There is a mailing list for discussing Macintosh Perl. Contact +"mac-perl-request@iis.ee.ethz.ch". + +Also see Matthias Neeracher's (the creator and maintainer of MacPerl) +webpage at http://www.iis.ee.ethz.ch/~neeri/macintosh/perl.html for +many links to interesting MacPerl sites, and the applications/MPW +tools, precompiled. + +=item Perl5-Porters + +The core development team have a mailing list for discussing fixes and +changes to the language. Send mail to +"perl5-porters-request@perl.org" with help in the body of the message +for information on subscribing. + +=item NTPerl + +This list is used to discuss issues involving Win32 Perl 5 (Windows NT +and Win95). Subscribe by emailing ListManager@ActiveWare.com with the +message body: + + subscribe Perl-Win32-Users + +The list software, also written in perl, will automatically determine +your address, and subscribe you automatically. To unsubscribe, email +the following in the message body to the same address like so: + + unsubscribe Perl-Win32-Users + +You can also check http://www.activeware.com/ and select "Mailing Lists" +to join or leave this list. + +=item Perl-Packrats + +Discussion related to archiving of perl materials, particularly the +Comprehensive Perl Archive Network (CPAN). Subscribe by emailing +majordomo@cis.ufl.edu: + + subscribe perl-packrats + +The list software, also written in perl, will automatically determine +your address, and subscribe you automatically. To unsubscribe, simply +prepend the same command with an "un", and mail to the same address +like so: + + unsubscribe perl-packrats + +=back + +=head2 Archives of comp.lang.perl.misc + +Have you tried Deja News or Alta Vista? + +ftp.cis.ufl.edu:/pub/perl/comp.lang.perl.*/monthly has an almost +complete collection dating back to 12/89 (missing 08/91 through +12/93). They are kept as one large file for each month. 
+ +You'll probably want a more sophisticated query and retrieval mechanism +than a file listing, preferably one that allows you to retrieve +articles using fast-access indices, keyed on at least author, date, +subject, thread (as in "trn") and probably keywords. The best +solution the FAQ authors know of is the MH pick command, but it is +very slow to select on 18000 articles. + +If you have, or know where can be found, the missing sections, please +let perlfaq-suggestions@perl.com know. + +=head2 Perl Training + +While some large training companies offer their own courses on Perl, +you may prefer to contact individuals near and dear to the heart of +Perl development. Two well-known members of the Perl development team +who offer such things are Tom Christiansen <perl-classes@perl.com> +and Randal Schwartz <perl-training-info@stonehenge.com>, plus their +respective minions, who offer a variety of professional tutorials +and seminars on Perl. These courses include large public seminars, +private corporate training, and fly-ins to Colorado and Oregon. +See http://www.perl.com/perl/info/training.html for more details. + +=head2 Where can I buy a commercial version of Perl? + +In a sense, Perl already I<is> commercial software: It has a licence +that you can grab and carefully read to your manager. It is +distributed in releases and comes in well-defined packages. There is a +very large user community and an extensive literature. The +comp.lang.perl.* newsgroups and several of the mailing lists provide +free answers to your questions in near real-time. Perl has +traditionally been supported by Larry, dozens of software designers +and developers, and thousands of programmers, all working for free +to create a useful thing to make life better for everyone. + +However, these answers may not suffice for managers who require a +purchase order from a company whom they can sue should anything go +wrong. 
Or maybe they need very serious hand-holding and contractual +obligations. Shrink-wrapped CDs with perl on them are available from +several sources if that will help. + +Or you can purchase a real support contract. Although Cygnus historically +provided this service, they no longer sell support contracts for Perl. +Instead, the Paul Ingram Group will be taking up the slack through The +Perl Clinic. The following is a commercial from them: + +"Do you need professional support for Perl and/or Oraperl? Do you need +a support contract with defined levels of service? Do you want to pay +only for what you need? + +"The Paul Ingram Group has provided quality software development and +support services to some of the world's largest corporations for ten +years. We are now offering the same quality support services for Perl +at The Perl Clinic. This service is led by Tim Bunce, an active perl +porter since 1994 and well known as the author and maintainer of the +DBI, DBD::Oracle, and Oraperl modules and author/co-maintainer of The +Perl 5 Module List. We also offer Oracle users support for Perl5 +Oraperl and related modules (which Oracle is planning to ship as part +of Oracle Web Server 3). 20% of the profit from our Perl support work +will be donated to The Perl Institute." + +For more information, contact The Perl Clinic: + + Tel: +44 1483 424424 + Fax: +44 1483 419419 + Web: http://www.perl.co.uk/ + Email: perl-support-info@perl.co.uk or Tim.Bunce@ig.co.uk + +=head2 Where do I send bug reports? + +If you are reporting a bug in the perl interpreter or the modules +shipped with perl, use the perlbug program in the perl distribution or +email your report to perlbug@perl.com. + +If you are posting a bug with a non-standard port (see the answer to +"What platforms is Perl available for?"), a binary distribution, or a +non-standard module (such as Tk, CGI, etc), then please see the +documentation that came with it to determine the correct place to post +bugs. 
+ +Read the perlbug man page (perl5.004 or later) for more information. + +=head2 What is perl.com? perl.org? The Perl Institute? + +perl.org is the official vehicle for The Perl Institute. The motto of +TPI is "helping people help Perl help people" (or something like +that). It's a non-profit organization supporting development, +documentation, and dissemination of perl. Current directors of TPI +include Larry Wall, Tom Christiansen, and Randal Schwartz, whom you +may have heard of somewhere else around here. + +The perl.com domain is Tom Christiansen's domain. He created it as a +public service long before perl.org came about. It's the original PBS +of the Perl world, a clearinghouse for information about all things +Perlian, accepting no paid advertisements, glossy gifs, or (gasp!) +java applets on its pages. + +=head2 How do I learn about object-oriented Perl programming? + +L<perltoot> (distributed with 5.004 or later) is a good place to start. +Also, L<perlobj>, L<perlref>, and L<perlmod> are useful references, +while L<perlbot> has some excellent tips and tricks. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. diff --git a/pod/perlfaq3.pod b/pod/perlfaq3.pod new file mode 100644 index 0000000000..65ebafdea5 --- /dev/null +++ b/pod/perlfaq3.pod @@ -0,0 +1,504 @@ +=head1 NAME + +perlfaq3 - Programming Tools ($Revision: 1.22 $, $Date: 1997/04/24 22:43:42 $) + +=head1 DESCRIPTION + +This section of the FAQ answers questions related to programmer tools +and programming support. + +=head2 How do I do (anything)? + +Have you looked at CPAN (see L<perlfaq2>)? The chances are that +someone has already written a module that can solve your problem. +Have you read the appropriate man pages? 
Here's a brief index: + + Objects perlref, perlmod, perlobj, perltie + Data Structures perlref, perllol, perldsc + Modules perlmod, perlmodlib, perlsub + Regexps perlre, perlfunc, perlop + Moving to perl5 perltrap, perl + Linking w/C perlxstut, perlxs, perlcall, perlguts, perlembed + Various http://www.perl.com/CPAN/doc/FMTEYEWTK/index.html + (not a man-page but still useful) + +L<perltoc> provides a crude table of contents for the perl man page set. + +=head2 How can I use Perl interactively? + +The typical approach uses the Perl debugger, described in the +perldebug(1) man page, on an "empty" program, like this: + + perl -de 42 + +Now just type in any legal Perl code, and it will be immediately +evaluated. You can also examine the symbol table, get stack +backtraces, check variable values, set breakpoints, and other +operations typically found in symbolic debuggers + +=head2 Is there a Perl shell? + +In general, no. The Shell.pm module (distributed with perl) makes +perl try commands which aren't part of the Perl language as shell +commands. perlsh from the source distribution is simplistic and +uninteresting, but may still be what you want. + +=head2 How do I debug my Perl programs? + +Have you used C<-w>? + +Have you tried C<use strict>? + +Did you check the returns of each and every system call? + +Did you read L<perltrap>? + +Have you tried the Perl debugger, described in L<perldebug>? + +=head2 How do I profile my Perl programs? + +You should get the Devel::DProf module from CPAN, and also use +Benchmark.pm from the standard distribution. Benchmark lets you time +specific portions of your code, while Devel::DProf gives detailed +breakdowns of where your code spends its time. + +=head2 How do I cross-reference my Perl programs? + +The B::Xref module, shipped with the new, alpha-release Perl compiler +(not the general distribution), can be used to generate +cross-reference reports for Perl programs. 
+ + perl -MO=Xref[,OPTIONS] foo.pl + +=head2 Is there a pretty-printer (formatter) for Perl? + +There is no program that will reformat Perl as much as indent(1) will +do for C. The complex feedback between the scanner and the parser +(this feedback is what confuses the vgrind and emacs programs) makes it +challenging at best to write a stand-alone Perl parser. + +Of course, if you simply follow the guidelines in L<perlstyle>, you +shouldn't need to reformat. + +Your editor can and should help you with source formatting. The +perl-mode for emacs can provide a remarkable amount of help with most +(but not all) code, and even less programmable editors can provide +significant assistance. + +If you are used to using the vgrind program for printing out nice code to +a laser printer, you can take a stab at this using +http://www.perl.com/CPAN/doc/misc/tips/working.vgrind.entry, but the +results are not particularly satisfying for sophisticated code. + +=head2 Is there a ctags for Perl? + +There's a simple one at +http://www.perl.com/CPAN/authors/id/TOMC/scripts/ptags.gz which may do +the trick. + +=head2 Where can I get Perl macros for vi? + +For a complete version of Tom Christiansen's vi configuration file, +see ftp://ftp.perl.com/pub/vi/toms.exrc, the standard benchmark file +for vi emulators. This runs best with nvi, the current version of vi +out of Berkeley, which incidentally can be built with an embedded Perl +interpreter -- see http://www.perl.com/CPAN/src/misc . + +=head2 Where can I get perl-mode for emacs? + +Since Emacs version 19 patchlevel 22 or so, there have been both a +perl-mode.el and support for the perl debugger built in. These should +come with the standard Emacs 19 distribution. + +In the perl source directory, you'll find a directory called "emacs", +which contains a cperl-mode that color-codes keywords, provides +context-sensitive help, and other nifty things.
+ +Note that the perl-mode of emacs will have fits with "main'foo" +(single quote), and mess up the indentation and highlighting. You +should be using "main::foo", anyway. + +=head2 How can I use curses with Perl? + +The Curses module from CPAN provides a dynamically loadable object +module interface to a curses library. + +=head2 How can I use X or Tk with Perl? + +Tk is a completely Perl-based, object-oriented interface to the Tk +toolkit that doesn't force you to use Tcl just to get at Tk. Sx is an +interface to the Athena Widget set. Both are available from CPAN. + +=head2 How can I generate simple menus without using CGI or Tk? + +The http://www.perl.com/CPAN/authors/id/SKUNZ/perlmenu.v4.0.tar.gz +module, which is curses-based, can help with this. + +=head2 Can I dynamically load C routines into Perl? + +If your system architecture supports it, then the standard perl +on your system should also provide you with this via the +DynaLoader module. Read L<perlxstut> for details. + +=head2 What is undump? + +See the next question. + +=head2 How can I make my Perl program run faster? + +The best way to do this is to come up with a better algorithm. +This can often make a dramatic difference. Chapter 8 in the Camel +has some efficiency tips in it you might want to look at. + +Other approaches include autoloading seldom-used Perl code. See the +AutoSplit and AutoLoader modules in the standard distribution for +that. Or you could locate the bottleneck and think about writing just +that part in C, the way we used to take bottlenecks in C code and +write them in assembler. Similar to rewriting in C is the use of +modules that have critical sections written in C (for instance, the +PDL module from CPAN). + +In some cases, it may be worth it to use the backend compiler to +produce byte code (saving compilation time) or compile into C, which +will certainly save compilation time and sometimes a small amount (but +not much) of execution time.
See the question about compiling your Perl +programs. + +If you're currently linking your perl executable to a shared libc.so, +you can often gain a 10-25% performance benefit by rebuilding it to +link with a static libc.a instead. This will make a bigger perl +executable, but your Perl programs (and programmers) may thank you for +it. See the F<INSTALL> file in the source distribution for more +information. + +Unsubstantiated reports allege that Perl interpreters that use sfio +outperform those that don't (for IO intensive applications). To try +this, see the F<INSTALL> file in the source distribution, especially +the "Selecting File IO mechanisms" section. + +The undump program was an old attempt to speed up your Perl program +by storing the already-compiled form to disk. This is no longer +a viable option, as it only worked on a few architectures, and +wasn't a good solution anyway. + +=head2 How can I make my Perl program take less memory? + +When it comes to time-space tradeoffs, Perl nearly always prefers to +throw memory at a problem. Scalars in Perl use more memory than +strings in C, arrays take more than that, and hashes use even more. While +there's still a lot to be done, recent releases have been addressing +these issues. For example, as of 5.004, duplicate hash keys are +shared amongst all hashes using them, so require no reallocation. + +In some cases, using substr() or vec() to simulate arrays can be +highly beneficial. For example, an array of a thousand booleans will +take at least 20,000 bytes of space, but it can be turned into one +125-byte bit vector for a considerable memory savings. The standard +Tie::SubstrHash module can also help for certain types of data +structure. If you're working with specialist data structures +(matrices, for instance) modules that implement these in C may use +less memory than equivalent Perl modules.
+ +Another thing to try is learning whether your Perl was compiled with +the system malloc or with Perl's builtin malloc. Whichever one it +is, try using the other one and see whether this makes a difference. +Information about malloc is in the F<INSTALL> file in the source +distribution. You can find out whether you are using perl's malloc by +typing C<perl -V:usemymalloc>. + +=head2 Is it unsafe to return a pointer to local data? + +No, Perl's garbage collection system takes care of this. + + sub makeone { + my @a = ( 1 .. 10 ); + return \@a; + } + + for $i ( 1 .. 10 ) { + push @many, makeone(); + } + + print $many[4][5], "\n"; + + print "@many\n"; + +=head2 How can I free an array or hash so my program shrinks? + +You can't. Memory the system allocates to a program will never be +returned to the system. That's why long-running programs sometimes +re-exec themselves. + +However, judicious use of my() on your variables will help make sure +that they go out of scope so that Perl can free up their storage for +use in other parts of your program. (NB: my() variables also execute +about 10% faster than globals.) A global variable, of course, never +goes out of scope, so you can't get its space automatically reclaimed, +although undef()ing and/or delete()ing it will achieve the same effect. +In general, memory allocation and de-allocation isn't something you can +or should be worrying about much in Perl, but even this capability +(preallocation of data types) is in the works. + +=head2 How can I make my CGI script more efficient? + +Beyond the normal measures described to make general Perl programs +faster or smaller, a CGI program has additional issues. It may be run +several times per second. Given that each time it runs it will need +to be re-compiled and will often allocate a megabyte or more of system +memory, this can be a killer. Compiling into C B<isn't going to help +you> because the process start-up overhead is where the bottleneck is. 
+ +There are at least two popular ways to avoid this overhead. One +solution involves running the Apache HTTP server (available from +http://www.apache.org/) with either of the mod_perl or mod_fastcgi +plugin modules. With mod_perl and the Apache::* modules (from CPAN), +httpd will run with an embedded Perl interpreter which pre-compiles +your script and then executes it within the same address space without +forking. The Apache extension also gives Perl access to the internal +server API, so modules written in Perl can do just about anything a +module written in C can. With the FCGI module (from CPAN), a Perl +executable compiled with sfio (see the F<INSTALL> file in the +distribution) and the mod_fastcgi module (available from +http://www.fastcgi.com/) each of your perl scripts becomes a permanent +CGI daemon process. + +Both of these solutions can have far-reaching effects on your system +and on the way you write your CGI scripts, so investigate them with +care. + +=head2 How can I hide the source for my Perl program? + +Delete it. :-) Seriously, there are a number of (mostly +unsatisfactory) solutions with varying levels of "security". + +First of all, however, you I<can't> take away read permission, because +the source code has to be readable in order to be compiled and +interpreted. (That doesn't mean that a CGI script's source is +readable by people on the web, though.) So you have to leave the +permissions at the socially friendly 0755 level. + +Some people regard this as a security problem. If your program does +insecure things, and relies on people not knowing how to exploit those +insecurities, it is not secure. It is often possible for someone to +determine the insecure things and exploit them without viewing the +source. Security through obscurity, the name for hiding your bugs +instead of fixing them, is little security indeed. + +You can try using encryption via source filters (Filter::* from CPAN). +But crackers might be able to decrypt it.
You can try using the byte +code compiler and interpreter described below, but crackers might be +able to de-compile it. You can try using the native-code compiler +described below, but crackers might be able to disassemble it. These +pose varying degrees of difficulty to people wanting to get at your +code, but none can definitively conceal it (this is true of every +language, not just Perl). + +If you're concerned about people profiting from your code, then the +bottom line is that nothing but a restrictive licence will give you +legal security. License your software and pepper it with threatening +statements like "This is unpublished proprietary software of XYZ Corp. +Your access to it does not give you permission to use it blah blah +blah." We are not lawyers, of course, so you should see a lawyer if +you want to be sure your licence's wording will stand up in court. + +=head2 How can I compile my Perl program into byte code or C? + +Malcolm Beattie has written a multifunction backend compiler, +available from CPAN, that can do both these things. It is as of +Feb-1997 in late alpha release, which means it's fun to play with if +you're a programmer but not really for people looking for turn-key +solutions. + +I<Please> understand that merely compiling into C does not in and of +itself guarantee that your code will run very much faster. That's +because except for lucky cases where a lot of native type inferencing +is possible, the normal Perl run time system is still present and thus +will still take just as long to run and be just as big. Most programs +save little more than compilation time, leaving execution no more than +10-30% faster. A few rare programs actually benefit significantly +(like several times faster), but this takes some tweaking of your +code. + +Malcolm will be in charge of the 5.005 release of Perl itself +to try to unify and merge his compiler and multithreading work into +the main release. 
+ +You'll probably be astonished to learn that the current version of the +compiler generates a compiled form of your script whose executable is +just as big as the original perl executable, and then some. That's +because as currently written, all programs are prepared for a full +eval() statement. You can tremendously reduce this cost by building a +shared libperl.so library and linking against that. See the +F<INSTALL> podfile in the perl source distribution for details. If +you link your main perl binary with this, it will make it minuscule. +For example, on one author's system, /usr/bin/perl is only 11k in +size! + +=head2 How can I get '#!perl' to work on [MS-DOS,NT,...]? + +For OS/2 just use + + extproc perl -S -your_switches + +as the first line in C<*.cmd> file (C<-S> due to a bug in cmd.exe's +`extproc' handling). For DOS one should first invent a corresponding +batch file, and codify it in C<ALTERNATIVE_SHEBANG> (see the +F<INSTALL> file in the source distribution for more information). + +The Win95/NT installation, when using the Activeware port of Perl, +will modify the Registry to associate the .pl extension with the perl +interpreter. If you install another port, or (eventually) build your +own Win95/NT Perl using WinGCC, then you'll have to modify the +Registry yourself. + +Macintosh perl scripts will have the appropriate Creator and +Type, so that double-clicking them will invoke the perl application. + +I<IMPORTANT!>: Whatever you do, PLEASE don't get frustrated, and just +throw the perl interpreter into your cgi-bin directory, in order to +get your scripts working for a web server. This is an EXTREMELY big +security risk. Take the time to figure out how to do it correctly. + +=head2 Can I write useful perl programs on the command line? + +Yes. Read L<perlrun> for more information. Some examples follow. +(These assume standard Unix shell quoting rules.)
+ + # sum first and last fields + perl -lane 'print $F[0] + $F[-1]' + + # identify text files + perl -le 'for(@ARGV) {print if -f && -T _}' * + + # remove comments from C program + perl -0777 -pe 's{/\*.*?\*/}{}gs' foo.c + + # make file a month younger than today, defeating reaper daemons + perl -e '$X=24*60*60; utime(time(),time() + 30 * $X,@ARGV)' * + + # find first unused uid + perl -le '$i++ while getpwuid($i); print $i' + + # display reasonable manpath + echo $PATH | perl -nl -072 -e ' + s![^/+]*$!man!&&-d&&!$s{$_}++&&push@m,$_;END{print"@m"}' + +Ok, the last one was actually an obfuscated perl entry. :-) + +=head2 Why don't perl one-liners work on my DOS/Mac/VMS system? + +The problem is usually that the command interpreters on those systems +have rather different ideas about quoting than the Unix shells under +which the one-liners were created. On some systems, you may have to +change single-quotes to double ones, which you must I<NOT> do on Unix +or Plan9 systems. You might also have to change a single % to a %%. + +For example: + + # Unix + perl -e 'print "Hello world\n"' + + # DOS, etc. + perl -e "print \"Hello world\n\"" + + # Mac + print "Hello world\n" + (then Run "Myscript" or Shift-Command-R) + + # VMS + perl -e "print ""Hello world\n""" + +The problem is that none of this is reliable: it depends on the command +interpreter. Under Unix, the first two often work. Under DOS, it's +entirely possible neither works. If 4DOS was the command shell, I'd +probably have better luck like this: + + perl -e "print <Ctrl-x>"Hello world\n<Ctrl-x>"" + +Under the Mac, it depends which environment you are using. The MacPerl +shell, or MPW, is much like Unix shells in its support for several +quoting variants, except that it makes free use of the Mac's non-ASCII +characters as control characters. + +I'm afraid that there is no general solution to all of this. It is a +mess, pure and simple. + +[Some of this answer was contributed by Kenneth Albanowski.] 
+ +=head2 Where can I learn about CGI or Web programming in Perl? + +For modules, get the CGI or LWP modules from CPAN. For textbooks, +see the two especially dedicated to web stuff in the question on +books. For problems and questions related to the web, like "Why +do I get 500 Errors" or "Why doesn't it run from the browser right +when it runs fine on the command line", see these sources: + + The Idiot's Guide to Solving Perl/CGI Problems, by Tom Christiansen + http://www.perl.com/perl/faq/idiots-guide.html + + Frequently Asked Questions about CGI Programming, by Nick Kew + ftp://rtfm.mit.edu/pub/usenet/news.answers/www/cgi-faq + http://www3.pair.com/webthing/docs/cgi/faqs/cgifaq.shtml + + Perl/CGI programming FAQ, by Shishir Gundavaram and Tom Christiansen + http://www.perl.com/perl/faq/perl-cgi-faq.html + + The WWW Security FAQ, by Lincoln Stein + http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html + + World Wide Web FAQ, by Thomas Boutell + http://www.boutell.com/faq/ + +=head2 Where can I learn about object-oriented Perl programming? + +L<perltoot> is a good place to start, and you can use L<perlobj> and +L<perlbot> for reference. Perltoot didn't come out until the 5.004 +release, but you can get a copy (in pod, html, or postscript) from +http://www.perl.com/CPAN/doc/FMTEYEWTK/ . + +=head2 Where can I learn about linking C with Perl? [h2xs, xsubpp] + +If you want to call C from Perl, start with L<perlxstut>, +moving on to L<perlxs>, L<xsubpp>, and L<perlguts>. If you want to +call Perl from C, then read L<perlembed>, L<perlcall>, and +L<perlguts>. Don't forget that you can learn a lot from looking at +how the authors of existing extension modules wrote their code and +solved their problems. + +=head2 I've read perlembed, perlguts, etc., but I can't embed perl in +my C program, what am I doing wrong? + +Download the ExtUtils::Embed kit from CPAN and run `make test'. If +the tests pass, read the pods again and again and again. 
If they +fail, see L<perlbug> and send a bug report with the output of +C<make test TEST_VERBOSE=1> along with C<perl -V>. + +=head2 When I tried to run my script, I got this message. What does it +mean? + +L<perldiag> has a complete list of perl's error messages and warnings, +with explanatory text. You can also use the splain program (distributed +with perl) to explain the error messages: + + perl program 2>diag.out + splain [-v] [-p] diag.out + +or change your program to explain the messages for you: + + use diagnostics; + +or + + use diagnostics -verbose; + +=head2 What's MakeMaker? + +This module (part of the standard perl distribution) is designed to +write a Makefile for an extension module from a Makefile.PL. For more +information, see L<ExtUtils::MakeMaker>. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. + diff --git a/pod/perlfaq4.pod b/pod/perlfaq4.pod new file mode 100644 index 0000000000..bcf03990bc --- /dev/null +++ b/pod/perlfaq4.pod @@ -0,0 +1,1101 @@ +=head1 NAME + +perlfaq4 - Data Manipulation ($Revision: 1.19 $, $Date: 1997/04/24 22:43:57 $) + +=head1 DESCRIPTION + +This section of the FAQ answers questions related to the manipulation +of data as numbers, dates, strings, arrays, hashes, and miscellaneous +data issues. + +=head1 Data: Numbers + +=head2 Why am I getting long decimals (eg, 19.9499999999999) instead of the numbers I should be getting (eg, 19.95)? + +Internally, your computer represents floating-point numbers in binary. +Floating-point numbers read in from a file, or appearing as literals +in your program, are converted from their decimal floating-point +representation (eg, 19.95) to the internal binary representation. + +However, 19.95 can't be precisely represented as a binary +floating-point number, just like 1/3 can't be exactly represented as a +decimal floating-point number.
The computer's binary representation +of 19.95, therefore, isn't exactly 19.95. + +When a floating-point number gets printed, the binary floating-point +representation is converted back to decimal. These decimal numbers +are displayed in either the format you specify with printf(), or the +current output format for numbers. (See L<perlvar/"$#"> if you use +print. C<$#> has a different default value in Perl5 than it did in +Perl4. Changing C<$#> yourself is deprecated.) + +This affects B<all> computer languages that represent decimal +floating-point numbers in binary, not just Perl. Perl provides +arbitrary-precision decimal numbers with the Math::BigFloat module +(part of the standard Perl distribution), but mathematical operations +are consequently slower. + +To get rid of the superfluous digits, just use a format (eg, +C<printf("%.2f", 19.95)>) to get the required precision. + +=head2 Why isn't my octal data interpreted correctly? + +Perl only understands octal and hex numbers as such when they occur +as literals in your program. If they are read in from somewhere and +assigned, no automatic conversion takes place. You must explicitly +use oct() or hex() if you want the values converted. oct() interprets +both hex ("0x350") numbers and octal ones ("0350" or even without the +leading "0", like "377"), while hex() only converts hexadecimal ones, +with or without a leading "0x", like "0x255", "3A", "ff", or "deadbeef". + +This problem shows up most often when people try using chmod(), mkdir(), +umask(), or sysopen(), which all want permissions in octal. + + chmod(644, $file); # WRONG -- perl -w catches this + chmod(0644, $file); # right + +=head2 Does perl have a round function? What about ceil() and floor()? +Trig functions? + +For rounding to a certain number of digits, sprintf() or printf() is +usually the easiest route.
+ +The POSIX module (part of the standard perl distribution) implements +ceil(), floor(), and a number of other mathematical and trigonometric +functions. + +In 5.000 to 5.003 Perls, trigonometry was done in the Math::Complex +module. With 5.004, the Math::Trig module (part of the standard perl +distribution) implements the trigonometric functions. Internally it +uses the Math::Complex module and some functions can break out from +the real axis into the complex plane, for example the inverse sine of +2. + +Rounding in financial applications can have serious implications, and +the rounding method used should be specified precisely. In these +cases, it probably pays not to trust whichever system rounding is +being used by Perl, but to instead implement the rounding function you +need yourself. + +=head2 How do I convert bits into ints? + +To turn a string of 1s and 0s like '10110110' into a scalar containing +its binary value, use the pack() function (documented in +L<perlfunc/"pack">): + + $decimal = pack('B8', '10110110'); + +Here's an example of going the other way: + + $binary_string = join('', unpack('B*', "\x29")); + +=head2 How do I multiply matrices? + +Use the Math::Matrix or Math::MatrixReal modules (available from CPAN) +or the PDL extension (also available from CPAN). + +=head2 How do I perform an operation on a series of integers? + +To call a function on each element in an array, and collect the +results, use: + + @results = map { my_func($_) } @array; + +For example: + + @triple = map { 3 * $_ } @single; + +To call a function on each element of an array, but ignore the +results: + + foreach $iterator (@array) { + &my_func($iterator); + } + +To call a function on each integer in a (small) range, you B<can> use: + + @results = map { &my_func($_) } (5 .. 25); + +but you should be aware that the C<..> operator creates an array of +all integers in the range. This can take a lot of memory for large +ranges. 
Instead use: + + @results = (); + for ($i=5; $i < 500_005; $i++) { + push(@results, &my_func($i)); + } + +=head2 How can I output Roman numerals? + +Get the http://www.perl.com/CPAN/modules/by-module/Roman module. + +=head2 Why aren't my random numbers random? + +The short explanation is that you're getting pseudorandom numbers, not +random ones, because that's how these things work. A longer +explanation is available on +http://www.perl.com/CPAN/doc/FMTEYEWTK/random, courtesy of Tom +Phoenix. + +You should also check out the Math::TrulyRandom module from CPAN. + +=head1 Data: Dates + +=head2 How do I find the week-of-the-year/day-of-the-year? + +The day of the year is in the array returned by localtime() (see +L<perlfunc/"localtime">): + + $day_of_year = (localtime(time()))[7]; + +or more legibly (in 5.004 or higher): + + use Time::localtime; + $day_of_year = localtime(time())->yday; + +You can find the week of the year by dividing this by 7: + + $week_of_year = int($day_of_year / 7); + +Of course, this believes that weeks start at zero. + +=head2 How can I compare two date strings? + +Use the Date::Manip or Date::DateCalc modules from CPAN. + +=head2 How can I take a string and turn it into epoch seconds? + +If it's a regular enough string that it always has the same format, +you can split it up and pass the parts to timelocal in the standard +Time::Local module. Otherwise, you should look into one of the +Date modules from CPAN. + +=head2 How can I find the Julian Day? + +Neither Date::Manip nor Date::DateCalc deal with Julian days. +Instead, there is an example of Julian date calculation in +http://www.perl.com/CPAN/authors/David_Muir_Sharnoff/modules/Time/JulianDay.pm.gz, +which should help. + +=head2 Does Perl have a year 2000 problem? + +Not unless you use Perl to create one. The date and time functions +supplied with perl (gmtime and localtime) supply adequate information +to determine the year well beyond 2000 (2038 is when trouble strikes). 
+The year returned by these functions when used in an array context is +the year minus 1900. For years between 1910 and 1999 this I<happens> +to be a 2-digit decimal number. To avoid the year 2000 problem simply +do not treat the year as a 2-digit number. It isn't. + +When gmtime() and localtime() are used in a scalar context they return +a timestamp string that contains a fully-expanded year. For example, +C<$timestamp = gmtime(1005613200)> sets $timestamp to "Tue Nov 13 01:00:00 +2001". There's no year 2000 problem here. + +=head1 Data: Strings + +=head2 How do I validate input? + +The answer to this question is usually a regular expression, perhaps +with auxiliary logic. See the more specific questions (numbers, email +addresses, etc.) for details. + +=head2 How do I unescape a string? + +It depends just what you mean by "escape". URL escapes are dealt with +in L<perlfaq9>. Shell escapes with the backslash (\) +character are removed with: + + s/\\(.)/$1/g; + +Note that this won't expand \n or \t or any other special escapes. + +=head2 How do I remove consecutive pairs of characters? + +To turn "abbcccd" into "abccd": + + s/(.)\1/$1/g; + +=head2 How do I expand function calls in a string? + +This is documented in L<perlref>. In general, this is fraught with +quoting and readability problems, but it is possible. To interpolate +a subroutine call (in a list context) into a string: + + print "My sub returned @{[mysub(1,2,3)]} that time.\n"; + +If you prefer scalar context, similar chicanery is also useful for +arbitrary expressions: + + print "That yields ${\($n + 5)} widgets\n"; + +See also "How can I expand variables in text strings?" in this section +of the FAQ. + +=head2 How do I find matching/nesting anything? + +This isn't something that can be tackled in one regular expression, no +matter how complicated. To find something between two single characters, +a pattern like C</x([^x]*)x/> will get the intervening bits in $1. 
For +multiple ones, then something more like C</alpha(.*?)omega/> would +be needed. But none of these deals with nested patterns, nor can they. +For that you'll have to write a parser. + +=head2 How do I reverse a string? + +Use reverse() in a scalar context, as documented in +L<perlfunc/reverse>. + + $reversed = reverse $string; + +=head2 How do I expand tabs in a string? + +You can do it the old-fashioned way: + + 1 while $string =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; + +Or you can just use the Text::Tabs module (part of the standard perl +distribution). + + use Text::Tabs; + @expanded_lines = expand(@lines_with_tabs); + +=head2 How do I reformat a paragraph? + +Use Text::Wrap (part of the standard perl distribution): + + use Text::Wrap; + print wrap("\t", ' ', @paragraphs); + +The paragraphs you give to Text::Wrap may not contain embedded +newlines. Text::Wrap doesn't justify the lines (flush-right). + +=head2 How can I access/change the first N letters of a string? + +There are many ways. If you just want to grab a copy, use +substr: + + $first_byte = substr($a, 0, 1); + +If you want to modify part of a string, the simplest way is often to +use substr() as an lvalue: + + substr($a, 0, 3) = "Tom"; + +Although those with a regexp kind of thought process will likely prefer + + $a =~ s/^.../Tom/; + +=head2 How do I change the Nth occurrence of something? + +You have to keep track. For example, let's say you want +to change the fifth occurrence of "whoever" or "whomever" +into "whosoever" or "whomsoever", case insensitively. + + $count = 0; + s{((whom?)ever)}{ + ++$count == 5 # is it the 5th? + ? "${2}soever" # yes, swap + : $1 # renege and leave it there + }igex; + +=head2 How can I count the number of occurrences of a substring within a string? 
+ +There are a number of ways, with varying efficiency: If you want a +count of a certain single character (X) within a string, you can use the +C<tr///> function like so: + + $string = "ThisXlineXhasXsomeXx'sXinXit": + $count = ($string =~ tr/X//); + print "There are $count X charcters in the string"; + +This is fine if you are just looking for a single character. However, +if you are trying to count multiple character substrings within a +larger string, C<tr///> won't work. What you can do is wrap a while() +loop around a global pattern match. For example, let's count negative +integers: + + $string = "-9 55 48 -2 23 -76 4 14 -44"; + while ($string =~ /-\d+/g) { $count++ } + print "There are $count negative numbers in the string"; + +=head2 How do I capitalize all the words on one line? + +To make the first letter of each word upper case: + + $line =~ s/\b(\w)/\U$1/g; + +This has the strange effect of turning "C<don't do it>" into "C<Don'T +Do It>". Sometimes you might want this, instead (Suggested by Brian +Foy E<lt>comdog@computerdog.comE<gt>): + + $string =~ s/ ( + (^\w) #at the beginning of the line + | # or + (\s\w) #preceded by whitespace + ) + /\U$1/xg; + $string =~ /([\w']+)/\u\L$1/g; + +To make the whole line upper case: + + $line = uc($line); + +To force each word to be lower case, with the first letter upper case: + + $line =~ s/(\w+)/\u\L$1/g; + +=head2 How can I split a [character] delimited string except when inside +[character]? (Comma-separated files) + +Take the example case of trying to split a string that is comma-separated +into its different fields. (We'll pretend you said comma-separated, not +comma-delimited, which is different and almost never what you mean.) You +can't use C<split(/,/)> because you shouldn't split if the comma is inside +quotes. For example, take a data line like this: + + SAR001,"","Cimetrix, Inc","Bob Smith","CAM",N,8,1,0,7,"Error, Core Dumped" + +Due to the restriction of the quotes, this is a fairly complex +problem. 
Thankfully, we have Jeffrey Friedl, author of a highly +recommended book on regular expressions, to handle these for us. He +suggests (assuming your string is contained in $text): + + @new = (); + push(@new, $+) while $text =~ m{ + "([^\"\\]*(?:\\.[^\"\\]*)*)",? # groups the phrase inside the quotes + | ([^,]+),? + | , + }gx; + push(@new, undef) if substr($text,-1,1) eq ','; + +If you want to represent quotation marks inside a +quotation-mark-delimited field, escape them with backslashes (e.g., +C<"like \"this\"">). Unescaping them is a task addressed earlier in +this section. + +Alternatively, the Text::ParseWords module (part of the standard perl +distribution) lets you say: + + use Text::ParseWords; + @new = quotewords(",", 0, $text); + +=head2 How do I strip blank space from the beginning/end of a string? + +The simplest approach, albeit not the fastest, is probably like this: + + $string =~ s/^\s*(.*?)\s*$/$1/; + +It would be faster to do this in two steps: + + $string =~ s/^\s+//; + $string =~ s/\s+$//; + +Or more nicely written as: + + for ($string) { + s/^\s+//; + s/\s+$//; + } + +=head2 How do I extract selected columns from a string? + +Use substr() or unpack(), both documented in L<perlfunc>. + +=head2 How do I find the soundex value of a string? + +Use the standard Text::Soundex module distributed with perl. + +=head2 How can I expand variables in text strings? + +Let's assume that you have a string like: + + $text = 'this has a $foo in it and a $bar'; + $text =~ s/\$(\w+)/${$1}/g; + +Before version 5 of perl, this had to be done with a double-eval +substitution: + + $text =~ s/(\$\w+)/$1/eeg; + +Which is bizarre enough that you'll probably actually need an EEG +afterwards. :-) + +See also "How do I expand function calls in a string?" in this section +of the FAQ. + +=head2 What's wrong with always quoting "$vars"?
+ +The problem is that those double-quotes force stringification, +coercing numbers and references into strings, even when you +don't want them to be. + +If you get used to writing odd things like these: + + print "$var"; # BAD + $new = "$old"; # BAD + somefunc("$var"); # BAD + +You'll be in trouble. Those should (in 99.8% of the cases) be +the simpler and more direct: + + print $var; + $new = $old; + somefunc($var); + +Otherwise, besides slowing you down, you're going to break code when +the thing in the scalar is actually neither a string nor a number, but +a reference: + + func(\@array); + sub func { + my $aref = shift; + my $oref = "$aref"; # WRONG + } + +You can also get into subtle problems on those few operations in Perl +that actually do care about the difference between a string and a +number, such as the magical C<++> autoincrement operator or the +syscall() function. + +=head2 Why don't my <<HERE documents work? + +Check for these three things: + +=over 4 + +=item 1. There must be no space after the << part. + +=item 2. There (probably) should be a semicolon at the end. + +=item 3. You can't (easily) have any space in front of the tag. + +=back + +=head1 Data: Arrays + +=head2 What is the difference between $array[1] and @array[1]? + +The former is a scalar value, the latter an array slice, which makes +it a list with one (scalar) value. You should use $ when you want a +scalar value (most of the time) and @ when you want a list with one +scalar value in it (very, very rarely; nearly never, in fact). + +Sometimes it doesn't make a difference, but sometimes it does. +For example, compare: + + $good[0] = `some program that outputs several lines`; + +with + + @bad[0] = `same program that outputs several lines`; + +The B<-w> flag will warn you about these matters. + +=head2 How can I extract just the unique elements of an array? + +There are several possible ways, depending on whether the array is +ordered and whether you wish to preserve the ordering. 
+ +=over 4 + +=item a) If @in is sorted, and you want @out to be sorted: + + $prev = 'nonesuch'; + @out = grep($_ ne $prev && ($prev = $_), @in); + +This is nice in that it doesn't use much extra memory, +simulating uniq(1)'s behavior of removing only adjacent +duplicates. + +=item b) If you don't know whether @in is sorted: + + undef %saw; + @out = grep(!$saw{$_}++, @in); + +=item c) Like (b), but @in contains only small integers: + + @out = grep(!$saw[$_]++, @in); + +=item d) A way to do (b) without any loops or greps: + + undef %saw; + @saw{@in} = (); + @out = sort keys %saw; # remove sort if undesired + +=item e) Like (d), but @in contains only small positive integers: + + undef @ary; + @ary[@in] = @in; + @out = @ary; + +=back + +=head2 How can I tell whether an array contains a certain element? + +There are several ways to approach this. If you are going to make +this query many times and the values are arbitrary strings, the +fastest way is probably to invert the original array and keep an +associative array lying about whose keys are the first array's values. + + @blues = qw/azure cerulean teal turquoise lapis-lazuli/; + undef %is_blue; + for (@blues) { $is_blue{$_} = 1 } + +Now you can check whether $is_blue{$some_color}. It might have been a +good idea to keep the blues all in a hash in the first place. + +If the values are all small integers, you could use a simple indexed +array. This kind of an array will take up less space: + + @primes = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31); + undef @is_tiny_prime; + for (@primes) { $is_tiny_prime[$_] = 1; } + +Now you check whether $is_tiny_prime[$some_number]. + +If the values in question are integers instead of strings, you can save +quite a lot of space by using bit strings instead: + + @articles = ( 1..10, 150..2000, 2017 ); + undef $read; + grep (vec($read,$_,1) = 1, @articles); + +Now check whether C<vec($read,$n,1)> is true for some C<$n>. 
+ +Please do not use + + $is_there = grep $_ eq $whatever, @array; + +or worse yet + + $is_there = grep /$whatever/, @array; + +These are slow (checks every element even if the first matches), +inefficient (same reason), and potentially buggy (what if there are +regexp characters in $whatever?). + +=head2 How do I compute the difference of two arrays? How do I compute the intersection of two arrays? + +Use a hash. Here's code to do both and more. It assumes that +each element is unique in a given array: + + @union = @intersection = @difference = (); + %count = (); + foreach $element (@array1, @array2) { $count{$element}++ } + foreach $element (keys %count) { + push @union, $element; + push @{ $count{$element} > 1 ? \@intersection : \@difference }, $element; + } + +=head2 How do I find the first array element for which a condition is true? + +You can use this if you care about the index: + + for ($i=0; $i < @array; $i++) { + if ($array[$i] eq "Waldo") { + $found_index = $i; + last; + } + } + +Now C<$found_index> has what you want. + +=head2 How do I handle linked lists? + +In general, you usually don't need a linked list in Perl, since with +regular arrays, you can push and pop or shift and unshift at either end, +or you can use splice to add and/or remove arbitrary number of elements +at arbitrary points. + +If you really, really wanted, you could use structures as described in +L<perldsc> or L<perltoot> and do just what the algorithm book tells you +to do. + +=head2 How do I handle circular lists? + +Circular lists could be handled in the traditional fashion with linked +lists, or you could just do something like this with an array: + + unshift(@array, pop(@array)); # the last shall be first + push(@array, shift(@array)); # and vice versa + +=head2 How do I shuffle an array randomly? + +Here's a shuffling algorithm which works its way through the list, +randomly picking another element to swap the current element with: + + srand; + @new = (); + @old = 1 .. 
10; # just a demo + while (@old) { + push(@new, splice(@old, rand @old, 1)); + } + +For large arrays, this avoids a lot of the reshuffling: + + srand; + @new = (); + @old = 1 .. 10000; # just a demo + for( @old ){ + my $r = rand @new+1; + push(@new,$new[$r]); + $new[$r] = $_; + } + +=head2 How do I process/modify each element of an array? + +Use C<for>/C<foreach>: + + for (@lines) { + s/foo/bar/; + tr[a-z][A-Z]; + } + +Here's another; let's compute spherical volumes: + + for (@radii) { + $_ **= 3; + $_ *= (4/3) * 3.14159; # this will be constant folded + } + +=head2 How do I select a random element from an array? + +Use the rand() function (see L<perlfunc/rand>): + + srand; # not needed for 5.004 and later + $index = rand @array; + $element = $array[$index]; + +=head2 How do I permute N elements of a list? + +Here's a little program that generates all permutations +of all the words on each line of input. The algorithm embodied +in the permut() function should work on any list: + + #!/usr/bin/perl -n + # permute - tchrist@perl.com + permut([split], []); + sub permut { + my @head = @{ $_[0] }; + my @tail = @{ $_[1] }; + unless (@head) { + # stop recursing when there are no elements in the head + print "@tail\n"; + } else { + # for all elements in @head, move one from @head to @tail + # and call permut() on the new @head and @tail + my(@newhead,@newtail,$i); + foreach $i (0 .. $#head) { + @newhead = @head; + @newtail = @tail; + unshift(@newtail, splice(@newhead, $i, 1)); + permut([@newhead], [@newtail]); + } + } + } + +=head2 How do I sort an array by (anything)? + +Supply a comparison function to sort() (described in L<perlfunc/sort>): + + @list = sort { $a <=> $b } @list; + +The default sort function is cmp, string comparison, which would +sort C<(1, 2, 10)> into C<(1, 10, 2)>. C<E<lt>=E<gt>>, used above, is +the numerical comparison operator. 
+ +If you have a complicated function needed to pull out the part you +want to sort on, then don't do it inside the sort function. Pull it +out first, because the sort BLOCK can be called many times for the +same element. Here's an example of how to pull out the first word +after the first number on each item, and then sort those words +case-insensitively. + + @idx = (); + for (@data) { + ($item) = /\d+\s*(\S+)/; + push @idx, uc($item); + } + @sorted = @data[ sort { $idx[$a] cmp $idx[$b] } 0 .. $#idx ]; + +Which could also be written this way, using a trick +that's come to be known as the Schwartzian Transform: + + @sorted = map { $_->[0] } + sort { $a->[1] cmp $b->[1] } + map { [ $_, uc((/\d+\s*(\S+)/ )[0]) ] } @data; + +If you need to sort on several fields, the following paradigm is useful. + + @sorted = sort { field1($a) <=> field1($b) || + field2($a) cmp field2($b) || + field3($a) cmp field3($b) + } @data; + +This can be conveniently combined with precalculation of keys as given +above. + +See http://www.perl.com/CPAN/doc/FMTEYEWTK/sort.html for more about +this approach. + +See also the question below on sorting hashes. + +=head2 How do I manipulate arrays of bits? + +Use pack() and unpack(), or else vec() and the bitwise operations.
+ +For example, this sets $vec to have bit N set if $ints[N] was set: + + $vec = ''; + foreach(@ints) { vec($vec,$_,1) = 1 } + +And here's how, given a vector in $vec, you can +get those bits into your @ints array: + + sub bitvec_to_list { + my $vec = shift; + my @ints; + # Find null-byte density then select best algorithm + if ($vec =~ tr/\0// / length $vec > 0.95) { + use integer; + my $i; + # This method is faster with mostly null-bytes + while($vec =~ /[^\0]/g ) { + $i = -9 + 8 * pos $vec; + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + push @ints, $i if vec($vec, ++$i, 1); + } + } else { + # This method is a fast general algorithm + use integer; + my $bits = unpack "b*", $vec; + push @ints, 0 if $bits =~ s/^(\d)// && $1; + push @ints, pos $bits while($bits =~ /1/g); + } + return \@ints; + } + +This method gets faster the more sparse the bit vector is. +(Courtesy of Tim Bunce and Winfried Koenig.) + +=head2 Why does defined() return true on empty arrays and hashes? + +See L<perlfunc/defined> in the 5.004 release or later of Perl. + +=head1 Data: Hashes (Associative Arrays) + +=head2 How do I process an entire hash? + +Use the each() function (see L<perlfunc/each>) if you don't care +whether it's sorted: + + while (($key,$value) = each %hash) { + print "$key = $value\n"; + } + +If you want it sorted, you'll have to use foreach() on the result of +sorting the keys as shown in an earlier question. + +=head2 What happens if I add or remove keys from a hash while iterating over it? + +Don't do that. + +=head2 How do I look up a hash element by value? + +Create a reverse hash: + + %by_value = reverse %by_key; + $key = $by_value{$value}; + +That's not particularly efficient. 
It would be more space-efficient +to use: + + while (($key, $value) = each %by_key) { + $by_value{$value} = $key; + } + +If your hash could have repeated values, the methods above will only +find one of the associated keys. This may or may not worry you. + +=head2 How can I know how many entries are in a hash? + +If you mean how many keys, then all you have to do is +take the scalar sense of the keys() function: + + $num_keys = scalar keys %hash; + +In void context it just resets the iterator, which is faster +for tied hashes. + +=head2 How do I sort a hash (optionally by value instead of key)? + +Internally, hashes are stored in a way that prevents you from imposing +an order on key-value pairs. Instead, you have to sort a list of the +keys or values: + + @keys = sort keys %hash; # sorted by key + @keys = sort { + $hash{$a} cmp $hash{$b} + } keys %hash; # and by value + +Here we'll do a reverse numeric sort by value, and if two values are +identical, sort by length of key, and if that fails, by straight ASCII +comparison of the keys (well, possibly modified by your locale -- see +L<perllocale>). + + @keys = sort { + $hash{$b} <=> $hash{$a} + || + length($b) <=> length($a) + || + $a cmp $b + } keys %hash; + +=head2 How can I always keep my hash sorted? + +You can look into using the DB_File module and tie() using the +$DB_BTREE hash bindings as documented in L<DB_File/"In Memory Databases">. + +=head2 What's the difference between "delete" and "undef" with hashes? + +Hashes are pairs of scalars: the first is the key, the second is the +value. The key will be coerced to a string, although the value can be +any kind of scalar: string, number, or reference. If a key C<$key> is +present in the hash C<%array>, C<exists($array{$key})> will return true. The value for +a given key can be C<undef>, in which case C<$array{$key}> will be +C<undef> while C<exists($array{$key})> will return true. This corresponds to +(C<$key>, C<undef>) being in the hash. + +Pictures help...
here's the C<%ary> table: + + keys values + +------+------+ + | a | 3 | + | x | 7 | + | d | 0 | + | e | 2 | + +------+------+ + +And these conditions hold + + $ary{'a'} is true + $ary{'d'} is false + defined $ary{'d'} is true + defined $ary{'a'} is true + exists $ary{'a'} is true (perl5 only) + grep ($_ eq 'a', keys %ary) is true + +If you now say + + undef $ary{'a'} + +your table now reads: + + + keys values + +------+------+ + | a | undef| + | x | 7 | + | d | 0 | + | e | 2 | + +------+------+ + +and these conditions now hold; changes in caps: + + $ary{'a'} is FALSE + $ary{'d'} is false + defined $ary{'d'} is true + defined $ary{'a'} is FALSE + exists $ary{'a'} is true (perl5 only) + grep ($_ eq 'a', keys %ary) is true + +Notice the last two: you have an undef value, but a defined key! + +Now, consider this: + + delete $ary{'a'} + +your table now reads: + + keys values + +------+------+ + | x | 7 | + | d | 0 | + | e | 2 | + +------+------+ + +and these conditions now hold; changes in caps: + + $ary{'a'} is false + $ary{'d'} is false + defined $ary{'d'} is true + defined $ary{'a'} is false + exists $ary{'a'} is FALSE (perl5 only) + grep ($_ eq 'a', keys %ary) is FALSE + +See, the whole entry is gone! + +=head2 Why don't my tied hashes make the defined/exists distinction? + +They may or may not implement the EXISTS() and DEFINED() methods +differently. For example, there isn't the concept of undef with hashes +that are tied to DBM* files. This means the true/false tables above +will give different results when used on such a hash. It also means +that exists and defined do the same thing with a DBM* file, and what +they end up doing is not what they do with ordinary hashes. + +=head2 How do I reset an each() operation part-way through? + +Using C<keys %hash> in a scalar context returns the number of keys in +the hash I<and> resets the iterator associated with the hash. 
You may +need to do this if you use C<last> to exit a loop early so that when you +re-enter it, the hash iterator has been reset. + +=head2 How can I get the unique keys from two hashes? + +First you extract the keys from the hashes into arrays, and then solve +the uniquifying the array problem described above. For example: + + %seen = (); + for $element (keys(%foo), keys(%bar)) { + $seen{$element}++; + } + @uniq = keys %seen; + +Or more succinctly: + + @uniq = keys %{{%foo,%bar}}; + +Or if you really want to save space: + + %seen = (); + while (defined ($key = each %foo)) { + $seen{$key}++; + } + while (defined ($key = each %bar)) { + $seen{$key}++; + } + @uniq = keys %seen; + +=head2 How can I store a multidimensional array in a DBM file? + +Either stringify the structure yourself (no fun), or else +get the MLDBM (which uses Data::Dumper) module from CPAN and layer +it on top of either DB_File or GDBM_File. + +=head2 How can I make my hash remember the order I put elements into it? + +Use the Tie::IxHash from CPAN. + + use Tie::IxHash; + tie(%myhash, Tie::IxHash); + for ($i=0; $i<20; $i++) { + $myhash{$i} = 2*$i; + } + @keys = keys %myhash; + # @keys = (0,1,2,3,...) + +=head2 Why does passing a subroutine an undefined element in a hash create it? + +If you say something like: + + somefunc($hash{"nonesuch key here"}); + +Then that element "autovivifies"; that is, it springs into existence +whether you store something there or not. That's because functions +get scalars passed in by reference. If somefunc() modifies C<$_[0]>, +it has to be ready to write it back into the caller's version. + +This has been fixed as of perl5.004. + +Normally, merely accessing a key's value for a nonexistent key does +I<not> cause that key to be forever there. This is different than +awk's behavior. + +=head2 How can I make the Perl equivalent of a C structure/C++ class/hash or array of hashes or arrays? + +Use references (documented in L<perlref>). 
Examples of complex data +structures are given in L<perldsc> and L<perllol>. Examples of +structures and object-oriented classes are in L<perltoot>. + +=head2 How can I use a reference as a hash key? + +You can't do this directly, but you could use the standard Tie::RefHash +module distributed with perl. + +=head1 Data: Misc + +=head2 How do I handle binary data correctly? + +Perl is binary clean, so this shouldn't be a problem. For example, +this works fine (assuming the files are found): + + if (`cat /vmunix` =~ /gzip/) { + print "Your kernel is GNU-zip enabled!\n"; + } + +On some systems, however, you have to play tedious games with "text" +versus "binary" files. See L<perlfunc/"binmode">. + +If you're concerned about 8-bit ASCII data, then see L<perllocale>. + +If you want to deal with multibyte characters, however, there are +some gotchas. See the section on Regular Expressions. + +=head2 How do I determine whether a scalar is a number/whole/integer/float? + +Assuming that you don't care about IEEE notations like "NaN" or +"Infinity", you probably just want to use a regular expression. + + warn "has nondigits" if /\D/; + warn "not a whole number" unless /^\d+$/; + warn "not an integer" unless /^-?\d+$/; # reject +3 + warn "not an integer" unless /^[+-]?\d+$/; + warn "not a decimal number" unless /^-?\d+\.?\d*$/; # rejects .2 + warn "not a decimal number" unless /^-?(?:\d+(?:\.\d*)?|\.\d+)$/; + warn "not a C float" + unless /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/; + +Or you could check out +http://www.perl.com/CPAN/modules/by-module/String/String-Scanf-1.1.tar.gz +instead. The POSIX module (part of the standard Perl distribution) +provides the C<strtol> and C<strtod> functions for converting strings to longs +and doubles, respectively. + +=head2 How do I keep persistent data across program calls? + +For some specific applications, you can use one of the DBM modules. +See L<AnyDBM_File>.
More generically, you should consult the +FreezeThaw, Storable, or Class::Eroot modules from CPAN. + +=head2 How do I print out or copy a recursive data structure? + +The Data::Dumper module on CPAN is nice for printing out +data structures, and FreezeThaw for copying them. For example: + + use FreezeThaw qw(freeze thaw); + $new = thaw freeze $old; + +Where $old can be (a reference to) any kind of data structure you'd like. +It will be deeply copied. + +=head2 How do I define methods for every class/object? + +Use the UNIVERSAL class (see L<UNIVERSAL>). + +=head2 How do I verify a credit card checksum? + +Get the Business::CreditCard module from CPAN. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. + diff --git a/pod/perlfaq5.pod b/pod/perlfaq5.pod new file mode 100644 index 0000000000..03d5e6a797 --- /dev/null +++ b/pod/perlfaq5.pod @@ -0,0 +1,830 @@ +=head1 NAME + +perlfaq5 - Files and Formats ($Revision: 1.22 $, $Date: 1997/04/24 22:44:02 $) + +=head1 DESCRIPTION + +This section deals with I/O and the "f" issues: filehandles, flushing, +formats, and footers. + +=head2 How do I flush/unbuffer a filehandle? Why must I do this? + +The C standard I/O library (stdio) normally buffers characters sent to +devices. This is done for efficiency reasons, so that there isn't a +system call for each byte. Any time you use print() or write() in +Perl, you go through this buffering. syswrite() circumvents stdio and +buffering. + +In most stdio implementations, the type of buffering and the size of +the buffer varies according to the type of device. Disk files are block +buffered, often with a buffer size of more than 2k. Pipes and sockets +are often buffered with a buffer size between 1/2 and 2k. Serial devices +(e.g. modems, terminals) are normally line-buffered, and stdio sends +the entire line when it gets the newline.
+ +Perl does not support truly unbuffered output (except insofar as you can +C<syswrite(OUT, $char, 1)>). What it does instead support is "command +buffering", in which a physical write is performed after every output +command. This isn't as hard on your system as unbuffering, but does +get the output where you want it when you want it. + +If you expect characters to get to your device when you print them there, +you'll want to autoflush its handle, as in the older: + + use FileHandle; + open(DEV, "+</dev/tty"); # ceci n'est pas une pipe + DEV->autoflush(1); + +or the newer IO::* modules: + + use IO::Handle; + open(DEV, ">/dev/printer"); # but is this? + DEV->autoflush(1); + +or even this: + + use IO::Socket; # this one is kinda a pipe? + $sock = IO::Socket::INET->new(PeerAddr => 'www.perl.com', + PeerPort => 'http(80)', + Proto => 'tcp'); + die "$!" unless $sock; + + $sock->autoflush(); + $sock->print("GET /\015\012"); + $document = join('', $sock->getlines()); + print "DOC IS: $document\n"; + +Note the hardcoded carriage return and newline in their octal +equivalents. This is the ONLY way (currently) to assure a proper +flush on all platforms, including Macintosh. + +You can use select() and the C<$|> variable to control autoflushing +(see L<perlvar/$|> and L<perlfunc/select>): + + $oldh = select(DEV); + $| = 1; + select($oldh); + +You'll also see code that does this without a temporary variable, as in + + select((select(DEV), $| = 1)[0]); + +=head2 How do I change one line in a file/delete a line in a file/insert a line in the middle of a file/append to the beginning of a file? + +Although humans have an easy time thinking of a text file as being a +sequence of lines that operates much like a stack of playing cards -- +or punch cards -- computers usually see the text file as a sequence of +bytes. In general, there's no direct way for Perl to seek to a +particular line of a file, insert text into a file, or remove text +from a file.
+ +(There are exceptions in special circumstances. Replacing a sequence +of bytes with another sequence of the same length is one. Another is +using the C<$DB_RECNO> array bindings as documented in L<DB_File>. +Yet another is manipulating files with all lines the same length.) + +The general solution is to create a temporary copy of the text file with +the changes you want, then copy that over the original. + + $old = $file; + $new = "$file.tmp.$$"; + $bak = "$file.bak"; + + open(OLD, "< $old") or die "can't open $old: $!"; + open(NEW, "> $new") or die "can't open $new: $!"; + + # Correct typos, preserving case + while (<OLD>) { + s/\b(p)earl\b/${1}erl/i; + (print NEW $_) or die "can't write to $new: $!"; + } + + close(OLD) or die "can't close $old: $!"; + close(NEW) or die "can't close $new: $!"; + + rename($old, $bak) or die "can't rename $old to $bak: $!"; + rename($new, $old) or die "can't rename $new to $old: $!"; + +Perl can do this sort of thing for you automatically with the C<-i> +command-line switch or the closely-related C<$^I> variable (see +L<perlrun> for more details). Note that +C<-i> may require a suffix on some non-Unix systems; see the +platform-specific documentation that came with your port. + + # Renumber a series of tests from the command line + perl -pi -e 's/(^\s+test\s+)\d+/ $1 . ++$count /e' t/op/taint.t + + # from a script + local($^I, @ARGV) = ('.bak', glob("*.c")); + while (<>) { + if ($. == 1) { + print "This line should appear at the top of each file\n"; + } + s/\b(p)earl\b/${1}erl/i; # Correct typos, preserving case + print; + close ARGV if eof; # Reset $. + } + +If you need to seek to an arbitrary line of a file that changes +infrequently, you could build up an index of byte positions of where +the line ends are in the file. If the file is large, an index of +every tenth or hundredth line end would allow you to seek and read +fairly efficiently.
If the file is sorted, try the look.pl library +(part of the standard perl distribution). + +In the unique case of deleting lines at the end of a file, you +can use tell() and truncate(). The following code snippet deletes +the last line of a file without making a copy or reading the +whole file into memory: + + open (FH, "+< $file"); + while ( <FH> ) { $addr = tell(FH) unless eof(FH) } + truncate(FH, $addr); + +Error checking is left as an exercise for the reader. + +=head2 How do I count the number of lines in a file? + +One fairly efficient way is to count newlines in the file. The +following program uses a feature of tr///, as documented in L<perlop>. +If your text file doesn't end with a newline, then it's not really a +proper text file, so this may report one fewer line than you expect. + + $lines = 0; + open(FILE, $filename) or die "Can't open `$filename': $!"; + while (sysread FILE, $buffer, 4096) { + $lines += ($buffer =~ tr/\n//); + } + close FILE; + +=head2 How do I make a temporary file name? + +Use the process ID and/or the current time-value. If you need to have +many temporary files in one process, use a counter: + + BEGIN { + use IO::File; + use Fcntl; + my $temp_dir = -d '/tmp' ? '/tmp' : $ENV{TMP} || $ENV{TEMP}; + my $base_name = sprintf("%s/%d-%d-0000", $temp_dir, $$, time()); + sub temp_file { + my $fh = undef; + my $count = 0; + until (defined($fh) || $count > 100) { + $base_name =~ s/-(\d+)$/"-" . (1 + $1)/e; + $fh = IO::File->new($base_name, O_WRONLY|O_EXCL|O_CREAT, 0644) + } + if (defined($fh)) { + return ($fh, $base_name); + } else { + return (); + } + } + } + +Or you could simply use IO::Handle::new_tmpfile. + +=head2 How can I manipulate fixed-record-length files? + +The most efficient way is using pack() and unpack(). This is faster +than using substr(). 
Here is a sample chunk of code to break up and +put back together again some fixed-format input lines, in this case +from the output of a normal, Berkeley-style ps: + + # sample input line: + # 15158 p5 T 0:00 perl /home/tchrist/scripts/now-what + $PS_T = 'A6 A4 A7 A5 A*'; + open(PS, "ps|"); + $_ = <PS>; print; + while (<PS>) { + ($pid, $tt, $stat, $time, $command) = unpack($PS_T, $_); + for $var (qw!pid tt stat time command!) { + print "$var: <$$var>\n"; + } + print 'line=', pack($PS_T, $pid, $tt, $stat, $time, $command), + "\n"; + } + +=head2 How can I make a filehandle local to a subroutine? How do I pass filehandles between subroutines? How do I make an array of filehandles? + +You may have some success with typeglobs, as we always had to use +in days of old: + + local(*FH); + +But while still supported, that isn't the best way to go about getting +local filehandles. Typeglobs have their drawbacks. You may well want +to use the C<FileHandle> module, which creates new filehandles for you +(see L<FileHandle>): + + use FileHandle; + sub findme { + my $fh = FileHandle->new(); + open($fh, "</etc/hosts") or die "no /etc/hosts: $!"; + while (<$fh>) { + print if /\b127\.(0\.0\.)?1\b/; + } + # $fh automatically closes/disappears here + } + +Internally, Perl believes filehandles to be of class IO::Handle. You +may use that module directly if you'd like (see L<IO::Handle>), or +one of its more specific derived classes.
+ +Once you have IO::File or FileHandle objects, you can pass them +between subroutines or store them in hashes as you would any other +scalar values: + + use FileHandle; + + # Storing filehandles in a hash and array + foreach $filename (@names) { + my $fh = new FileHandle($filename) or die; + $file{$filename} = $fh; + push(@files, $fh); + } + + # Using the filehandles in the array + foreach $file (@files) { + print $file "Testing\n"; + } + + # You have to do the { } ugliness when you're specifying the + # filehandle by anything other than a simple scalar variable. + print { $files[2] } "Testing\n"; + + # Passing filehandles to subroutines + sub debug { + my $filehandle = shift; + printf $filehandle "DEBUG: ", @_; + } + + debug($fh, "Testing\n"); + +=head2 How can I set up a footer format to be used with write()? + +There's no builtin way to do this, but L<perlform> has a couple of +techniques to make it possible for the intrepid hacker. + +=head2 How can I write() into a string? + +See L<perlform> for an swrite() function. + +=head2 How can I output my numbers with commas added? + +This one will do it for you: + + sub commify { + local $_ = shift; + 1 while s/^(-?\d+)(\d{3})/$1,$2/; + return $_; + } + + $n = 23659019423.2331; + print "GOT: ", commify($n), "\n"; + + GOT: 23,659,019,423.2331 + +You can't just: + + s/^(-?\d+)(\d{3})/$1,$2/g; + +because you have to put the comma in and then recalculate your +position. + +Alternatively, this commifies all numbers in a line regardless of +whether they have decimal portions, are preceded by + or -, or +whatever: + + # from Andrew Johnson <ajohnson@gpu.srv.ualberta.ca> + sub commify { + my $input = shift; + $input = reverse $input; + $input =~ s<(\d\d\d)(?=\d)(?!\d*\.)><$1,>g; + return reverse $input; + } + +=head2 How can I translate tildes (~) in a filename? + +Use the E<lt>E<gt> (glob()) operator, documented in L<perlfunc>. 
This +requires that you have a shell installed that groks tildes, meaning +csh or tcsh or (some versions of) ksh, and thus may have portability +problems. The Glob::KGlob module (available from CPAN) gives more +portable glob functionality. + +Within Perl, you may use this directly: + + $filename =~ s{ + ^ ~ # find a leading tilde + ( # save this in $1 + [^/] # a non-slash character + * # repeated 0 or more times (0 means me) + ) + }{ + $1 + ? (getpwnam($1))[7] + : ( $ENV{HOME} || $ENV{LOGDIR} ) + }ex; + +=head2 How come when I open the file read-write it wipes it out? + +Because you're using something like this, which truncates the file and +I<then> gives you read-write access: + + open(FH, "+> /path/name"); # WRONG + +Whoops. You should instead use this, which will fail if the file +doesn't exist. + + open(FH, "+< /path/name"); # open for update + +If this is an issue, try: + + sysopen(FH, "/path/name", O_RDWR|O_CREAT, 0644); + +Error checking is left as an exercise for the reader. + +=head2 Why do I sometimes get an "Argument list too long" when I use <*>? + +The C<E<lt>E<gt>> operator performs a globbing operation (see above). +By default glob() forks csh(1) to do the actual glob expansion, but +csh can't handle more than 127 items and so gives the error message +C<Argument list too long>. People who installed tcsh as csh won't +have this problem, but their users may be surprised by it. + +To get around this, either do the glob yourself with C<Dirhandle>s and +patterns, or use a module like Glob::KGlob, one that doesn't use the +shell to do globbing. + +=head2 Is there a leak/bug in glob()? + +Due to the current implementation on some operating systems, when you +use the glob() function or its angle-bracket alias in a scalar +context, you may cause a leak and/or unpredictable behavior. It's +best therefore to use glob() only in list context. + +=head2 How can I open a file with a leading "E<gt>" or trailing blanks? 
+ +Normally perl ignores trailing blanks in filenames, and interprets +certain leading characters (or a trailing "|") to mean something +special. To avoid this, you might want to use a routine like this. +It makes incomplete pathnames into explicit relative ones, and tacks a +trailing null byte on the name to make perl leave it alone: + + sub safe_filename { + local $_ = shift; + return m#^/# + ? "$_\0" + : "./$_\0"; + } + + $fn = safe_filename("<<<something really wicked "); + open(FH, "> $fn") or die "couldn't open $fn: $!"; + +You could also use the sysopen() function (see L<perlfunc/sysopen>). + +=head2 How can I reliably rename a file? + +Well, usually you just use Perl's rename() function. But that may +not work everywhere, in particular, renaming files across file systems. +If your operating system supports a mv(1) program or its moral equivalent, +this works: + + rename($old, $new) or system("mv", $old, $new); + +It may be more compelling to use the File::Copy module instead. You +just copy the old file to the new name (checking return values), +then delete the old one. This isn't really the same semantics as a +real rename(), though, which preserves metainformation like +permissions, timestamps, inode info, etc. + +=head2 How can I lock a file? + +Perl's builtin flock() function (see L<perlfunc> for details) will call +flock(2) if that exists, fcntl(2) if it doesn't (on perl version 5.004 and +later), and lockf(3) if neither of the two previous system calls exists. +On some systems, it may even use a different form of native locking. +Here are some gotchas with Perl's flock(): + +=over 4 + +=item 1 + +Produces a fatal error if none of the three system calls (or their +close equivalent) exists. + +=item 2 + +lockf(3) does not provide shared locking, and requires that the +filehandle be open for writing (or appending, or read/writing). + +=item 3 + +Some versions of flock() can't lock files over a network (e.g. 
on NFS +file systems), so you'd need to force the use of fcntl(2) when you +build Perl. See the flock entry of L<perlfunc>, and the F<INSTALL> +file in the source distribution for information on building Perl to do +this. + +=back + +The CPAN module File::Lock offers similar functionality and (if you +have dynamic loading) won't require you to rebuild perl if your +flock() can't lock network files. + +=head2 Why can't I just open(FH, ">file.lock")? + +A common bit of code B<NOT TO USE> is this: + + sleep(3) while -e "file.lock"; # PLEASE DO NOT USE + open(LCK, "> file.lock"); # THIS BROKEN CODE + +This is a classic race condition: you take two steps to do something +which must be done in one. That's why computer hardware provides an +atomic test-and-set instruction. In theory, this "ought" to work: + + sysopen(FH, "file.lock", O_WRONLY|O_EXCL|O_CREAT, 0644) + or die "can't open file.lock: $!"; + +except that lamentably, file creation (and deletion) is not atomic +over NFS, so this won't work (at least, not every time) over the net. +Various schemes involving link() have been suggested, but +these tend to involve busy-wait, which is also subdesirable. + +=head2 I still don't get locking. I just want to increment the number in the file. How can I do this? + +Didn't anyone ever tell you web-page hit counters were useless? + +Anyway, this is what to do: + + use Fcntl; + sysopen(FH, "numfile", O_RDWR|O_CREAT, 0644) or die "can't open numfile: $!"; + flock(FH, 2) or die "can't flock numfile: $!"; + $num = <FH> || 0; + seek(FH, 0, 0) or die "can't rewind numfile: $!"; + truncate(FH, 0) or die "can't truncate numfile: $!"; + (print FH $num+1, "\n") or die "can't write numfile: $!"; + # DO NOT UNLOCK THIS UNTIL YOU CLOSE + close FH or die "can't close numfile: $!"; + +Here's a much better web-page hit counter: + + $hits = int( (time() - 850_000_000) / rand(1_000) ); + +If the count doesn't impress your friends, then the code might. 
:-) + +=head2 How do I randomly update a binary file? + +If you're just trying to patch a binary, in many cases something as +simple as this works: + + perl -i -pe 's{window manager}{window mangler}g' /usr/bin/emacs + +However, if you have fixed sized records, then you might do something more +like this: + + $RECSIZE = 220; # size of record, in bytes + $recno = 37; # which record to update + open(FH, "+<somewhere") || die "can't update somewhere: $!"; + seek(FH, $recno * $RECSIZE, 0); + read(FH, $record, $RECSIZE) == $RECSIZE || die "can't read record $recno: $!"; + # munge the record + seek(FH, $recno * $RECSIZE, 0); + print FH $record; + close FH; + +Locking and error checking are left as an exercise for the reader. +Don't forget them, or you'll be quite sorry. + +Don't forget to set binmode() under DOS-like platforms when operating +on files that have anything other than straight text in them. See the +docs on open() and on binmode() for more details. + +=head2 How do I get a file's timestamp in perl? + +If you want to retrieve the time at which the file was last read, +written, or had its meta-data (owner, etc) changed, you use the B<-A>, +B<-M>, or B<-C> filetest operations as documented in L<perlfunc>. These +retrieve the age of the file (measured against the start-time of your +program) in days as a floating point number. To retrieve the "raw" +time in seconds since the epoch, you would call the stat function, +then use localtime(), gmtime(), or POSIX::strftime() to convert this +into human-readable form. + +Here's an example: + + $write_secs = (stat($file))[9]; + print "file $file updated at ", scalar(localtime($write_secs)), "\n"; + +If you prefer something more legible, use the File::stat module +(part of the standard distribution in version 5.004 and later): + + use File::stat; + use Time::localtime; + $date_string = ctime(stat($file)->mtime); + print "file $file updated at $date_string\n"; + +Error checking is left as an exercise for the reader. 
+ +=head2 How do I set a file's timestamp in perl? + +You use the utime() function documented in L<perlfunc/utime>. +By way of example, here's a little program that copies the +read and write times from its first argument to all the rest +of them. + + if (@ARGV < 2) { + die "usage: cptimes timestamp_file other_files ...\n"; + } + $timestamp = shift; + ($atime, $mtime) = (stat($timestamp))[8,9]; + utime $atime, $mtime, @ARGV; + +Error checking is left as an exercise for the reader. + +Note that utime() currently doesn't work correctly with Win95/NT +ports. A bug has been reported. Check it carefully before using +it on those platforms. + +=head2 How do I print to more than one file at once? + +If you only have to do this once, you can do this: + + for $fh (FH1, FH2, FH3) { print $fh "whatever\n" } + +To connect one filehandle to several output filehandles, it's +easiest to use the tee(1) program if you have it, and let it take care +of the multiplexing: + + open (FH, "| tee file1 file2 file3"); + +Otherwise you'll have to write your own multiplexing print function -- +or your own tee program -- or use Tom Christiansen's, at +http://www.perl.com/CPAN/authors/id/TOMC/scripts/tct.gz, which is +written in Perl. + +In theory an IO::Tee class could be written, but to date we haven't +seen such. + +=head2 How can I read in a file by paragraphs? + +Use the C<$/> variable (see L<perlvar> for details). You can either +set it to C<""> to eliminate empty paragraphs (C<"abc\n\n\n\ndef">, +for instance, gets treated as two paragraphs and not three), or +C<"\n\n"> to accept empty paragraphs. + +=head2 How can I read a single character from a file? From the keyboard? + +You can use the builtin C<getc()> function for most filehandles, but +it won't (easily) work on a terminal device. For STDIN, either use +the Term::ReadKey module from CPAN, or use the sample code in +L<perlfunc/getc>. 
+ +If your system supports POSIX, you can use the following code, which +you'll note turns off echo processing as well. + + #!/usr/bin/perl -w + use strict; + $| = 1; + for (1..4) { + my $got; + print "gimme: "; + $got = getone(); + print "--> $got\n"; + } + exit; + + BEGIN { + use POSIX qw(:termios_h); + + my ($term, $oterm, $echo, $noecho, $fd_stdin); + + $fd_stdin = fileno(STDIN); + + $term = POSIX::Termios->new(); + $term->getattr($fd_stdin); + $oterm = $term->getlflag(); + + $echo = ECHO | ECHOK | ICANON; + $noecho = $oterm & ~$echo; + + sub cbreak { + $term->setlflag($noecho); + $term->setcc(VTIME, 1); + $term->setattr($fd_stdin, TCSANOW); + } + + sub cooked { + $term->setlflag($oterm); + $term->setcc(VTIME, 0); + $term->setattr($fd_stdin, TCSANOW); + } + + sub getone { + my $key = ''; + cbreak(); + sysread(STDIN, $key, 1); + cooked(); + return $key; + } + + } + + END { cooked() } + +The Term::ReadKey module from CPAN may be easier to use: + + use Term::ReadKey; + open(TTY, "</dev/tty"); + print "Gimme a char: "; + ReadMode "raw"; + $key = ReadKey 0, *TTY; + ReadMode "normal"; + printf "\nYou said %s, char number %03d\n", + $key, ord $key; + +For DOS systems, Dan Carson <dbc@tc.fluke.COM> reports the following: + +To put the PC in "raw" mode, use ioctl with some magic numbers gleaned +from msdos.c (Perl source file) and Ralf Brown's interrupt list (comes +across the net every so often): + + $old_ioctl = ioctl(STDIN,0,0); # Gets device info + $old_ioctl &= 0xff; + ioctl(STDIN,1,$old_ioctl | 32); # Writes it back, setting bit 5 + +Then to read a single character: + + sysread(STDIN,$c,1); # Read a single character + +And to put the PC back to "cooked" mode: + + ioctl(STDIN,1,$old_ioctl); # Sets it back to cooked mode. + +So now you have $c. If C<ord($c) == 0>, you have a two byte code, which +means you hit a special key. 
Read another byte with C<sysread(STDIN,$c,1)>, +and that value tells you what combination it was according to this +table: + + # PC 2-byte keycodes = ^@ + the following: + + # HEX KEYS + # --- ---- + # 0F SHF TAB + # 10-19 ALT QWERTYUIOP + # 1E-26 ALT ASDFGHJKL + # 2C-32 ALT ZXCVBNM + # 3B-44 F1-F10 + # 47-49 HOME,UP,PgUp + # 4B LEFT + # 4D RIGHT + # 4F-53 END,DOWN,PgDn,Ins,Del + # 54-5D SHF F1-F10 + # 5E-67 CTR F1-F10 + # 68-71 ALT F1-F10 + # 73-77 CTR LEFT,RIGHT,END,PgDn,HOME + # 78-83 ALT 1234567890-= + # 84 CTR PgUp + +This is all trial and error I did a long time ago, I hope I'm reading the +file that worked. + +=head2 How can I tell if there's a character waiting on a filehandle? + +You should check out the Frequently Asked Questions list in +comp.unix.* for things like this: the answer is essentially the same. +It's very system dependent. Here's one solution that works on BSD +systems: + + sub key_ready { + my($rin, $nfd); + vec($rin, fileno(STDIN), 1) = 1; + return $nfd = select($rin,undef,undef,0); + } + +You should look into getting the Term::ReadKey extension from CPAN. + +=head2 How do I open a file without blocking? + +You need to use the O_NDELAY or O_NONBLOCK flag from the Fcntl module +in conjunction with sysopen(): + + use Fcntl; + sysopen(FH, "/tmp/somefile", O_WRONLY|O_NDELAY|O_CREAT, 0644) + or die "can't open /tmp/somefile: $!"; + +=head2 How do I create a file only if it doesn't exist? + +You need to use the O_CREAT and O_EXCL flags from the Fcntl module in +conjunction with sysopen(): + + use Fcntl; + sysopen(FH, "/tmp/somefile", O_WRONLY|O_EXCL|O_CREAT, 0644) + or die "can't open /tmp/somefile: $!"; + +Be warned that neither creation nor deletion of files is guaranteed to +be an atomic operation over NFS. That is, two processes might both +successfully create or unlink the same file! + +=head2 How do I do a C<tail -f> in perl? 
+ +First try + + seek(GWFILE, 0, 1); + +The statement C<seek(GWFILE, 0, 1)> doesn't change the current position, +but it does clear the end-of-file condition on the handle, so that the +next <GWFILE> makes Perl try again to read something. + +If that doesn't work (it relies on features of your stdio implementation), +then you need something more like this: + + for (;;) { + for ($curpos = tell(GWFILE); <GWFILE>; $curpos = tell(GWFILE)) { + # search for some stuff and put it into files + } + # sleep for a while + seek(GWFILE, $curpos, 0); # seek to where we had been + } + +If this still doesn't work, look into the POSIX module. POSIX defines +the clearerr() method, which can remove the end of file condition on a +filehandle. The method: read until end of file, clearerr(), read some +more. Lather, rinse, repeat. + +=head2 How do I dup() a filehandle in Perl? + +If you check L<perlfunc/open>, you'll see that several of the ways +to call open() should do the trick. For example: + + open(LOG, ">>/tmp/logfile"); + open(STDERR, ">&LOG"); + +Or even with a literal numeric descriptor: + + $fd = $ENV{MHCONTEXTFD}; + open(MHCONTEXT, "<&=$fd"); # like fdopen(3S) + +Error checking has been left as an exercise for the reader. + +=head2 How do I close a file descriptor by number? + +This should rarely be necessary, as the Perl close() function is to be +used for things that Perl opened itself, even if it was a dup of a +numeric descriptor, as with MHCONTEXT above. But if you really have +to, you may be able to do this: + + require 'sys/syscall.ph'; + $rc = syscall(&SYS_close, $fd + 0); # must force numeric + die "can't sysclose $fd: $!" if $rc == -1; + +=head2 Why can't I use "C:\temp\foo" in DOS paths? Why doesn't `C:\temp\foo.exe` work? + +Whoops! You just put a tab and a formfeed into that filename! +Remember that within double quoted strings ("like\this"), the +backslash is an escape character. The full list of these is in +L<perlop/Quote and Quote-like Operators>. 
Unsurprisingly, you don't +have a file called "c:(tab)emp(formfeed)oo" or +"c:(tab)emp(formfeed)oo.exe" on your DOS filesystem. + +Either single-quote your strings, or (preferably) use forward slashes. +Since all DOS and Windows versions since something like MS-DOS 2.0 or so +have treated C</> and C<\> the same in a path, you might as well use the +one that doesn't clash with Perl -- or the POSIX shell, ANSI C and C++, +awk, Tcl, Java, or Python, just to mention a few. + +=head2 Why doesn't glob("*.*") get all the files? + +Because even on non-Unix ports, Perl's glob function follows standard +Unix globbing semantics. You'll need C<glob("*")> to get all (non-hidden) +files. + +=head2 Why does Perl let me delete read-only files? Why does C<-i> clobber protected files? Isn't this a bug in Perl? + +This is elaborately and painstakingly described in the "Far More Than +You Ever Wanted To Know" in +http://www.perl.com/CPAN/doc/FMTEYEWTK/file-dir-perms . + +The executive summary: learn how your filesystem works. The +permissions on a file say what can happen to the data in that file. +The permissions on a directory say what can happen to the list of +files in that directory. If you delete a file, you're removing its +name from the directory (so the operation depends on the permissions +of the directory, not of the file). If you try to write to the file, +the permissions of the file govern whether you're allowed to. + +=head2 How do I select a random line from a file? + +Here's an algorithm from the Camel Book: + + srand; + rand($.) < 1 && ($line = $_) while <>; + +This has a significant advantage in space over reading the whole +file in. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. 
+ diff --git a/pod/perlfaq6.pod b/pod/perlfaq6.pod new file mode 100644 index 0000000000..535e464455 --- /dev/null +++ b/pod/perlfaq6.pod @@ -0,0 +1,605 @@ +=head1 NAME + +perlfaq6 - Regexps ($Revision: 1.17 $, $Date: 1997/04/24 22:44:10 $) + +=head1 DESCRIPTION + +This section is surprisingly small because the rest of the FAQ is +littered with answers involving regular expressions. For example, +decoding a URL and checking whether something is a number are handled +with regular expressions, but those answers are found elsewhere in +this document (in the section on Data and the one on +Networking, to be precise). + +=head2 How can I hope to use regular expressions without creating illegible and unmaintainable code? + +Three techniques can make regular expressions maintainable and +understandable. + +=over 4 + +=item Comments Outside the Regexp + +Describe what you're doing and how you're doing it, using normal Perl +comments. + + # turn the line into the first word, a colon, and the + # number of characters on the rest of the line + s/^(\w+)(.*)/ lc($1) . ":" . length($2) /ge; + +=item Comments Inside the Regexp + +The C</x> modifier causes whitespace to be ignored in a regexp pattern +(except in a character class), and also allows you to use normal +comments there, too. As you can imagine, whitespace and comments help +a lot. + +C</x> lets you turn this: + + s{<(?:[^>'"]*|".*?"|'.*?')+>}{}gs; + +into this: + + s{ < # opening angle bracket + (?: # Non-backreffing grouping paren + [^>'"] * # 0 or more things that are neither > nor ' nor " + | # or else + ".*?" # a section between double quotes (stingy match) + | # or else + '.*?' # a section between single quotes (stingy match) + ) + # all occurring one or more times + > # closing angle bracket + }{}gsx; # replace with nothing, i.e. delete + +It's still not quite so clear as prose, but it is very useful for +describing the meaning of each part of the pattern. 
+ +=item Different Delimiters + +While we normally think of patterns as being delimited with C</> +characters, they can be delimited by almost any character. L<perlre> +describes this. For example, the C<s///> above uses braces as +delimiters. Selecting another delimiter can avoid quoting the +delimiter within the pattern: + + s/\/usr\/local/\/usr\/share/g; # bad delimiter choice + s#/usr/local#/usr/share#g; # better + +=back + +=head2 I'm having trouble matching over more than one line. What's wrong? + +Either you don't have newlines in your string, or you aren't using the +correct modifier(s) on your pattern. + +There are many ways to get multiline data into a string. If you want +it to happen automatically while reading input, you'll want to set $/ +(probably to '' for paragraphs or C<undef> for the whole file) to +allow you to read more than one line at a time. + +Read L<perlre> to help you decide which of C</s> and C</m> (or both) +you might want to use: C</s> allows dot to include newline, and C</m> +allows caret and dollar to match next to a newline, not just at the +end of the string. You do need to make sure that you've actually +got a multiline string in there. + +For example, this program detects duplicate words, even when they span +line breaks (but not paragraph ones). For this example, we don't need +C</s> because we aren't using dot in a regular expression that we want +to cross line boundaries. Neither do we need C</m> because we aren't +wanting caret or dollar to match at any point inside the record next +to newlines. But it's imperative that $/ be set to something other +than the default, or else we won't actually ever have a multiline +record read in. 
+ + $/ = ''; # read in a whole paragraph, not just one line + while ( <> ) { + while ( /\b(\w\S+)(\s+\1)+\b/gi ) { + print "Duplicate $1 at paragraph $.\n"; + } + } + +Here's code that finds sentences that begin with "From " (which would +be mangled by many mailers): + + $/ = ''; # read in a whole paragraph, not just one line + while ( <> ) { + while ( /^From /gm ) { # /m makes ^ match next to \n + print "leading from in paragraph $.\n"; + } + } + +Here's code that finds everything between START and END in a file: + + undef $/; # read in whole file, not just one line or paragraph + while ( <> ) { + while ( /START(.*?)END/sgm ) { # /s makes . cross line boundaries + print "$1\n"; + } + } + +=head2 How can I pull out lines between two patterns that are themselves on different lines? + +You can use Perl's somewhat exotic C<..> operator (documented in +L<perlop>): + + perl -ne 'print if /START/ .. /END/' file1 file2 ... + +If you wanted text and not lines, you would use + + perl -0777 -ne 'print "$1\n" while /START(.*?)END/gs' file1 file2 ... + +But if you want nested occurrences of C<START> through C<END>, you'll +run up against the problem described in the question in this section +on matching balanced text. + +=head2 I put a regular expression into $/ but it didn't work. What's wrong? + +$/ must be a string, not a regular expression. Awk has to be better +for something. :-) + +Actually, you could do this if you don't mind reading the whole file +into memory: + + undef $/; + @records = split /your_pattern/, <FH>; + +The Net::Telnet module (available from CPAN) has the capability to +wait for a pattern in the input stream, or timeout if it doesn't +appear within a certain time. + + ## Create a file with three lines. + open FH, ">file"; + print FH "The first line\nThe second line\nThe third line\n"; + close FH; + + ## Get a read/write filehandle to it. + $fh = new FileHandle "+<file"; + + ## Attach it to a "stream" object. 
+ use Net::Telnet; + $file = new Net::Telnet (-fhopen => $fh); + + ## Search for the second line and print out the third. + $file->waitfor('/second line\n/'); + print $file->getline; + +=head2 How do I substitute case insensitively on the LHS, but preserving case on the RHS? + +It depends on what you mean by "preserving case". The following +script makes the substitution have the same case, letter by letter, as +the original. If the substitution has more characters than the string +being substituted, the case of the last character is used for the rest +of the substitution. + + # Original by Nathan Torkington, massaged by Jeffrey Friedl + # + sub preserve_case($$) + { + my ($old, $new) = @_; + my ($state) = 0; # 0 = no change; 1 = lc; 2 = uc + my ($i, $oldlen, $newlen, $c) = (0, length($old), length($new)); + my ($len) = $oldlen < $newlen ? $oldlen : $newlen; + + for ($i = 0; $i < $len; $i++) { + if ($c = substr($old, $i, 1), $c =~ /[\W\d_]/) { + $state = 0; + } elsif (lc $c eq $c) { + substr($new, $i, 1) = lc(substr($new, $i, 1)); + $state = 1; + } else { + substr($new, $i, 1) = uc(substr($new, $i, 1)); + $state = 2; + } + } + # finish up with any remaining new (for when new is longer than old) + if ($newlen > $oldlen) { + if ($state == 1) { + substr($new, $oldlen) = lc(substr($new, $oldlen)); + } elsif ($state == 2) { + substr($new, $oldlen) = uc(substr($new, $oldlen)); + } + } + return $new; + } + + $a = "this is a TEsT case"; + $a =~ s/(test)/preserve_case($1, "success")/gie; + print "$a\n"; + +This prints: + + this is a SUcCESS case + +=head2 How can I make C<\w> match accented characters? + +See L<perllocale>. + +=head2 How can I match a locale-smart version of C</[a-zA-Z]/>? + +One alphabetic character would be C</[^\W\d_]/>, no matter what locale +you're in. Non-alphabetics would be C</[\W\d_]/> (assuming you don't +consider an underscore a letter). + +=head2 How can I quote a variable to use in a regexp? 
+ +The Perl parser will expand $variable and @variable references in +regular expressions unless the delimiter is a single quote. Remember, +too, that the right-hand side of a C<s///> substitution is considered +a double-quoted string (see L<perlop> for more details). Remember +also that any regexp special characters will be acted on unless you +precede the substitution with \Q. Here's an example: + + $string = "to die?"; + $lhs = "die?"; + $rhs = "sleep no more"; + + $string =~ s/\Q$lhs/$rhs/; + # $string is now "to sleep no more" + +Without the \Q, the regexp would also spuriously match "di". + +=head2 What is C</o> really for? + +Using a variable in a regular expression match forces a re-evaluation +(and perhaps recompilation) each time through. The C</o> modifier +locks in the regexp the first time it's used. This always happens in a +constant regular expression, and in fact, the pattern was compiled +into the internal format at the same time your entire program was. + +Use of C</o> is irrelevant unless variable interpolation is used in +the pattern, and if so, the regexp engine will neither know nor care +whether the variables change after the pattern is evaluated the I<very +first> time. + +C</o> is often used to gain an extra measure of efficiency by not +performing subsequent evaluations when you know it won't matter +(because you know the variables won't change), or more rarely, when +you don't want the regexp to notice if they do. + +For example, here's a "paragrep" program: + + $/ = ''; # paragraph mode + $pat = shift; + while (<>) { + print if /$pat/o; + } + +=head2 How do I use a regular expression to strip C style comments from a file? + +While this actually can be done, it's much harder than you'd think. +For example, this one-liner + + perl -0777 -pe 's{/\*.*?\*/}{}gs' foo.c + +will work in many but not all cases. 
You see, it's too simple-minded for +certain kinds of C programs, in particular, those with what appear to be +comments in quoted strings. For that, you'd need something like this, +created by Jeffrey Friedl: + + $/ = undef; + $_ = <>; + s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|\n+|.[^/"'\\]*)#$2#g; + print; + +This could, of course, be more legibly written with the C</x> modifier, adding +whitespace and comments. + +=head2 Can I use Perl regular expressions to match balanced text? + +Although Perl regular expressions are more powerful than "mathematical" +regular expressions, because they feature conveniences like backreferences +(C<\1> and its ilk), they still aren't powerful enough. You still need +to use non-regexp techniques to parse balanced text, such as the text +enclosed between matching parentheses or braces, for example. + +An elaborate subroutine (for 7-bit ASCII only) to pull out balanced +and possibly nested single chars, like C<`> and C<'>, C<{> and C<}>, +or C<(> and C<)> can be found in +http://www.perl.com/CPAN/authors/id/TOMC/scripts/pull_quotes.gz . + +The C::Scan module from CPAN contains such subs for internal usage, +but they are undocumented. + +=head2 What does it mean that regexps are greedy? How can I get around it? + +Most people mean that greedy regexps match as much as they can. +Technically speaking, it's actually the quantifiers (C<?>, C<*>, C<+>, +C<{}>) that are greedy rather than the whole pattern; Perl prefers local +greed and immediate gratification to overall greed. To get non-greedy +versions of the same quantifiers, use (C<??>, C<*?>, C<+?>, C<{}?>). + +An example: + + $s1 = $s2 = "I am very very cold"; + $s1 =~ s/ve.*y //; # I am cold + $s2 =~ s/ve.*?y //; # I am very cold + +Notice how the second substitution stopped matching as soon as it +encountered "y ". 
The C<*?> quantifier effectively tells the regular +expression engine to find a match as quickly as possible and pass +control on to whatever is next in line, like you would if you were +playing hot potato. + +=head2 How do I process each word on each line? + +Use the split function: + + while (<>) { + foreach $word ( split ) { + # do something with $word here + } + } + +Note that this isn't really a word in the English sense; it's just +chunks of consecutive non-whitespace characters. + +To work with only alphanumeric sequences, you might consider + + while (<>) { + foreach $word (m/(\w+)/g) { + # do something with $word here + } + } + +=head2 How can I print out a word-frequency or line-frequency summary? + +To do this, you have to parse out each word in the input stream. We'll +pretend that by word you mean chunk of alphabetics, hyphens, or +apostrophes, rather than the non-whitespace chunk idea of a word given +in the previous question: + + while (<>) { + while ( /(\b[^\W_\d][\w'-]+\b)/g ) { # misses "`sheep'" + $seen{$1}++; + } + } + while ( ($word, $count) = each %seen ) { + print "$count $word\n"; + } + +If you wanted to do the same thing for lines, you wouldn't need a +regular expression: + + while (<>) { + $seen{$_}++; + } + while ( ($line, $count) = each %seen ) { + print "$count $line"; + } + +If you want these output in a sorted order, see the section on Hashes. + +=head2 How can I do approximate matching? + +See the module String::Approx available from CPAN. + +=head2 How do I efficiently match many regular expressions at once? 
+ +The following is super-inefficient: + + while (<FH>) { + foreach $pat (@patterns) { + if ( /$pat/ ) { + # do something + } + } + } + +Instead, you either need to use one of the experimental Regexp extension +modules from CPAN (which might well be overkill for your purposes), +or else put together something like this, inspired from a routine +in Jeffrey Friedl's book: + + sub _bm_build { + my $condition = shift; + my @regexp = @_; # this MUST not be local(); need my() + my $expr = join $condition => map { "m/\$regexp[$_]/o" } (0..$#regexp); + my $match_func = eval "sub { $expr }"; + die if $@; # propagate $@; this shouldn't happen! + return $match_func; + } + + sub bm_and { _bm_build('&&', @_) } + sub bm_or { _bm_build('||', @_) } + + $f1 = bm_and qw{ + xterm + (?i)window + }; + + $f2 = bm_or qw{ + \b[Ff]ree\b + \bBSD\B + (?i)sys(tem)?\s*[V5]\b + }; + + # feed me /etc/termcap, prolly + while ( <> ) { + print "1: $_" if &$f1; + print "2: $_" if &$f2; + } + +=head2 Why don't word-boundary searches with C<\b> work for me? + +Two common misconceptions are that C<\b> is a synonym for C<\s+>, and +that it's the edge between whitespace characters and non-whitespace +characters. Neither is correct. C<\b> is the place between a C<\w> +character and a C<\W> character (that is, C<\b> is the edge of a +"word"). It's a zero-width assertion, just like C<^>, C<$>, and all +the other anchors, so it doesn't consume any characters. L<perlre> +describes the behaviour of all the regexp metacharacters. + +Here are examples of the incorrect application of C<\b>, with fixes: + + "two words" =~ /(\w+)\b(\w+)/; # WRONG + "two words" =~ /(\w+)\s+(\w+)/; # right + + " =matchless= text" =~ /\b=(\w+)=\b/; # WRONG + " =matchless= text" =~ /=(\w+)=/; # right + +Although they may not do what you thought they did, C<\b> and C<\B> +can still be quite useful. For an example of the correct use of +C<\b>, see the example of matching duplicate words over multiple +lines. 
+ +An example of using C<\B> is the pattern C<\Bis\B>. This will find +occurrences of "is" on the insides of words only, as in "thistle", but +not "this" or "island". + +=head2 Why does using $&, $`, or $' slow my program down? + +Because once Perl sees that you need one of these variables anywhere +in the program, it has to provide them on each and every pattern +match. The same mechanism that handles these provides for the use of +$1, $2, etc., so you pay the same price for each regexp that contains +capturing parentheses. But if you never use $&, etc., in your script, +then regexps I<without> capturing parentheses won't be penalized. So +avoid $&, $', and $` if you can, but if you can't (and some algorithms +really appreciate them), once you've used them once, use them at will, +because you've already paid the price. + +=head2 What good is C<\G> in a regular expression? + +The notation C<\G> is used in a match or substitution in conjunction with the +C</g> modifier (and ignored if there's no C</g>) to anchor the regular +expression to the point just past where the last match occurred, i.e. the +pos() point. + +For example, suppose you had a line of text quoted in standard mail +and Usenet notation, (that is, with leading C<E<gt>> characters), and +you want to change each leading C<E<gt>> into a corresponding C<:>. You +could do so in this way: + + s/^(>+)/':' x length($1)/gem; + +Or, using C<\G>, the much simpler (and faster): + + s/\G>/:/g; + +A more sophisticated use might involve a tokenizer. The following +lex-like example is courtesy of Jeffrey Friedl. It did not work in +5.003 due to bugs in that release, but does work in 5.004 or better. +(Note the use of C</c>, which prevents a failed match with C</g> from +resetting the search position back to the beginning of the string.) 
+ + while (<>) { + chomp; + PARSER: { + m/ \G( \d+\b )/gcx && do { print "number: $1\n"; redo; }; + m/ \G( \w+ )/gcx && do { print "word: $1\n"; redo; }; + m/ \G( \s+ )/gcx && do { print "space: $1\n"; redo; }; + m/ \G( [^\w\d]+ )/gcx && do { print "other: $1\n"; redo; }; + } + } + +Of course, that could have been written as + + while (<>) { + chomp; + PARSER: { + if ( /\G( \d+\b )/gcx { + print "number: $1\n"; + redo PARSER; + } + if ( /\G( \w+ )/gcx { + print "word: $1\n"; + redo PARSER; + } + if ( /\G( \s+ )/gcx { + print "space: $1\n"; + redo PARSER; + } + if ( /\G( [^\w\d]+ )/gcx { + print "other: $1\n"; + redo PARSER; + } + } + } + +But then you lose the vertical alignment of the regular expressions. + +=head2 Are Perl regexps DFAs or NFAs? Are they POSIX compliant? + +While it's true that Perl's regular expressions resemble the DFAs +(deterministic finite automata) of the egrep(1) program, they are in +fact implemented as NFAs (non-deterministic finite automata) to allow +backtracking and backreferencing. And they aren't POSIX-style either, +because those guarantee worst-case behavior for all cases. (It seems +that some people prefer guarantees of consistency, even when what's +guaranteed is slowness.) See the book "Mastering Regular Expressions" +(from O'Reilly) by Jeffrey Friedl for all the details you could ever +hope to know on these matters (a full citation appears in +L<perlfaq2>). + +=head2 What's wrong with using grep or map in a void context? + +Strictly speaking, nothing. Stylistically speaking, it's not a good +way to write maintainable code. That's because you're using these +constructs not for their return values but rather for their +side-effects, and side-effects can be mystifying. There's no void +grep() that's not better written as a C<for> (well, C<foreach>, +technically) loop. + +=head2 How can I match strings with multibyte characters? + +This is hard, and there's no good way. Perl does not directly support +wide characters. 
It pretends that a byte and a character are +synonymous. The following set of approaches was offered by Jeffrey +Friedl, whose article in issue #5 of The Perl Journal talks about this +very matter. + +Let's suppose you have some weird Martian encoding where pairs of +ASCII uppercase letters encode single Martian letters (i.e. the two +bytes "CV" make a single Martian letter, as do the two bytes "SG", +"VS", "XX", etc.). Other bytes represent single characters, just like +ASCII. + +So, the string of Martian "I am CVSGXX!" uses 12 bytes to encode the +nine characters 'I', ' ', 'a', 'm', ' ', 'CV', 'SG', 'XX', '!'. + +Now, say you want to search for the single character C</GX/>. Perl +doesn't know about Martian, so it'll find the two bytes "GX" in the "I +am CVSGXX!" string, even though that character isn't there: it just +looks like it is because "SG" is next to "XX", but there's no real +"GX". This is a big problem. + +Here are a few ways, all painful, to deal with it: + + $martian =~ s/([A-Z][A-Z])/ $1 /g; # Make sure adjacent ``martian'' bytes + # are no longer adjacent. + print "found GX!\n" if $martian =~ /GX/; + +Or like this: + + @chars = $martian =~ m/([A-Z][A-Z]|[^A-Z])/g; + # above is conceptually similar to: @chars = $text =~ m/(.)/g; + # + foreach $char (@chars) { + print "found GX!\n", last if $char eq 'GX'; + } + +Or like this: + + while ($martian =~ m/\G([A-Z][A-Z]|.)/gs) { # \G probably unneeded + print "found GX!\n", last if $1 eq 'GX'; + } + +Or like this: + + die "sorry, Perl doesn't (yet) have Martian support )-:\n"; + +In addition, a sample program which converts half-width to full-width +katakana (in Shift-JIS or EUC encoding) is available from CPAN as + +=for Tom make it so + +There are many double- (and multi-) byte encodings commonly used these +days. Some versions of these have 1-, 2-, 3-, and 4-byte characters, +all mixed. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. 
See L<perlfaq> for distribution information. + diff --git a/pod/perlfaq7.pod b/pod/perlfaq7.pod new file mode 100644 index 0000000000..283aa2bb34 --- /dev/null +++ b/pod/perlfaq7.pod @@ -0,0 +1,717 @@ +=head1 NAME + +perlfaq7 - Perl Language Issues ($Revision: 1.18 $, $Date: 1997/04/24 22:44:14 $) + +=head1 DESCRIPTION + +This section deals with general Perl language issues that don't +clearly fit into any of the other sections. + +=head2 Can I get a BNF/yacc/RE for the Perl language? + +No, in the words of Chaim Frenkel: "Perl's grammar can not be reduced +to BNF. The work of parsing perl is distributed between yacc, the +lexer, smoke and mirrors." + +=head2 What are all these $@%* punctuation signs, and how do I know when to use them? + +They are type specifiers, as detailed in L<perldata>: + + $ for scalar values (number, string or reference) + @ for arrays + % for hashes (associative arrays) + * for all types of that symbol name. In version 4 you used them like + pointers, but in modern perls you can just use references. + +While there are a few places where you don't actually need these type +specifiers, you should always use them. + +A couple of others that you're likely to encounter that aren't +really type specifiers are: + + <> are used for inputting a record from a filehandle. + \ takes a reference to something. + +Note that E<lt>FILEE<gt> is I<neither> the type specifier for files +nor the name of the handle. It is the C<E<lt>E<gt>> operator applied +to the handle FILE. It reads one line (well, record - see +L<perlvar/$/>) from the handle FILE in scalar context, or I<all> lines +in list context. When performing open, close, or any other operation +besides C<E<lt>E<gt>> on files, or even talking about the handle, do +I<not> use the brackets. These are correct: C<eof(FH)>, C<seek(FH, 0, +2)> and "copying from STDIN to FILE". + +=head2 Do I always/never have to quote my strings or use semicolons and commas? 
+ +Normally, a bareword doesn't need to be quoted, but in most cases +probably should be (and must be under C<use strict>). But a hash key +consisting of a simple word (that isn't the name of a defined +subroutine) and the left-hand operand to the C<=E<gt>> operator both +count as though they were quoted: + + This is like this + ------------ --------------- + $foo{line} $foo{"line"} + bar => stuff "bar" => stuff + +The final semicolon in a block is optional, as is the final comma in a +list. Good style (see L<perlstyle>) says to put them in except for +one-liners: + + if ($whoops) { exit 1 } + @nums = (1, 2, 3); + + if ($whoops) { + exit 1; + } + @lines = ( + "There Beren came from mountains cold", + "And lost he wandered under leaves", + ); + +=head2 How do I skip some return values? + +One way is to treat the return values as a list and index into it: + + $dir = (getpwnam($user))[7]; + +Another way is to use undef as an element on the left-hand-side: + + ($dev, $ino, undef, undef, $uid, $gid) = stat($file); + +=head2 How do I temporarily block warnings? + +The C<$^W> variable (documented in L<perlvar>) controls +runtime warnings for a block: + + { + local $^W = 0; # temporarily turn off warnings + $a = $b + $c; # I know these might be undef + } + +Note that like all the punctuation variables, you cannot currently +use my() on C<$^W>, only local(). + +A new C<use warnings> pragma is in the works to provide finer control +over all this. The curious should check the perl5-porters mailing list +archives for details. + +=head2 What's an extension? + +A way of calling compiled C code from Perl. Reading L<perlxstut> +is a good place to learn more about extensions. + +=head2 Why do Perl operators have different precedence than C operators? + +Actually, they don't. All C operators that Perl copies have the same +precedence in Perl as they do in C. 
The problem is with operators that C +doesn't have, especially functions that give a list context to everything +on their right, eg print, chmod, exec, and so on. Such functions are +called "list operators" and appear as such in the precedence table in +L<perlop>. + +A common mistake is to write: + + unlink $file || die "snafu"; + +This gets interpreted as: + + unlink ($file || die "snafu"); + +To avoid this problem, either put in extra parentheses or use the +super low precedence C<or> operator: + + (unlink $file) || die "snafu"; + unlink $file or die "snafu"; + +The "English" operators (C<and>, C<or>, C<xor>, and C<not>) +deliberately have precedence lower than that of list operators for +just such situations as the one above. + +Another operator with surprising precedence is exponentiation. It +binds more tightly even than unary minus, making C<-2**2> produce a +negative not a positive four. It is also right-associating, meaning +that C<2**3**2> is two raised to the ninth power, not eight squared. + +=head2 How do I declare/create a structure? + +In general, you don't "declare" a structure. Just use a (probably +anonymous) hash reference. See L<perlref> and L<perldsc> for details. +Here's an example: + + $person = {}; # new anonymous hash + $person->{AGE} = 24; # set field AGE to 24 + $person->{NAME} = "Nat"; # set field NAME to "Nat" + +If you're looking for something a bit more rigorous, try L<perltoot>. + +=head2 How do I create a module? + +A module is a package that lives in a file of the same name. For +example, the Hello::There module would live in Hello/There.pm. For +details, read L<perlmod>. You'll also find L<Exporter> helpful. If +you're writing a C or mixed-language module with both C and Perl, then +you should study L<perlxstut>. + +Here's a convenient template you might wish to use when starting your +own module. Make sure to change the names appropriately.
+ + package Some::Module; # assumes Some/Module.pm + + use strict; + + BEGIN { + use Exporter (); + use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + + ## set the version for version checking; uncomment to use + ## $VERSION = 1.00; + + # if using RCS/CVS, this next line may be preferred, + # but beware two-digit versions. + $VERSION = do{my@r=q$Revision: 1.18 $=~/\d+/g;sprintf '%d.'.'%02d'x$#r,@r}; + + @ISA = qw(Exporter); + @EXPORT = qw(&func1 &func2 &func3); + %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ], + + # your exported package globals go here, + # as well as any optionally exported functions + @EXPORT_OK = qw($Var1 %Hashit); + } + use vars @EXPORT_OK; + + # non-exported package globals go here + use vars qw( @more $stuff ); + + # initialize package globals, first exported ones + $Var1 = ''; + %Hashit = (); + + # then the others (which are still accessible as $Some::Module::stuff) + $stuff = ''; + @more = (); + + # all file-scoped lexicals must be created before + # the functions below that use them. + + # file-private lexicals go here + my $priv_var = ''; + my %secret_hash = (); + + # here's a file-private function as a closure, + # callable as &$priv_func; it cannot be prototyped. + my $priv_func = sub { + # stuff goes here. + }; + + # make all your functions, whether exported or not; + # remember to put something interesting in the {} stubs + sub func1 {} # no prototype + sub func2() {} # proto'd void + sub func3($$) {} # proto'd to 2 scalars + + # this one isn't exported, but could be called! + sub func4(\%) {} # proto'd to 1 hash ref + + END { } # module clean-up code here (global destructor) + + 1; # modules must return true + +=head2 How do I create a class? + +See L<perltoot> for an introduction to classes and objects, as well as +L<perlobj> and L<perlbot>. + +=head2 How can I tell if a variable is tainted? + +See L<perlsec/"Laundering and Detecting Tainted Data">. 
Here's an +example (which doesn't use any system calls, because the kill() +is given no processes to signal): + + sub is_tainted { + return ! eval { join('',@_), kill 0; 1; }; + } + +This is not C<-w> clean, however. There is no C<-w> clean way to +detect taintedness - take this as a hint that you should untaint +all possibly-tainted data. + +=head2 What's a closure? + +Closures are documented in L<perlref>. + +I<Closure> is a computer science term with a precise but +hard-to-explain meaning. Closures are implemented in Perl as anonymous +subroutines with lasting references to lexical variables outside their +own scopes. These lexicals magically refer to the variables that were +around when the subroutine was defined (deep binding). + +Closures make sense in any programming language where you can have the +return value of a function be itself a function, as you can in Perl. +Note that some languages provide anonymous functions but are not +capable of providing proper closures; the Python language, for +example. For more information on closures, check out any textbook on +functional programming. Scheme is a language that not only supports +but encourages closures. + +Here's a classic function-generating function: + + sub add_function_generator { + return sub { shift + shift }; + } + + $add_sub = add_function_generator(); + $sum = &$add_sub(4,5); # $sum is 9 now. + +The closure works as a I<function template> with some customization +slots left out to be filled later. The anonymous subroutine returned +by add_function_generator() isn't technically a closure because it +refers to no lexicals outside its own scope. + +Contrast this with the following make_adder() function, in which the +returned anonymous function contains a reference to a lexical variable +outside the scope of that function itself. Such a reference requires +that Perl return a proper closure, thus locking in for all time the +value that the lexical had when the function was created. 
+ + sub make_adder { + my $addpiece = shift; + return sub { shift + $addpiece }; + } + + $f1 = make_adder(20); + $f2 = make_adder(555); + +Now C<&$f1($n)> is always 20 plus whatever $n you pass in, whereas +C<&$f2($n)> is always 555 plus whatever $n you pass in. The $addpiece +in the closure sticks around. + +Closures are often used for less esoteric purposes. For example, when +you want to pass in a bit of code into a function: + + my $line; + timeout( 30, sub { $line = <STDIN> } ); + +If the code to execute had been passed in as a string, C<'$line = +E<lt>STDINE<gt>'>, there would have been no way for the hypothetical +timeout() function to access the lexical variable $line back in its +caller's scope. + +=head2 What is variable suicide and how can I prevent it? + +Variable suicide is when you (temporarily or permanently) lose the +value of a variable. It is caused by scoping through my() and local() +interacting with either closures or aliased foreach() iterator +variables and subroutine arguments. It used to be easy to +inadvertently lose a variable's value this way, but now it's much +harder. Take this code: + + my $f = "foo"; + sub T { + while ($i++ < 3) { my $f = $f; $f .= "bar"; print $f, "\n" } + } + T; + print "Finally $f\n"; + +The $f that has "bar" added to it three times should be a new C<$f> +(C<my $f> should create a new local variable each time through the +loop). It isn't, however. This is a bug, and will be fixed. + +=head2 How can I pass/return a {Function, FileHandle, Array, Hash, Method, Regexp}? + +With the exception of regexps, you need to pass references to these +objects. See L<perlsub/"Pass by Reference"> for this particular +question, and L<perlref> for information on references. + +=over 4 + +=item Passing Variables and Functions + +Regular variables and functions are quite easy: just pass in a +reference to an existing or anonymous variable or function: + + func( \$some_scalar ); + + func( \$some_array ); + func( [ 1 ..
10 ] ); + + func( \%some_hash ); + func( { this => 10, that => 20 } ); + + func( \&some_func ); + func( sub { $_[0] ** $_[1] } ); + +=item Passing Filehandles + +To create filehandles you can pass to subroutines, you can use C<*FH> +or C<\*FH> notation ("typeglobs" - see L<perldata> for more information), +or create filehandles dynamically using the old FileHandle or the new +IO::File modules, both part of the standard Perl distribution. + + use Fcntl; + use IO::File; + my $fh = new IO::File $filename, O_WRONLY|O_APPEND; + or die "Can't append to $filename: $!"; + func($fh); + +=item Passing Regexps + +To pass regexps around, you'll need to either use one of the highly +experimental regular expression modules from CPAN (Nick Ing-Simmons's +Regexp or Ilya Zakharevich's Devel::Regexp), pass around strings +and use an exception-trapping eval, or else be very, very clever. +Here's an example of how to pass in a string to be regexp compared: + + sub compare($$) { + my ($val1, $regexp) = @_; + my $retval = eval { $val =~ /$regexp/ }; + die if $@; + return $retval; + } + + $match = compare("old McDonald", q/d.*D/); + +Make sure you never say something like this: + + return eval "\$val =~ /$regexp/"; # WRONG + +or someone can sneak shell escapes into the regexp due to the double +interpolation of the eval and the double-quoted string. For example: + + $pattern_of_evil = 'danger ${ system("rm -rf * &") } danger'; + + eval "\$string =~ /$pattern_of_evil/"; + +Those preferring to be very, very clever might see the O'Reilly book, +I<Mastering Regular Expressions>, by Jeffrey Friedl. Page 273's +Build_MatchMany_Function() is particularly interesting. A complete +citation of this book is given in L<perlfaq2>.
+ +=item Passing Methods + +To pass an object method into a subroutine, you can do this: + + call_a_lot(10, $some_obj, "methname") + sub call_a_lot { + my ($count, $widget, $trick) = @_; + for (my $i = 0; $i < $count; $i++) { + $widget->$trick(); + } + } + +or you can use a closure to bundle up the object and its method call +and arguments: + + my $whatnot = sub { $some_obj->obfuscate(@args) }; + func($whatnot); + sub func { + my $code = shift; + &$code(); + } + +You could also investigate the can() method in the UNIVERSAL class +(part of the standard perl distribution). + +=back + +=head2 How do I create a static variable? + +As with most things in Perl, TMTOWTDI. What is a "static variable" in +other languages could be either a function-private variable (visible +only within a single function, retaining its value between calls to +that function), or a file-private variable (visible only to functions +within the file it was declared in) in Perl. + +Here's code to implement a function-private variable: + + BEGIN { + my $counter = 42; + sub prev_counter { return --$counter } + sub next_counter { return $counter++ } + } + +Now prev_counter() and next_counter() share a private variable $counter +that was initialized at compile time. + +To declare a file-private variable, you'll still use a my(), putting +it at the outer scope level at the top of the file. Assume this is in +file Pax.pm: + + package Pax; + my $started = scalar(localtime(time())); + + sub begun { return $started } + +When C<use Pax> or C<require Pax> loads this module, the variable will +be initialized. It won't get garbage-collected the way most variables +going out of scope do, because the begun() function cares about it, +but no one else can get it. It is not called $Pax::started because +its scope is unrelated to the package. It's scoped to the file. 
You +could conceivably have several packages in that same file all +accessing the same private variable, but another file with the same +package couldn't get to it. + +=head2 What's the difference between dynamic and lexical (static) scoping? Between local() and my()? + +C<local($x)> saves away the old value of the global variable C<$x>, +and assigns a new value for the duration of the subroutine, I<which is +visible in other functions called from that subroutine>. This is done +at run-time, so is called dynamic scoping. local() always affects global +variables, also called package variables or dynamic variables. + +C<my($x)> creates a new variable that is only visible in the current +subroutine. This is done at compile-time, so is called lexical or +static scoping. my() always affects private variables, also called +lexical variables or (improperly) static(ly scoped) variables. + +For instance: + + sub visible { + print "var has value $var\n"; + } + + sub dynamic { + local $var = 'local'; # new temporary value for the still-global + visible(); # variable called $var + } + + sub lexical { + my $var = 'private'; # new private variable, $var + visible(); # (invisible outside of sub scope) + } + + $var = 'global'; + + visible(); # prints global + dynamic(); # prints local + lexical(); # prints global + +Notice how at no point does the value "private" get printed. That's +because $var only has that value within the block of the lexical() +function, and it is hidden from called subroutine. + +In summary, local() doesn't make what you think of as private, local +variables. It gives a global variable a temporary value. my() is +what you're looking for if you want private variables. + +See also L<perlsub>, which explains this all in more detail. + +=head2 How can I access a dynamic variable while a similarly named lexical is in scope? + +You can do this via symbolic references, provided you haven't set +C<use strict "refs">. So instead of $var, use C<${'var'}>. 
+ + local $var = "global"; + my $var = "lexical"; + + print "lexical is $var\n"; + + no strict 'refs'; + print "global is ${'var'}\n"; + +If you know your package, you can just mention it explicitly, as in +$Some_Pack::var. Note that the notation $::var is I<not> the dynamic +$var in the current package, but rather the one in the C<main> +package, as though you had written $main::var. Specifying the package +directly makes you hard-code its name, but it executes faster and +avoids running afoul of C<use strict "refs">. + +=head2 What's the difference between deep and shallow binding? + +In deep binding, lexical variables mentioned in anonymous subroutines +are the same ones that were in scope when the subroutine was created. +In shallow binding, they are whichever variables with the same names +happen to be in scope when the subroutine is called. Perl always uses +deep binding of lexical variables (i.e., those created with my()). +However, dynamic variables (aka global, local, or package variables) +are effectively shallowly bound. Consider this just one more reason +not to use them. See the answer to L<"What's a closure?">. + +=head2 Why doesn't "local($foo) = <FILE>;" work right? + +C<local()> gives list context to the right hand side of C<=>. The +E<lt>FHE<gt> read operation, like so many of Perl's functions and +operators, can tell which context it was called in and behaves +appropriately. In general, the scalar() function can help. This +function does nothing to the data itself (contrary to popular myth) +but rather tells its argument to behave in whatever its scalar fashion +is. If that function doesn't have a defined scalar behavior, this of +course doesn't help you (such as with sort()). 
+ +To enforce scalar context in this particular case, however, you need +merely omit the parentheses: + + local($foo) = <FILE>; # WRONG + local($foo) = scalar(<FILE>); # ok + local $foo = <FILE>; # right + +You should probably be using lexical variables anyway, although the +issue is the same here: + + my($foo) = <FILE>; # WRONG + my $foo = <FILE>; # right + +=head2 How do I redefine a builtin function, operator, or method? + +Why do you want to do that? :-) + +If you want to override a predefined function, such as open(), +then you'll have to import the new definition from a different +module. See L<perlsub/"Overriding Builtin Functions">. There's +also an example in L<perltoot/"Class::Template">. + +If you want to overload a Perl operator, such as C<+> or C<**>, +then you'll want to use the C<use overload> pragma, documented +in L<overload>. + +If you're talking about obscuring method calls in parent classes, +see L<perltoot/"Overridden Methods">. + +=head2 What's the difference between calling a function as &foo and foo()? + +When you call a function as C<&foo>, you allow that function access to +your current @_ values, and you by-pass prototypes. That means that +the function doesn't get an empty @_, it gets yours! While not +strictly speaking a bug (it's documented that way in L<perlsub>), it +would be hard to consider this a feature in most cases. + +When you call your function as C<&foo()>, then you do get a new @_, +but prototyping is still circumvented. + +Normally, you want to call a function using C<foo()>. You may only +omit the parentheses if the function is already known to the compiler +because it already saw the definition (C<use> but not C<require>), +or via a forward reference or C<use subs> declaration. Even in this +case, you get a clean @_ without any of the old values leaking through +where they don't belong. + +=head2 How do I create a switch or case statement? + +This is explained in more depth in the L<perlsyn>. 
Briefly, there's +no official case statement, because of the variety of tests possible +in Perl (numeric comparison, string comparison, glob comparison, +regexp matching, overloaded comparisons, ...). Larry couldn't decide +how best to do this, so he left it out, even though it's been on the +wish list since perl1. + +Here's a simple example of a switch based on pattern matching. We'll +do a multi-way conditional based on the type of reference stored in +$whatchamacallit: + + SWITCH: + for (ref $whatchamacallit) { + + /^$/ && die "not a reference"; + + /SCALAR/ && do { + print_scalar($$ref); + last SWITCH; + }; + + /ARRAY/ && do { + print_array(@$ref); + last SWITCH; + }; + + /HASH/ && do { + print_hash(%$ref); + last SWITCH; + }; + + /CODE/ && do { + warn "can't print function ref"; + last SWITCH; + }; + + # DEFAULT + + warn "User defined type skipped"; + + } + +=head2 How can I catch accesses to undefined variables/functions/methods? + +The AUTOLOAD method, discussed in L<perlsub/"Autoloading"> and +L<perltoot/"AUTOLOAD: Proxy Methods">, lets you capture calls to +undefined functions and methods. + +When it comes to undefined variables that would trigger a warning +under C<-w>, you can use a handler to trap the pseudo-signal +C<__WARN__> like this: + + $SIG{__WARN__} = sub { + + for ( $_[0] ) { + + /Use of uninitialized value/ && do { + # promote warning to a fatal + die $_; + }; + + # other warning cases to catch could go here; + + warn $_; + } + + }; + +=head2 Why can't a method included in this same file be found? + +Some possible reasons: your inheritance is getting confused, you've +misspelled the method name, or the object is of the wrong type. Check +out L<perltoot> for details on these. You may also use C<print +ref($object)> to find out the class C<$object> was blessed into. + +Another possible reason for problems is because you've used the +indirect object syntax (eg, C<find Guru "Samy">) on a class name +before Perl has seen that such a package exists. 
It's wisest to make +sure your packages are all defined before you start using them, which +will be taken care of if you use the C<use> statement instead of +C<require>. If not, make sure to use arrow notation (eg, +C<Guru-E<gt>find("Samy")>) instead. Object notation is explained in +L<perlobj>. + +=head2 How can I find out my current package? + +If you're just a random program, you can do this to find +out what the currently compiled package is: + + my $packname = ref bless []; + +But if you're a method and you want to print an error message +that includes the kind of object you were called on (which is +not necessarily the same as the one in which you were compiled): + + sub amethod { + my $self = shift; + my $class = ref($self) || $self; + warn "called me from a $class object"; + } + +=head2 How can I comment out a large block of perl code? + +Use embedded POD to discard it: + + # program is here + + =for nobody + This paragraph is commented out + + # program continues + + =begin comment text + + all of this stuff + + here will be ignored + by everyone + + =end comment text + + =cut + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. diff --git a/pod/perlfaq8.pod b/pod/perlfaq8.pod new file mode 100644 index 0000000000..4fabce6f36 --- /dev/null +++ b/pod/perlfaq8.pod @@ -0,0 +1,1185 @@ +=head1 NAME + +perlfaq8 - System Interaction ($Revision: 1.21 $, $Date: 1997/04/24 22:44:19 $) + +=head1 DESCRIPTION + +This section of the Perl FAQ covers questions involving operating +system interaction. This involves interprocess communication (IPC), +control over the user-interface (keyboard, screen and pointing +devices), and most anything else not related to data manipulation. + +Read the FAQs and documentation specific to the port of perl to your +operating system (eg, L<perlvms>, L<perlplan9>, ...).
These should +contain more detailed information on the vagaries of your perl. + +=head2 How do I find out which operating system I'm running under? + +The $^O variable ($OSTYPE if you use English) contains the operating +system that your perl binary was built for. + +=head2 How come exec() doesn't return? + +Because that's what it does: it replaces your currently running +program with a different one. If you want to keep going (as is +probably the case if you're asking this question) use system() +instead. + +=head2 How do I do fancy stuff with the keyboard/screen/mouse? + +How you access/control keyboards, screens, and pointing devices +("mice") is system-dependent. Try the following modules: + +=over 4 + +=item Keyboard + + Term::Cap Standard perl distribution + Term::ReadKey CPAN + Term::ReadLine::Gnu CPAN + Term::ReadLine::Perl CPAN + Term::Screen CPAN + +=item Screen + + Term::Cap Standard perl distribution + Curses CPAN + Term::ANSIColor CPAN + +=item Mouse + + Tk CPAN + +=back + +=head2 How do I ask the user for a password? + +(This question has nothing to do with the web. See a different +FAQ for that.) + +There's an example of this in L<perlfunc/crypt>. First, you put +the terminal into "no echo" mode, then just read the password +normally. You may do this with an old-style ioctl() function, POSIX +terminal control (see L<POSIX>, and Chapter 7 of the Camel), or a call +to the B<stty> program, with varying degrees of portability. + +You can also do this for most systems using the Term::ReadKey module +from CPAN, which is easier to use and in theory more portable. + +=head2 How do I read and write the serial port? + +This depends on which operating system your program is running on. In +the case of Unix, the serial ports will be accessible through files in +/dev; on other systems, the device names will doubtless differ.
+Several problem areas common to all device interaction are the +following + +=over 4 + +=item lockfiles + +Your system may use lockfiles to control multiple access. Make sure +you follow the correct protocol. Unpredictable behaviour can result +from multiple processes reading from one device. + +=item open mode + +If you expect to use both read and write operations on the device, +you'll have to open it for update (see L<perlfunc/"open"> for +details). You may wish to open it without running the risk of +blocking by using sysopen() and C<O_RDWR|O_NDELAY|O_NOCTTY> from the +Fcntl module (part of the standard perl distribution). See +L<perlfunc/"sysopen"> for more on this approach. + +=item end of line + +Some devices will be expecting a "\r" at the end of each line rather +than a "\n". In some ports of perl, "\r" and "\n" are different from +their usual (Unix) ASCII values of "\015" and "\012". You may have to +give the numeric values you want directly, using octal ("\015"), hex +("0x0D"), or as a control-character specification ("\cM"). + + print DEV "atv1\012"; # wrong, for some devices + print DEV "atv1\015"; # right, for some devices + +Even though with normal text files, a "\n" will do the trick, there is +still no unified scheme for terminating a line that is portable +between Unix, DOS/Win, and Macintosh, except to terminate I<ALL> line +ends with "\015\012", and strip what you don't need from the output. +This applies especially to socket I/O and autoflushing, discussed +next.
+ +=item flushing output + +If you expect characters to get to your device when you print() them, +you'll want to autoflush that filehandle, as in the older + + use FileHandle; + DEV->autoflush(1); + +and the newer + + use IO::Handle; + DEV->autoflush(1); + +You can use select() and the C<$|> variable to control autoflushing +(see L<perlvar/$|> and L<perlfunc/select>): + + $oldh = select(DEV); + $| = 1; + select($oldh); + +You'll also see code that does this without a temporary variable, as in + + select((select(DEV), $| = 1)[0]); + +As mentioned in the previous item, this still doesn't work when using +socket I/O between Unix and Macintosh. You'll need to hardcode your +line terminators, in that case. + +=item non-blocking input + +If you are doing a blocking read() or sysread(), you'll have to +arrange for an alarm handler to provide a timeout (see +L<perlfunc/alarm>). If you have a non-blocking open, you'll likely +have a non-blocking read, which means you may have to use a 4-arg +select() to determine whether I/O is ready on that device (see +L<perlfunc/"select">). + +=back + +=head2 How do I decode encrypted password files? + +You spend lots and lots of money on dedicated hardware, but this is +bound to get you talked about. + +Seriously, you can't if they are Unix password files - the Unix +password system employs one-way encryption. Programs like Crack can +forcibly (and intelligently) try to guess passwords, but don't (can't) +guarantee quick success. + +If you're worried about users selecting bad passwords, you should +proactively check when they try to change their password (by modifying +passwd(1), for example). + +=head2 How do I start a process in the background? + +You could use + + system("cmd &") + +or you could use fork as documented in L<perlfunc/"fork">, with +further examples in L<perlipc>. 
Some things to be aware of, if you're +on a Unix-like system: + +=over 4 + +=item STDIN, STDOUT and STDERR are shared + +Both the main process and the backgrounded one (the "child" process) +share the same STDIN, STDOUT and STDERR filehandles. If both try to +access them at once, strange things can happen. You may want to close +or reopen these for the child. You can get around this with +C<open>ing a pipe (see L<perlfunc/"open">) but on some systems this +means that the child process cannot outlive the parent. + +=item Signals + +You'll have to catch the SIGCHLD signal, and possibly SIGPIPE too. +SIGCHLD is sent when the backgrounded process finishes. SIGPIPE is +sent when you write to a filehandle whose child process has closed (an +untrapped SIGPIPE can cause your program to silently die). This is +not an issue with C<system("cmd&")>. + +=item Zombies + +You have to be prepared to "reap" the child process when it finishes + + $SIG{CHLD} = sub { wait }; + +See L<perlipc/"Signals"> for other examples of code to do this. +Zombies are not an issue with C<system("prog &")>. + +=back + +=head2 How do I trap control characters/signals? + +You don't actually "trap" a control character. Instead, that +character generates a signal, which you then trap. Signals are +documented in L<perlipc/"Signals"> and chapter 6 of the Camel. + +Be warned that very few C libraries are re-entrant. Therefore, if you +attempt to print() in a handler that got invoked during another stdio +operation your internal structures will likely be in an +inconsistent state, and your program will dump core. You can +sometimes avoid this by using syswrite() instead of print(). + +Unless you're exceedingly careful, the only safe things to do inside a +signal handler are: set a variable and exit. And in the first case, +you should only set a variable in such a way that malloc() is not +called (eg, by setting a variable that already has a value). 
+ +For example: + + $Interrupted = 0; # to ensure it has a value + $SIG{INT} = sub { + $Interrupted++; + syswrite(STDERR, "ouch\n", 5); + } + +However, because syscalls restart by default, you'll find that if +you're in a "slow" call, such as E<lt>FHE<gt>, read(), connect(), or +wait(), that the only way to terminate them is by "longjumping" out; +that is, by raising an exception. See the time-out handler for a +blocking flock() in L<perlipc/"Signals"> or chapter 6 of the Camel. + +=head2 How do I modify the shadow password file on a Unix system? + +If perl was installed correctly, the getpw*() functions described in +L<perlfunc> provide (read-only) access to the shadow password file. +To change the file, make a new shadow password file (the format varies +from system to system - see L<passwd(5)> for specifics) and use +pwd_mkdb(8) to install it (see L<pwd_mkdb(5)> for more details). + +=head2 How do I set the time and date? + +Assuming you're running under sufficient permissions, you should be +able to set the system-wide date and time by running the date(1) +program. (There is no way to set the time and date on a per-process +basis.) This mechanism will work for Unix, MS-DOS, Windows, and NT; +the VMS equivalent is C<set time>. + +However, if all you want to do is change your timezone, you can +probably get away with setting an environment variable: + + $ENV{TZ} = "MST7MDT"; # unixish + $ENV{'SYS$TIMEZONE_DIFFERENTIAL'}="-5" # vms + system "trn comp.lang.perl"; + +=head2 How can I sleep() or alarm() for under a second? + +If you want finer granularity than the 1 second that the sleep() +function provides, the easiest way is to use the select() function as +documented in L<perlfunc/"select">. If your system has itimers and +syscall() support, you can check out the old example in +http://www.perl.com/CPAN/doc/misc/ancient/tutorial/eg/itimers.pl . + +=head2 How can I measure time under a second? + +In general, you may not be able to. 
The Time::HiRes module (available +from CPAN) provides this functionality for some systems. + +In general, you may not be able to. But if your system supports both the +syscall() function in Perl as well as a system call like gettimeofday(2), +then you may be able to do something like this: + + require 'sys/syscall.ph'; + + $TIMEVAL_T = "LL"; + + $done = $start = pack($TIMEVAL_T, ()); + + syscall( &SYS_gettimeofday, $start, 0) != -1 + or die "gettimeofday: $!"; + + ########################## + # DO YOUR OPERATION HERE # + ########################## + + syscall( &SYS_gettimeofday, $done, 0) != -1 + or die "gettimeofday: $!"; + + @start = unpack($TIMEVAL_T, $start); + @done = unpack($TIMEVAL_T, $done); + + # fix microseconds + for ($done[1], $start[1]) { $_ /= 1_000_000 } + + $delta_time = sprintf "%.4f", ($done[0] + $done[1] ) + - + ($start[0] + $start[1] ); + +=head2 How can I do an atexit() or setjmp()/longjmp()? (Exception handling) + +Release 5 of Perl added the END block, which can be used to simulate +atexit(). Each package's END block is called when the program or +thread ends (see L<perlmod> for more details). It isn't +called when untrapped signals kill the program, though, so if you use +END blocks you should also use + + use sigtrap qw(die normal-signals); + +Perl's exception-handling mechanism is its eval() operator. You can +use eval() as setjmp and die() as longjmp. For details of this, see +the section on signals, especially the time-out handler for a blocking +flock() in L<perlipc/"Signals"> and chapter 6 of the Camel. + +If exception handling is all you're interested in, try the +exceptions.pl library (part of the standard perl distribution). + +If you want the atexit() syntax (and an rmexit() as well), try the +AtExit module available from CPAN. + +=head2 Why doesn't my sockets program work under System V (Solaris)? What does the error message "Protocol not supported" mean? 
+ +Some Sys-V based systems, notably Solaris 2.X, redefined some of the +standard socket constants. Since these were constant across all +architectures, they were often hardwired into perl code. The proper +way to deal with this is to "use Socket" to get the correct values. + +Note that even though SunOS and Solaris are binary compatible, these +values are different. Go figure. + +=head2 How can I call my system's unique C functions from Perl? + +In most cases, you write an external module to do it - see the answer +to "Where can I learn about linking C with Perl? [h2xs, xsubpp]". +However, if the function is a system call, and your system supports +syscall(), you can use the syscall function (documented in +L<perlfunc>). + +Remember to check the modules that came with your distribution, and +CPAN as well - someone may already have written a module to do it. + +=head2 Where do I get the include files to do ioctl() or syscall()? + +Historically, these would be generated by the h2ph tool, part of the +standard perl distribution. This program converts cpp(1) directives +in C header files to files containing subroutine definitions, like +&SYS_getitimer, which you can use as arguments to your functions. +It doesn't work perfectly, but it usually gets most of the job done. +Simple files like F<errno.h>, F<syscall.h>, and F<socket.h> were fine, +but the hard ones like F<ioctl.h> nearly always need to be hand-edited. +Here's how to install the *.ph files: + + 1. become super-user + 2. cd /usr/include + 3. h2ph *.h */*.h + +If your system supports dynamic loading, for reasons of portability and +sanity you probably ought to use h2xs (also part of the standard perl +distribution). This tool converts C header files to Perl extensions. +See L<perlxstut> for how to get started with h2xs. + +If your system doesn't support dynamic loading, you still probably +ought to use h2xs. 
See L<perlxstut> and L<ExtUtils::MakeMaker> for +more information (in brief, just use B<make perl> instead of a plain +B<make> to rebuild perl with a new static extension). + +=head2 Why do setuid perl scripts complain about kernel problems? + +Some operating systems have bugs in the kernel that make setuid +scripts inherently insecure. Perl gives you a number of options +(described in L<perlsec>) to work around such systems. + +=head2 How can I open a pipe both to and from a command? + +The IPC::Open2 module (part of the standard perl distribution) is an +easy-to-use approach that internally uses pipe(), fork(), and exec() +to do the job. Make sure you read the deadlock warnings in its +documentation, though (see L<IPC::Open2>). + +=head2 Why can't I get the output of a command with system()? + +You're confusing the purpose of system() and backticks (``). system() +runs a command and returns exit status information (as a 16 bit value: +the low 8 bits are the signal the process died from, if any, and +the high 8 bits are the actual exit value). Backticks (``) run a +command and return what it sent to STDOUT. + + $exit_status = system("mail-users"); + $output_string = `ls`; + +=head2 How can I capture STDERR from an external command? + +There are three basic ways of running external commands: + + system $cmd; # using system() + $output = `$cmd`; # using backticks (``) + open (PIPE, "cmd |"); # using open() + +With system(), both STDOUT and STDERR will go the same place as the +script's versions of these, unless the command redirects them. +Backticks and open() read B<only> the STDOUT of your command. 
+ +With any of these, you can change file descriptors before the call: + + open(STDOUT, ">logfile"); + system("ls"); + +or you can use Bourne shell file-descriptor redirection: + + $output = `$cmd 2>some_file`; + open (PIPE, "cmd 2>some_file |"); + +You can also use file-descriptor redirection to make STDERR a +duplicate of STDOUT: + + $output = `$cmd 2>&1`; + open (PIPE, "cmd 2>&1 |"); + +Note that you I<cannot> simply open STDERR to be a dup of STDOUT +in your Perl program and avoid calling the shell to do the redirection. +This doesn't work: + + open(STDERR, ">&STDOUT"); + $alloutput = `cmd args`; # stderr still escapes + +This fails because the open() makes STDERR go to where STDOUT was +going at the time of the open(). The backticks then make STDOUT go to +a string, but don't change STDERR (which still goes to the old +STDOUT). + +Note that you I<must> use Bourne shell (sh(1)) redirection syntax in +backticks, not csh(1)! Details on why Perl's system() and backtick +and pipe opens all use the Bourne shell are in +http://www.perl.com/CPAN/doc/FMTEYEWTK/versus/csh.whynot . + +You may also use the IPC::Open3 module (part of the standard perl +distribution), but be warned that it has a different order of +arguments from IPC::Open2 (see L<IPC::Open3>). + +=head2 Why doesn't open() return an error when a pipe open fails? + +It does, but probably not how you expect it to. On systems that +follow the standard fork()/exec() paradigm (eg, Unix), it works like +this: open() causes a fork(). In the parent, open() returns with the +process ID of the child. The child exec()s the command to be piped +to/from. The parent can't know whether the exec() was successful or +not - all it can return is whether the fork() succeeded or not. To +find out if the command succeeded, you have to catch SIGCHLD and +wait() to get the exit status. You should also catch SIGPIPE if +you're writing to the child -- you may not have found out the exec() +failed by the time you write. 
This is documented in L<perlipc>. + +On systems that follow the spawn() paradigm, open() I<might> do what +you expect - unless perl uses a shell to start your command. In this +case the fork()/exec() description still applies. + +=head2 What's wrong with using backticks in a void context? + +Strictly speaking, nothing. Stylistically speaking, it's not a good +way to write maintainable code because backticks have a (potentially +humungous) return value, and you're ignoring it. It may also not be very +efficient, because you have to read in all the lines of output, allocate +memory for them, and then throw them away. Too often people are lulled +into writing: + + `cp file file.bak`; + +And now they think "Hey, I'll just always use backticks to run programs." +Bad idea: backticks are for capturing a program's output; the system() +function is for running programs. + +Consider this line: + + `cat /etc/termcap`; + +You haven't assigned the output anywhere, so it just wastes memory +(for a little while). Plus you forgot to check C<$?> to see whether +the program even ran correctly. Even if you wrote + + print `cat /etc/termcap`; + +In most cases, this could and probably should be written as + + system("cat /etc/termcap") == 0 + or die "cat program failed!"; + +Which will get the output quickly (as it's generated, instead of only +at the end) and also check the return value. + +system() also provides direct control over whether shell wildcard +processing may take place, whereas backticks do not. + +=head2 How can I call backticks without shell processing? + +This is a bit tricky. Instead of writing + + @ok = `grep @opts '$search_string' @filenames`; + +You have to do this: + + my @ok = (); + if (open(GREP, "-|")) { + while (<GREP>) { + chomp; + push(@ok, $_); + } + close GREP; + } else { + exec 'grep', @opts, $search_string, @filenames; + } + +Just as with system(), no shell escapes happen when you exec() a list. 
+ +=head2 Why can't my script read from STDIN after I gave it EOF (^D on Unix, ^Z on MS-DOS)? + +Because some stdio's set error and eof flags that need clearing. The +POSIX module defines clearerr() that you can use. That is the +technically correct way to do it. Here are some less reliable +workarounds: + +=over 4 + +=item 1 + +Try keeping around the seekpointer and go there, like this: + + $where = tell(LOG); + seek(LOG, $where, 0); + +=item 2 + +If that doesn't work, try seeking to a different part of the file and +then back. + +=item 3 + +If that doesn't work, try seeking to a different part of +the file, reading something, and then seeking back. + +=item 4 + +If that doesn't work, give up on your stdio package and use sysread. + +=back + +=head2 How can I convert my shell script to perl? + +Learn Perl and rewrite it. Seriously, there's no simple converter. +Things that are awkward to do in the shell are easy to do in Perl, and +this very awkwardness is what would make a shell->perl converter +nigh-on impossible to write. By rewriting it, you'll think about what +you're really trying to do, and hopefully will escape the shell's +pipeline datastream paradigm, which while convenient for some matters, +causes many inefficiencies. + +=head2 Can I use perl to run a telnet or ftp session? + +Try the Net::FTP, TCP::Client, and Net::Telnet modules (available from +CPAN). http://www.perl.com/CPAN/scripts/netstuff/telnet.emul.shar +will also help for emulating the telnet protocol, but Net::Telnet is +quite probably easier to use.. 
+ +If all you want to do is pretend to be telnet but don't need +the initial telnet handshaking, then the standard dual-process +approach will suffice: + + use IO::Socket; # new in 5.004 + $handle = IO::Socket::INET->new('www.perl.com:80') + || die "can't connect to port 80 on www.perl.com: $!"; + $handle->autoflush(1); + if (fork()) { # XXX: undef means failure + select($handle); + print while <STDIN>; # everything from stdin to socket + } else { + print while <$handle>; # everything from socket to stdout + } + close $handle; + exit; + +=head2 How can I write expect in Perl? + +Once upon a time, there was a library called chat2.pl (part of the +standard perl distribution), which never really got finished. These +days, your best bet is to look at the Comm.pl library available from +CPAN. + +=head2 Is there a way to hide perl's command line from programs such as "ps"? + +First of all note that if you're doing this for security reasons (to +avoid people seeing passwords, for example) then you should rewrite +your program so that critical information is never given as an +argument. Hiding the arguments won't make your program completely +secure. + +To actually alter the visible command line, you can assign to the +variable $0 as documented in L<perlvar>. This won't work on all +operating systems, though. Daemon programs like sendmail place their +state there, as in: + + $0 = "orcus [accepting connections]"; + +=head2 I {changed directory, modified my environment} in a perl script. How come the change disappeared when I exited the script? How do I get my changes to be visible? + +=over 4 + +=item Unix + +In the strictest sense, it can't be done -- the script executes as a +different process from the shell it was started from. Changes to a +process are not reflected in its parent, only in its own children +created after the change. 
There is shell magic that may allow you to +fake it by eval()ing the script's output in your shell; check out the +comp.unix.questions FAQ for details. + +=item VMS + +Changes to %ENV persist after Perl exits, but directory changes do not. + +=back + +=head2 How do I close a process's filehandle without waiting for it to complete? + +Assuming your system supports such things, just send an appropriate signal +to the process (see L<perlfunc/"kill">). It's common to first send a TERM +signal, wait a little bit, and then send a KILL signal to finish it off. + +=head2 How do I fork a daemon process? + +If by daemon process you mean one that's detached (disassociated from +its tty), then the following process is reported to work on most +Unixish systems. Non-Unix users should check their Your_OS::Process +module for other solutions. + +=over 4 + +=item * + +Open /dev/tty and use the TIOCNOTTY ioctl on it. See L<tty(4)> +for details. + +=item * + +Change directory to / + +=item * + +Reopen STDIN, STDOUT, and STDERR so they're not connected to the old +tty. + +=item * + +Background yourself like this: + + fork && exit; + +=back + +=head2 How do I make my program run with sh and csh? + +See the F<eg/nih> script (part of the perl source distribution). + +=head2 How do I find out if I'm running interactively or not? + +Good question. Sometimes C<-t STDIN> and C<-t STDOUT> can give clues, +sometimes not. + + if (-t STDIN && -t STDOUT) { + print "Now what? "; + } + +On POSIX systems, you can test whether your own process group matches +the current process group of your controlling terminal as follows: + + use POSIX qw/getpgrp tcgetpgrp/; + open(TTY, "/dev/tty") or die $!; + $tpgrp = tcgetpgrp(TTY); + $pgrp = getpgrp(); + if ($tpgrp == $pgrp) { + print "foreground\n"; + } else { + print "background\n"; + } + +=head2 How do I timeout a slow event? 
+ +Use the alarm() function, probably in conjunction with a signal +handler, as documented L<perlipc/"Signals"> and chapter 6 of the +Camel. You may instead use the more flexible Sys::AlarmCall module +available from CPAN. + +=head2 How do I set CPU limits? + +Use the BSD::Resource module from CPAN. + +=head2 How do I avoid zombies on a Unix system? + +Use the reaper code from L<perlipc/"Signals"> to call wait() when a +SIGCHLD is received, or else use the double-fork technique described +in L<perlfunc/fork>. + +=head2 How do I use an SQL database? + +There are a number of excellent interfaces to SQL databases. See the +DBD::* modules available from +http://www.perl.com/CPAN/modules/dbperl/DBD . + +=head2 How do I make a system() exit on control-C? + +You can't. You need to imitate the system() call (see L<perlipc> for +sample code) and then have a signal handler for the INT signal that +passes the signal on to the subprocess. + +=head2 How do I open a file without blocking? + +If you're lucky enough to be using a system that supports +non-blocking reads (most Unixish systems do), you need only to use the +O_NDELAY or O_NONBLOCK flag from the Fcntl module in conjunction with +sysopen(): + + use Fcntl; + sysopen(FH, "/tmp/somefile", O_WRONLY|O_NDELAY|O_CREAT, 0644) + or die "can't open /tmp/somefile: $!": + +=head2 How do I install a CPAN module? + +The easiest way is to have the CPAN module do it for you. This module +comes with perl version 5.004 and later. To manually install the CPAN +module, or any well-behaved CPAN module for that matter, follow these +steps: + +=over 4 + +=item 1 + +Unpack the source into a temporary area. + +=item 2 + + perl Makefile.PL + +=item 3 + + make + +=item 4 + + make test + +=item 5 + + make install + +=back + +If your version of perl is compiled without dynamic loading, then you +just need to replace step 3 (B<make>) with B<make perl> and you will +get a new F<perl> binary with your extension linked in. 
+ +See L<ExtUtils::MakeMaker> for more details on building extensions, and +the question "How do I keep my own module/library directory?" + +=head2 How do I keep my own module/library directory? + +When you build modules, use the PREFIX option when generating +Makefiles: + + perl Makefile.PL PREFIX=/u/mydir/perl + +then either set the PERL5LIB environment variable before you run +scripts that use the modules/libraries (see L<perlrun>) or say + + use lib '/u/mydir/perl'; + +See Perl's L<lib> for more information. + +=head2 How do I add the directory my program lives in to the module/library search path? + + use FindBin; + use lib "$FindBin::Bin"; + use your_own_modules; + +=head2 How do I add a directory to my include path at runtime? + +Here are the suggested ways of modifying your include path: + + the PERLLIB environment variable + the PERL5LIB environment variable + the perl -Idir command line flag + the use lib pragma, as in + use lib "$ENV{HOME}/myown_perllib"; + +The latter is particularly useful because it knows about machine +dependent architectures. The lib.pm pragmatic module was first +included with the 5.002 release of Perl. + +=head2 How do I get one key from the terminal at a time, under POSIX? 
+ + #!/usr/bin/perl -w + use strict; + $| = 1; + for (1..4) { + my $got; + print "gimme: "; + $got = getone(); + print "--> $got\n"; + } + exit; + + BEGIN { + use POSIX qw(:termios_h); + + my ($term, $oterm, $echo, $noecho, $fd_stdin); + + $fd_stdin = fileno(STDIN); + + $term = POSIX::Termios->new(); + $term->getattr($fd_stdin); + $oterm = $term->getlflag(); + + $echo = ECHO | ECHOK | ICANON; + $noecho = $oterm & ~$echo; + + sub cbreak { + $term->setlflag($noecho); + $term->setcc(VTIME, 1); + $term->setattr($fd_stdin, TCSANOW); + } + + sub cooked { + $term->setlflag($oterm); + $term->setcc(VTIME, 0); + $term->setattr($fd_stdin, TCSANOW); + } + + sub getone { + my $key = ''; + cbreak(); + sysread(STDIN, $key, 1); + cooked(); + return $key; + } + + } + END { cooked() } + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. + END-of-perlfaq8.pod +echo x - perlfaq9.pod +sed 's/^X//' >perlfaq9.pod << 'END-of-perlfaq9.pod' +=head1 NAME + +perlfaq9 - Networking ($Revision: 1.17 $, $Date: 1997/04/24 22:44:29 $) + +=head1 DESCRIPTION + +This section deals with questions related to networking, the internet, +and a few on the web. + +=head2 My CGI script runs from the command line but not the browser. Can you help me fix it? + +Sure, but you probably can't afford our contracting rates :-) + +Seriously, if you can demonstrate that you've read the following FAQs +and that your problem isn't something simple that can be easily +answered, you'll probably receive a courteous and useful reply to your +question if you post it on comp.infosystems.www.authoring.cgi (if it's +something to do with HTTP, HTML, or the CGI protocols). Questions that +appear to be Perl questions but are really CGI ones that are posted to +comp.lang.perl.misc may not be so well received. 
+ +The useful FAQs are: + + http://www.perl.com/perl/faq/idiots-guide.html + http://www3.pair.com/webthing/docs/cgi/faqs/cgifaq.shtml + http://www.perl.com/perl/faq/perl-cgi-faq.html + http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html + http://www.boutell.com/faq/ + +=head2 How do I remove HTML from a string? + +The most correct way (albeit not the fastest) is to use HTML::Parse +from CPAN (part of the libwww-perl distribution, which is a must-have +module for all web hackers). + +Many folks attempt a simple-minded regular expression approach, like +C<s/E<lt>.*?E<gt>//g>, but that fails in many cases because the tags +may continue over line breaks, they may contain quoted angle-brackets, +or HTML comments may be present. Plus folks forget to convert +entities, like C<&lt;> for example. + +Here's one "simple-minded" approach that works for most files: + + #!/usr/bin/perl -p0777 + s/<(?:[^>'"]*|(['"]).*?\1)*>//gs + +If you want a more complete solution, see the 3-stage striphtml +program in +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/striphtml.gz +. + +=head2 How do I extract URLs? + +A quick but imperfect approach is + + #!/usr/bin/perl -n00 + # qxurl - tchrist@perl.com + print "$2\n" while m{ + < \s* + A \s+ HREF \s* = \s* (["']) (.*?) \1 + \s* > + }gsix; + +This version does not adjust relative URLs, understand alternate +bases, deal with HTML comments, deal with HREF and NAME attributes in +the same tag, or accept URLs themselves as arguments. It also runs +about 100x faster than a more "complete" solution using the LWP suite +of modules, such as the +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/xurl.gz +program. + +=head2 How do I download a file from the user's machine? How do I open a file on another machine? + +In the context of an HTML form, you can use what's known as +B<multipart/form-data> encoding. 
The CGI.pm module (available from +CPAN) supports this in the start_multipart_form() method, which isn't +the same as the startform() method. + +=head2 How do I make a pop-up menu in HTML? + +Use the B<E<lt>SELECTE<gt>> and B<E<lt>OPTIONE<gt>> tags. The CGI.pm +module (available from CPAN) supports this widget, as well as many +others, including some that it cleverly synthesizes on its own. + +=head2 How do I fetch an HTML file? + +One approach, if you have the lynx text-based HTML browser installed +on your system, is this: + + $html_code = `lynx -source $url`; + $text_data = `lynx -dump $url`; + +The libwww-perl (LWP) modules from CPAN provide a more powerful way to +do this. They work through proxies, and don't require lynx: + + # print HTML from a URL + use LWP::Simple; + getprint "http://www.sn.no/libwww-perl/"; + + # print ASCII from HTML from a URL + use LWP::Simple; + use HTML::Parse; + use HTML::FormatText; + my ($html, $ascii); + $html = get("http://www.perl.com/"); + defined $html + or die "Can't fetch HTML from http://www.perl.com/"; + $ascii = HTML::FormatText->new->format(parse_html($html)); + print $ascii; + +=head2 how do I decode or create those %-encodings on the web? + +Here's an example of decoding: + + $string = "http://altavista.digital.com/cgi-bin/query?pg=q&what=news&fmt=.&q=%2Bcgi-bin+%2Bperl.exe"; + $string =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; + +Encoding is a bit harder, because you can't just blindly change +all the non-alphanumunder character (C<\W>) into their hex escapes. +It's important that characters with special meaning like C</> and C<?> +I<not> be translated. Probably the easiest way to get this right is +to avoid reinventing the wheel and just use the URI::Escape module, +which is part of the libwww-perl package (LWP) available from CPAN. + +=head2 How do I redirect to another page? + +Instead of sending back a C<Content-Type> as the headers of your +reply, send back a C<Location:> header. 
Officially this should be a +C<URI:> header, so the CGI.pm module (available from CPAN) sends back +both: + + Location: http://www.domain.com/newpage + URI: http://www.domain.com/newpage + +Note that relative URLs in these headers can cause strange effects +because of "optimizations" that servers do. + +=head2 How do I put a password on my web pages? + +That depends. You'll need to read the documentation for your web +server, or perhaps check some of the other FAQs referenced above. + +=head2 How do I edit my .htpasswd and .htgroup files with Perl? + +The HTTPD::UserAdmin and HTTPD::GroupAdmin modules provide a +consistent OO interface to these files, regardless of how they're +stored. Databases may be text, dbm, Berkeley DB or any database with a +DBI-compatible driver. HTTPD::UserAdmin supports files used by the +`Basic' and `Digest' authentication schemes. Here's an example: + + use HTTPD::UserAdmin (); + HTTPD::UserAdmin + ->new(DB => "/foo/.htpasswd") + ->add($username => $password); + +=head2 How do I make sure users can't enter values into a form that cause my CGI script to do bad things? + +Read the CGI security FAQ, at +http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html, and the +Perl/CGI FAQ at +http://www.perl.com/CPAN/doc/FAQs/cgi/perl-cgi-faq.html. + +In brief: use tainting (see L<perlsec>), which makes sure that data +from outside your script (eg, CGI parameters) are never used in +C<eval> or C<system> calls. In addition to tainting, never use the +single-argument form of system() or exec(). Instead, supply the +command and arguments as a list, which prevents shell globbing. + +=head2 How do I parse an email header? 
+ +For a quick-and-dirty solution, try this solution derived +from page 222 of the 2nd edition of "Programming Perl": + + $/ = ''; + $header = <MSG>; + $header =~ s/\n\s+/ /g; # merge continuation lines + %head = ( UNIX_FROM_LINE, split /^([-\w]+):\s*/m, $header ); + +That solution doesn't do well if, for example, you're trying to +maintain all the Received lines. A more complete approach is to use +the Mail::Header module from CPAN (part of the MailTools package). + +=head2 How do I decode a CGI form? + +A lot of people are tempted to code this up themselves, so you've +probably all seen a lot of code involving C<$ENV{CONTENT_LENGTH}> and +C<$ENV{QUERY_STRING}>. It's true that this can work, but there are +also a lot of versions of this floating around that are quite simply +broken! + +Please do not be tempted to reinvent the wheel. Instead, use the +CGI.pm or CGI_Lite.pm (available from CPAN), or if you're trapped in +the module-free land of perl1 .. perl4, you might look into cgi-lib.pl +(available from http://www.bio.cam.ac.uk/web/form.html). + +=head2 How do I check a valid email address? + +You can't. + +Without sending mail to the address and seeing whether it bounces (and +even then you face the halting problem), you cannot determine whether +an email address is valid. Even if you apply the email header +standard, you can have problems, because there are deliverable +addresses that aren't RFC-822 (the mail header standard) compliant, +and addresses that aren't deliverable which are compliant. + +Many are tempted to try to eliminate many frequently-invalid email +addresses with a simple regexp, such as +C</^[\w.-]+\@([\w.-]\.)+\w+$/>. However, this also throws out many +valid ones, and says nothing about potential deliverability, so is not +suggested. 
Instead, see +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/ckaddr.gz , +which actually checks against the full RFC spec (except for nested +comments), looks for addresses you may not wish to accept email to +(say, Bill Clinton or your postmaster), and then makes sure that the +hostname given can be looked up in DNS. It's not fast, but it works. + +Here's an alternative strategy used by many CGI script authors: Check +the email address with a simple regexp (such as the one above). If +the regexp matched the address, accept the address. If the regexp +didn't match the address, request confirmation from the user that the +email address they entered was correct. + +=head2 How do I decode a MIME/BASE64 string? + +The MIME-tools package (available from CPAN) handles this and a lot +more. Decoding BASE64 becomes as simple as: + + use MIME::Base64; + $decoded = decode_base64($encoded); + +A more direct approach is to use the unpack() function's "u" +format after minor transliterations: + + tr#A-Za-z0-9+/##cd; # remove non-base64 chars + tr#A-Za-z0-9+/# -_#; # convert to uuencoded format + $len = pack("c", 32 + 0.75*length); # compute length byte + print unpack("u", $len . $_); # uudecode and print + +=head2 How do I return the user's email address? + +On systems that support getpwuid, the $E<lt> variable and the +Sys::Hostname module (which is part of the standard perl distribution), +you can probably try using something like this: + + use Sys::Hostname; + $address = sprintf('%s@%s', scalar getpwuid($<), hostname); + +Company policies on email addresses can mean that this generates addresses +that the company's email system will not accept, so you should ask for +users' email addresses when this matters. Furthermore, not all systems +on which Perl runs are so forthcoming with this information as is Unix. + +The Mail::Util module from CPAN (part of the MailTools package) provides a +mailaddress() function that tries to guess the mail address of the user. 
+It makes a more intelligent guess than the code above, using information +given when the module was installed, but it could still be incorrect. +Again, the best way is often just to ask the user. + +=head2 How do I send/read mail? + +Sending mail: the Mail::Mailer module from CPAN (part of the MailTools +package) is UNIX-centric, while Mail::Internet uses Net::SMTP, which is +not UNIX-centric. Reading mail: use the Mail::Folder module from CPAN +(part of the MailFolder package) or the Mail::Internet module from +CPAN (also part of the MailTools package). + + # sending mail + use Mail::Internet; + use Mail::Header; + # say which mail host to use + $ENV{SMTPHOSTS} = 'mail.frii.com'; + # create headers + $header = new Mail::Header; + $header->add('From', 'gnat@frii.com'); + $header->add('Subject', 'Testing'); + $header->add('To', 'gnat@frii.com'); + # create body + $body = 'This is a test, ignore'; + # create mail object + $mail = new Mail::Internet(undef, Header => $header, Body => \[$body]); + # send it + $mail->smtpsend or die; + +=head2 How do I find out my hostname/domainname/IP address? + +A lot of code has historically cavalierly called the C<`hostname`> +program. While sometimes expedient, this isn't very portable. It's +one of those tradeoffs of convenience versus portability. + +The Sys::Hostname module (part of the standard perl distribution) will +give you the hostname after which you can find out the IP address +(assuming you have working DNS) with a gethostbyname() call. + + use Socket; + use Sys::Hostname; + my $host = hostname(); + my $addr = inet_ntoa(scalar(gethostbyname($host || 'localhost'))); + +Probably the simplest way to learn your DNS domain name is to grok +it out of /etc/resolv.conf, at least under Unix. Of course, this +assumes several things about your resolv.conf configuration, including +that it exists. + +(We still need a good DNS domain name-learning method for non-Unix +systems.) 
+ +=head2 How do I fetch a news article or the active newsgroups? + +Use the Net::NNTP or News::NNTPClient modules, both available from CPAN. +This can make tasks like fetching the newsgroup list as simple as: + + perl -MNews::NNTPClient + -e 'print News::NNTPClient->new->list("newsgroups")' + +=head2 How do I fetch/put an FTP file? + +LWP::Simple (available from CPAN) can fetch but not put. Net::FTP (also +available from CPAN) is more complex but can put as well as fetch. + +=head2 How can I do RPC in Perl? + +A DCE::RPC module is being developed (but is not yet available), and +will be released as part of the DCE-Perl package (available from +CPAN). No ONC::RPC module is known. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. + diff --git a/pod/perlfaq9.pod b/pod/perlfaq9.pod new file mode 100644 index 0000000000..d7faca02e3 --- /dev/null +++ b/pod/perlfaq9.pod @@ -0,0 +1,331 @@ +=head1 NAME + +perlfaq9 - Networking ($Revision: 1.16 $, $Date: 1997/04/23 18:12:06 $) + +=head1 DESCRIPTION + +This section deals with questions related to networking, the internet, +and a few on the web. + +=head2 My CGI script runs from the command line but not the browser. Can you help me fix it? + +Sure, but you probably can't afford our contracting rates :-) + +Seriously, if you can demonstrate that you've read the following FAQs +and that your problem isn't something simple that can be easily +answered, you'll probably receive a courteous and useful reply to your +question if you post it on comp.infosystems.www.authoring.cgi (if it's +something to do with HTTP, HTML, or the CGI protocols). Questions that +appear to be Perl questions but are really CGI ones that are posted to +comp.lang.perl.misc may not be so well received. 
+ +The useful FAQs are: + + http://www.perl.com/perl/faq/idiots-guide.html + http://www3.pair.com/webthing/docs/cgi/faqs/cgifaq.shtml + http://www.perl.com/perl/faq/perl-cgi-faq.html + http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html + http://www.boutell.com/faq/ + +=head2 How do I remove HTML from a string? + +The most correct way (albeit not the fastest) is to use HTML::Parse +from CPAN (part of the libwww-perl distribution, which is a must-have +module for all web hackers). + +Many folks attempt a simple-minded regular expression approach, like +C<s/E<lt>.*?E<gt>//g>, but that fails in many cases because the tags +may continue over line breaks, they may contain quoted angle-brackets, +or HTML comments may be present. Plus folks forget to convert +entities, like C<&lt;> for example. + +Here's one "simple-minded" approach that works for most files: + + #!/usr/bin/perl -p0777 + s/<(?:[^>'"]*|(['"]).*?\1)*>//gs + +If you want a more complete solution, see the 3-stage striphtml +program in +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/striphtml.gz +. + +=head2 How do I extract URLs? + +A quick but imperfect approach is + + #!/usr/bin/perl -n00 + # qxurl - tchrist@perl.com + print "$2\n" while m{ + < \s* + A \s+ HREF \s* = \s* (["']) (.*?) \1 + \s* > + }gsix; + +This version does not adjust relative URLs, understand alternate +bases, deal with HTML comments, deal with HREF and NAME attributes in +the same tag, or accept URLs themselves as arguments. It also runs +about 100x faster than a more "complete" solution using the LWP suite +of modules, such as the +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/xurl.gz +program. + +=head2 How do I download a file from the user's machine? How do I open a file on another machine? + +In the context of an HTML form, you can use what's known as +B<multipart/form-data> encoding. 
The CGI.pm module (available from +CPAN) supports this in the start_multipart_form() method, which isn't +the same as the startform() method. + +=head2 How do I make a pop-up menu in HTML? + +Use the B<E<lt>SELECTE<gt>> and B<E<lt>OPTIONE<gt>> tags. The CGI.pm +module (available from CPAN) supports this widget, as well as many +others, including some that it cleverly synthesizes on its own. + +=head2 How do I fetch an HTML file? + +One approach, if you have the lynx text-based HTML browser installed +on your system, is this: + + $html_code = `lynx -source $url`; + $text_data = `lynx -dump $url`; + +The libwww-perl (LWP) modules from CPAN provide a more powerful way to +do this. They work through proxies, and don't require lynx: + + # print HTML from a URL + use LWP::Simple; + getprint "http://www.sn.no/libwww-perl/"; + + # print ASCII from HTML from a URL + use LWP::Simple; + use HTML::Parse; + use HTML::FormatText; + my ($html, $ascii); + $html = get("http://www.perl.com/"); + defined $html + or die "Can't fetch HTML from http://www.perl.com/"; + $ascii = HTML::FormatText->new->format(parse_html($html)); + print $ascii; + +=head2 How do I decode or create those %-encodings on the web? + +Here's an example of decoding: + + $string = "http://altavista.digital.com/cgi-bin/query?pg=q&what=news&fmt=.&q=%2Bcgi-bin+%2Bperl.exe"; + $string =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; + +Encoding is a bit harder, because you can't just blindly change +all the non-alphanumunder characters (C<\W>) into their hex escapes. +It's important that characters with special meaning like C</> and C<?> +I<not> be translated. Probably the easiest way to get this right is +to avoid reinventing the wheel and just use the URI::Escape module, +which is part of the libwww-perl package (LWP) available from CPAN. + +=head2 How do I redirect to another page? + +Instead of sending back a C<Content-Type> as the headers of your +reply, send back a C<Location:> header. 
Officially this should be a +C<URI:> header, so the CGI.pm module (available from CPAN) sends back +both: + + Location: http://www.domain.com/newpage + URI: http://www.domain.com/newpage + +Note that relative URLs in these headers can cause strange effects +because of "optimizations" that servers do. + +=head2 How do I put a password on my web pages? + +That depends. You'll need to read the documentation for your web +server, or perhaps check some of the other FAQs referenced above. + +=head2 How do I edit my .htpasswd and .htgroup files with Perl? + +The HTTPD::UserAdmin and HTTPD::GroupAdmin modules provide a +consistent OO interface to these files, regardless of how they're +stored. Databases may be text, dbm, Berkeley DB or any database with a +DBI-compatible driver. HTTPD::UserAdmin supports files used by the +`Basic' and `Digest' authentication schemes. Here's an example: + + use HTTPD::UserAdmin (); + HTTPD::UserAdmin + ->new(DB => "/foo/.htpasswd") + ->add($username => $password); + +=head2 How do I make sure users can't enter values into a form that cause my CGI script to do bad things? + +Read the CGI security FAQ, at +http://www-genome.wi.mit.edu/WWW/faqs/www-security-faq.html, and the +Perl/CGI FAQ at +http://www.perl.com/CPAN/doc/FAQs/cgi/perl-cgi-faq.html. + +In brief: use tainting (see L<perlsec>), which makes sure that data +from outside your script (e.g., CGI parameters) are never used in +C<eval> or C<system> calls. In addition to tainting, never use the +single-argument form of system() or exec(). Instead, supply the +command and arguments as a list, which prevents shell globbing. + +=head2 How do I parse an email header? 
+ +For a quick-and-dirty solution, try this approach derived +from page 222 of the 2nd edition of "Programming Perl": + + $/ = ''; + $header = <MSG>; + $header =~ s/\n\s+/ /g; # merge continuation lines + %head = ( UNIX_FROM_LINE, split /^([-\w]+):\s*/m, $header ); + +That solution doesn't do well if, for example, you're trying to +maintain all the Received lines. A more complete approach is to use +the Mail::Header module from CPAN (part of the MailTools package). + +=head2 How do I decode a CGI form? + +A lot of people are tempted to code this up themselves, so you've +probably all seen a lot of code involving C<$ENV{CONTENT_LENGTH}> and +C<$ENV{QUERY_STRING}>. It's true that this can work, but there are +also a lot of versions of this floating around that are quite simply +broken! + +Please do not be tempted to reinvent the wheel. Instead, use the +CGI.pm or CGI_Lite.pm modules (available from CPAN), or if you're trapped in +the module-free land of perl1 .. perl4, you might look into cgi-lib.pl +(available from http://www.bio.cam.ac.uk/web/form.html). + +=head2 How do I check a valid email address? + +You can't. + +Without sending mail to the address and seeing whether it bounces (and +even then you face the halting problem), you cannot determine whether +an email address is valid. Even if you apply the email header +standard, you can have problems, because there are deliverable +addresses that aren't RFC-822 (the mail header standard) compliant, +and addresses that aren't deliverable which are compliant. + +Many are tempted to try to eliminate many frequently-invalid email +addresses with a simple regexp, such as +C</^[\w.-]+\@([\w.-]+\.)+\w+$/>. However, this also throws out many +valid ones, and says nothing about potential deliverability, so is not +suggested. 
Instead, see +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/ckaddr.gz , +which actually checks against the full RFC spec (except for nested +comments), looks for addresses you may not wish to accept email to +(say, Bill Clinton or your postmaster), and then makes sure that the +hostname given can be looked up in DNS. It's not fast, but it works. + +Here's an alternative strategy used by many CGI script authors: Check +the email address with a simple regexp (such as the one above). If +the regexp matched the address, accept the address. If the regexp +didn't match the address, request confirmation from the user that the +email address they entered was correct. + +=head2 How do I decode a MIME/BASE64 string? + +The MIME-tools package (available from CPAN) handles this and a lot +more. Decoding BASE64 becomes as simple as: + + use MIME::Base64; + $decoded = decode_base64($encoded); + +A more direct approach is to use the unpack() function's "u" +format after minor transliterations: + + tr#A-Za-z0-9+/##cd; # remove non-base64 chars + tr#A-Za-z0-9+/# -_#; # convert to uuencoded format + $len = pack("c", 32 + 0.75*length); # compute length byte + print unpack("u", $len . $_); # uudecode and print + +=head2 How do I return the user's email address? + +On systems that support getpwuid, the $E<lt> variable and the +Sys::Hostname module (which is part of the standard perl distribution), +you can probably try using something like this: + + use Sys::Hostname; + $address = sprintf('%s@%s', scalar getpwuid($<), hostname); + +Company policies on email addresses can mean that this generates addresses +that the company's email system will not accept, so you should ask for +users' email addresses when this matters. Furthermore, not all systems +on which Perl runs are so forthcoming with this information as is Unix. + +The Mail::Util module from CPAN (part of the MailTools package) provides a +mailaddress() function that tries to guess the mail address of the user. 
+It makes a more intelligent guess than the code above, using information +given when the module was installed, but it could still be incorrect. +Again, the best way is often just to ask the user. + +=head2 How do I send/read mail? + +Sending mail: the Mail::Mailer module from CPAN (part of the MailTools +package) is UNIX-centric, while Mail::Internet uses Net::SMTP which is +not UNIX-centric. Reading mail: use the Mail::Folder module from CPAN +(part of the MailFolder package) or the Mail::Internet module from +CPAN (also part of the MailTools package). + + # sending mail + use Mail::Internet; + use Mail::Header; + # say which mail host to use + $ENV{SMTPHOSTS} = 'mail.frii.com'; + # create headers + $header = new Mail::Header; + $header->add('From', 'gnat@frii.com'); + $header->add('Subject', 'Testing'); + $header->add('To', 'gnat@frii.com'); + # create body + $body = 'This is a test, ignore'; + # create mail object + $mail = new Mail::Internet(undef, Header => $header, Body => \[$body]); + # send it + $mail->smtpsend or die; + +=head2 How do I find out my hostname/domainname/IP address? + +A lot of code has historically cavalierly called the C<`hostname`> +program. While sometimes expedient, this isn't very portable. It's +one of those tradeoffs of convenience versus portability. + +The Sys::Hostname module (part of the standard perl distribution) will +give you the hostname after which you can find out the IP address +(assuming you have working DNS) with a gethostbyname() call. + + use Socket; + use Sys::Hostname; + my $host = hostname(); + my $addr = inet_ntoa(scalar(gethostbyname($host || 'localhost'))); + +Probably the simplest way to learn your DNS domain name is to grok +it out of /etc/resolv.conf, at least under Unix. Of course, this +assumes several things about your resolv.conf configuration, including +that it exists. + +(We still need a good DNS domain name-learning method for non-Unix +systems.) 
+ +=head2 How do I fetch a news article or the active newsgroups? + +Use the Net::NNTP or News::NNTPClient modules, both available from CPAN. +This can make tasks like fetching the newsgroup list as simple as: + + perl -MNews::NNTPClient + -e 'print News::NNTPClient->new->list("newsgroups")' + +=head2 How do I fetch/put an FTP file? + +LWP::Simple (available from CPAN) can fetch but not put. Net::FTP (also +available from CPAN) is more complex but can put as well as fetch. + +=head2 How can I do RPC in Perl? + +A DCE::RPC module is being developed (but is not yet available), and +will be released as part of the DCE-Perl package (available from +CPAN). No ONC::RPC module is known. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1997 Tom Christiansen and Nathan Torkington. +All rights reserved. See L<perlfaq> for distribution information. + diff --git a/pod/perlform.pod b/pod/perlform.pod index cf0bc068f1..7e540b8ff6 100644 --- a/pod/perlform.pod +++ b/pod/perlform.pod @@ -5,20 +5,20 @@ perlform - Perl formats =head1 DESCRIPTION Perl has a mechanism to help you generate simple reports and charts. To -facilitate this, Perl helps you code up your output page -close to how it will look when it's printed. It can keep -track of things like how many lines on a page, what page you're on, when to -print page headers, etc. Keywords are borrowed from FORTRAN: -format() to declare and write() to execute; see their entries in -L<perlfunc>. Fortunately, the layout is much more legible, more like -BASIC's PRINT USING statement. Think of it as a poor man's nroff(1). - -Formats, like packages and subroutines, are declared rather than executed, -so they may occur at any point in your program. (Usually it's best to -keep them all together though.) They have their own namespace apart from -all the other "types" in Perl. This means that if you have a function -named "Foo", it is not the same thing as having a format named "Foo". 
-However, the default name for the format associated with a given +facilitate this, Perl helps you code up your output page close to how it +will look when it's printed. It can keep track of things like how many +lines are on a page, what page you're on, when to print page headers, +etc. Keywords are borrowed from FORTRAN: format() to declare and write() +to execute; see their entries in L<perlfunc>. Fortunately, the layout is +much more legible, more like BASIC's PRINT USING statement. Think of it +as a poor man's nroff(1). + +Formats, like packages and subroutines, are declared rather than +executed, so they may occur at any point in your program. (Usually it's +best to keep them all together though.) They have their own namespace +apart from all the other "types" in Perl. This means that if you have a +function named "Foo", it is not the same thing as having a format named +"Foo". However, the default name for the format associated with a given filehandle is the same as the name of the filehandle. Thus, the default format for STDOUT is name "STDOUT", and the default format for filehandle TEMP is name "TEMP". They just look the same. They aren't. @@ -29,8 +29,8 @@ Output record formats are declared as follows: FORMLIST . -If name is omitted, format "STDOUT" is defined. FORMLIST consists of a -sequence of lines, each of which may be of one of three types: +If name is omitted, format "STDOUT" is defined. FORMLIST consists of +a sequence of lines, each of which may be one of three types: =over 4 @@ -54,7 +54,7 @@ with either "@" (at) or "^" (caret). These lines do not undergo any kind of variable interpolation. The at field (not to be confused with the array marker @) is the normal kind of field; the other kind, caret fields, are used to do rudimentary multi-line text block filling. 
The length of the field -is supplied by padding out the field with multiple "<", ">", or "|" +is supplied by padding out the field with multiple "E<lt>", "E<gt>", or "|" characters to specify, respectively, left justification, right justification, or centering. If the variable would exceed the width specified, it is truncated. @@ -64,7 +64,7 @@ characters (with an optional ".") to specify a numeric field. This way you can line up the decimal points. If any value supplied for these fields contains a newline, only the text up to the newline is printed. Finally, the special field "@*" can be used for printing multi-line, -non-truncated values; it should appear by itself on a line. +nontruncated values; it should appear by itself on a line. The values are specified on the following line in the same order as the picture fields. The expressions providing the values should be @@ -72,7 +72,14 @@ separated by commas. The expressions are all evaluated in a list context before the line is processed, so a single list expression could produce multiple list elements. The expressions may be spread out to more than one line if enclosed in braces. If so, the opening brace must be the first -token on the first line. +token on the first line. If an expression evaluates to a number with a +decimal part, and if the corresponding picture specifies that the decimal +part should appear in the output (that is, any picture except multiple "#" +characters B<without> an embedded "."), the character used for the decimal +point is B<always> determined by the current LC_NUMERIC locale. This +means that, if, for example, the run-time environment happens to specify a +German locale, "," will be used instead of the default ".". See +L<perllocale> and L<"WARNINGS"> for more information. Picture fields that begin with ^ rather than @ are treated specially. With a # field, the field is blanked out if the value is undefined. 
For @@ -98,9 +105,9 @@ first, the line will be repeated until all the fields on the line are exhausted. (If you use a field of the at variety, the expression you supply had better not give the same value every time forever!) -Top-of-form processing is by default handled by a format with the +Top-of-form processing is by default handled by a format with the same name as the current filehandle with "_TOP" concatenated to it. -It's triggered at the top of each page. See <perlfunc/write()>. +It's triggered at the top of each page. See L<perlfunc/write>. Examples: @@ -147,22 +154,22 @@ Examples: . It is possible to intermix print()s with write()s on the same output -channel, but you'll have to handle $- ($FORMAT_LINES_LEFT) +channel, but you'll have to handle C<$-> (C<$FORMAT_LINES_LEFT>) yourself. =head2 Format Variables -The current format name is stored in the variable C<$~> ($FORMAT_NAME), -and the current top of form format name is in C<$^> ($FORMAT_TOP_NAME). -The current output page number is stored in C<$%> ($FORMAT_PAGE_NUMBER), -and the number of lines on the page is in C<$=> ($FORMAT_LINES_PER_PAGE). +The current format name is stored in the variable C<$~> (C<$FORMAT_NAME>), +and the current top of form format name is in C<$^> (C<$FORMAT_TOP_NAME>). +The current output page number is stored in C<$%> (C<$FORMAT_PAGE_NUMBER>), +and the number of lines on the page is in C<$=> (C<$FORMAT_LINES_PER_PAGE>). Whether to autoflush output on this handle is stored in C<$|> -($OUTPUT_AUTOFLUSH). The string output before each top of page (except -the first) is stored in C<$^L> ($FORMAT_FORMFEED). These variables are +(C<$OUTPUT_AUTOFLUSH>). The string output before each top of page (except +the first) is stored in C<$^L> (C<$FORMAT_FORMFEED>). 
These variables are set on a per-filehandle basis, so you'll need to select() into a different one to affect them: - select((select(OUTF), + select((select(OUTF), $~ = "My_Other_Format", $^ = "My_Top_Format" )[0]); @@ -187,7 +194,7 @@ If you use the English module, you can even read the variable names: select($ofh); But you still have those funny select()s. So just use the FileHandle -module. Now, you can access these special variables using lower-case +module. Now, you can access these special variables using lowercase method names instead: use FileHandle; @@ -198,25 +205,25 @@ Much better! =head1 NOTES -Since the values line may contain arbitrary expressions (for at fields, +Because the values line may contain arbitrary expressions (for at fields, not caret fields), you can farm out more sophisticated processing to other functions, like sprintf() or one of your own. For example: - format Ident = + format Ident = @<<<<<<<<<<<<<<< &commify($n) . To get a real at or caret into the field, do this: - format Ident = + format Ident = I have an @ here. "@" . To center a whole line of text, do something like this: - format Ident = + format Ident = @||||||||||||||||||||||||||||||||||||||||||||||| "Some text line" . @@ -233,12 +240,12 @@ on the current number of columns, and then eval() it: . '$entry' . "\n"; . 
".\n"; print $format if $Debugging; - eval $format; + eval $format; die $@ if $@; Which would generate a format looking something like this: - format STDOUT = + format STDOUT = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $entry ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ @@ -247,7 +254,7 @@ Which would generate a format looking something like this: Here's a little program that's somewhat like fmt(1): - format = + format = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ~~ $_ @@ -257,7 +264,7 @@ Here's a little program that's somewhat like fmt(1): while (<>) { s/\s*\n\s*/ /g; write; - } + } =head2 Footers @@ -270,10 +277,10 @@ Here's one strategy: If you have a fixed-size footer, you can get footers by checking $FORMAT_LINES_LEFT before each write() and print the footer yourself if necessary. -Here's another strategy; open a pipe to yourself, using C<open(MESELF, "|-")> -(see L<perlfunc/open()>) and always write() to MESELF instead of -STDOUT. Have your child process postprocesses its STDIN to rearrange -headers and footers however you like. Not very convenient, but doable. +Here's another strategy: Open a pipe to yourself, using C<open(MYSELF, "|-")> +(see L<perlfunc/open()>) and always write() to MYSELF instead of STDOUT. +Have your child process massage its STDIN to rearrange headers and footers +however you like. Not very convenient, but doable. =head2 Accessing Formatting Internals @@ -298,7 +305,7 @@ is to printf(), do this: $^A = ""; formline($format,@_); return $^A; - } + } $string = swrite(<<'END', 1, 2, 3); Check me out @@ -306,10 +313,25 @@ is to printf(), do this: END print $string; -=head1 WARNING +=head1 WARNINGS + +The lone dot that ends a format can also prematurely end an email +message passing through a misconfigured Internet mailer (and based on +experience, such misconfiguration is the rule, not the exception). 
So +when sending format code through email, you should indent it so that +the format-ending dot is not on the left margin; this will prevent +email cutoff. Lexical variables (declared with "my") are not visible within a format unless the format is declared within the scope of the lexical -variable. (They weren't visible at all before version 5.001.) Furthermore, -lexical aliases will not be compiled correctly: see -L<perlfunc/my> for other issues. +variable. (They weren't visible at all before version 5.001.) + +Formats are the only part of Perl which unconditionally use information +from a program's locale; if a program's environment specifies an +LC_NUMERIC locale, it is always used to specify the decimal point +character in formatted output. Perl ignores all other aspects of locale +handling unless the C<use locale> pragma is in effect. Formatted output +cannot be controlled by C<use locale> because the pragma is tied to the +block structure of the program, and, for historical reasons, formats +exist outside that block structure. See L<perllocale> for further +discussion of locale handling. diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 28b5442e90..e3c4c9546d 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -14,8 +14,8 @@ a unary operator, but merely separates the arguments of a list operator. A unary operator generally provides a scalar context to its argument, while a list operator may provide either scalar and list contexts for its arguments. If it does both, the scalar arguments will -be first, and the list argument will follow. (Note that there can only -ever be one list argument.) For instance, splice() has three scalar +be first, and the list argument will follow. (Note that there can ever +be only one list argument.) For instance, splice() has three scalar arguments followed by a list. In the syntax descriptions that follow, list operators that expect a @@ -28,18 +28,18 @@ Elements of the LIST should be separated by commas. 
Any function in the list below may be used either with or without parentheses around its arguments. (The syntax descriptions omit the -parens.) If you use the parens, the simple (but occasionally +parentheses.) If you use the parentheses, the simple (but occasionally surprising) rule is this: It I<LOOKS> like a function, therefore it I<IS> a function, and precedence doesn't matter. Otherwise it's a list operator or unary operator, and precedence does matter. And whitespace between the function and left parenthesis doesn't count--so you need to be careful sometimes: - print 1+2+3; # Prints 6. - print(1+2) + 3; # Prints 3. - print (1+2)+3; # Also prints 3! - print +(1+2)+3; # Prints 6. - print ((1+2)+3); # Prints 6. + print 1+2+4; # Prints 7. + print(1+2) + 4; # Prints 3. + print (1+2)+4; # Also prints 3! + print +(1+2)+4; # Prints 7. + print ((1+2)+4); # Prints 7. If you run Perl with the B<-w> switch it can warn you about this. For example, the third line above produces: @@ -48,7 +48,7 @@ example, the third line above produces: Useless use of integer addition in void context at - line 1. For functions that can be used in either a scalar or list context, -non-abortive failure is generally indicated in a scalar context by +nonabortive failure is generally indicated in a scalar context by returning the undefined value, and in a list context by returning the null list. 
@@ -56,9 +56,7 @@ Remember the following rule: =over 8 -=item - -I<THERE IS NO GENERAL RULE FOR CONVERTING A LIST INTO A SCALAR!> +=item I<THERE IS NO GENERAL RULE FOR CONVERTING A LIST INTO A SCALAR!> =back @@ -110,7 +108,7 @@ delete, each, exists, keys, values binmode, close, closedir, dbmclose, dbmopen, die, eof, fileno, flock, format, getc, print, printf, read, readdir, -rewinddir, seek, seekdir, select, syscall, sysread, +rewinddir, seek, seekdir, select, syscall, sysread, sysseek, syswrite, tell, telldir, truncate, warn, write =item Functions for fixed length data or records @@ -119,7 +117,7 @@ pack, read, syscall, sysread, syswrite, unpack, vec =item Functions for filehandles, files, or directories --X, chdir, chmod, chown, chroot, fcntl, glob, ioctl, link, +I<-X>, chdir, chmod, chown, chroot, fcntl, glob, ioctl, link, lstat, mkdir, open, opendir, readlink, rename, rmdir, stat, symlink, umask, unlink, utime @@ -128,7 +126,7 @@ stat, symlink, umask, unlink, utime caller, continue, die, do, dump, eval, exit, goto, last, next, redo, return, sub, wantarray -=item Keywords related to scoping +=item Keywords related to scoping caller, import, local, my, package, use @@ -183,8 +181,8 @@ gmtime, localtime, time, times =item Functions new in perl5 abs, bless, chomp, chr, exists, formline, glob, import, lc, -lcfirst, map, my, no, qx, qw, ref, sub*, sysopen, tie, tied, uc, -ucfirst, untie, use +lcfirst, map, my, no, prototype, qx, qw, readline, readpipe, +ref, sub*, sysopen, tie, tied, uc, ucfirst, untie, use * - C<sub> was a keyword in perl4, but in perl5 it is an operator which can be used in expressions. @@ -193,12 +191,10 @@ operator which can be used in expressions. dbmclose, dbmopen - =back =head2 Alphabetical Listing of Perl Functions - =over 8 =item -X FILEHANDLE @@ -229,7 +225,7 @@ operator may be any of: -e File exists. -z File has zero size. - -s File has non-zero size (returns size). + -s File has nonzero size (returns size). -f File is a plain file. 
-d File is a directory. @@ -252,12 +248,12 @@ operator may be any of: -C Same for inode change time. The interpretation of the file permission operators C<-r>, C<-R>, C<-w>, -C<-W>, C<-x> and C<-X> is based solely on the mode of the file and the +C<-W>, C<-x>, and C<-X> is based solely on the mode of the file and the uids and gids of the user. There may be other reasons you can't actually read, write or execute the file. Also note that, for the superuser, -C<-r>, C<-R>, C<-w> and C<-W> always return 1, and C<-x> and C<-X> return +C<-r>, C<-R>, C<-w>, and C<-W> always return 1, and C<-x> and C<-X> return 1 if any execute bit is set in the mode. Scripts run by the superuser may -thus need to do a stat() in order to determine the actual mode of the +thus need to do a stat() to determine the actual mode of the file, or temporarily set the uid to something else. Example: @@ -274,17 +270,17 @@ following a minus are interpreted as file tests. The C<-T> and C<-B> switches work as follows. The first block or so of the file is examined for odd characters such as strange control codes or -characters with the high bit set. If too many odd characters (>30%) +characters with the high bit set. If too many odd characters (E<gt>30%) are found, it's a C<-B> file, otherwise it's a C<-T> file. Also, any file containing null in the first block is considered a binary file. If C<-T> or C<-B> is used on a filehandle, the current stdio buffer is examined rather than the first block. Both C<-T> and C<-B> return TRUE on a null -file, or a file at EOF when testing a filehandle. Because you have to +file, or a file at EOF when testing a filehandle. Because you have to read a file to do the C<-T> test, on most occasions you want to use a C<-f> against the file first, as in C<next unless -f $file && -T $file>. 
-If any of the file tests (or either the stat() or lstat() operators) are given the -special filehandle consisting of a solitary underline, then the stat +If any of the file tests (or either the stat() or lstat() operators) are given +the special filehandle consisting of a solitary underline, then the stat structure of the previous file test (or stat operator) is used, saving a system call. (This doesn't work with C<-t>, and you need to remember that lstat() and C<-l> will leave values in the stat structure for the @@ -304,7 +300,10 @@ symbolic link, not the real file.) Example: =item abs VALUE +=item abs + Returns the absolute value of its argument. +If VALUE is omitted, uses $_. =item accept NEWSOCKET,GENERICSOCKET @@ -314,8 +313,11 @@ See example in L<perlipc/"Sockets: Client/Server Communication">. =item alarm SECONDS +=item alarm + Arranges to have a SIGALRM delivered to this process after the -specified number of seconds have elapsed. (On some machines, +specified number of seconds have elapsed. If SECONDS is not specified, +the value stored in $_ is used. (On some machines, unfortunately, the elapsed time may be up to one second less than you specified because of how seconds are counted.) Only one timer may be counting at once. Each call disables the previous timer, and an @@ -324,14 +326,38 @@ starting a new one. The returned value is the amount of time remaining on the previous timer. For delays of finer granularity than one second, you may use Perl's -syscall() interface to access setitimer(2) if your system supports it, -or else see L</select()> below. It is not advised to intermix alarm() +syscall() interface to access setitimer(2) if your system supports it, +or else see L</select()>. It is usually a mistake to intermix alarm() and sleep() calls. +If you want to use alarm() to time out a system call you need to use an +eval/die pair. You can't rely on the alarm causing the system call to +fail with $! 
set to EINTR because Perl sets up signal handlers to +restart system calls on some systems. Using eval/die always works. + + eval { + local $SIG{ALRM} = sub { die "alarm\n" }; # NB \n required + alarm $timeout; + $nread = sysread SOCKET, $buffer, $size; + alarm 0; + }; + die if $@ && $@ ne "alarm\n"; # propagate errors + if ($@) { + # timed out + } + else { + # didn't + } + =item atan2 Y,X Returns the arctangent of Y/X in the range -PI to PI. +For the tangent operation, you may use the POSIX::tan() +function, or use the familiar relation: + + sub tan { sin($_[0]) / cos($_[0]) } + =item bind SOCKET,NAME Binds a network address to a socket, just as the bind system call @@ -344,9 +370,9 @@ L<perlipc/"Sockets: Client/Server Communication">. Arranges for the file to be read or written in "binary" mode in operating systems that distinguish between binary and text files. Files that are not in binary mode have CR LF sequences translated to LF on input and LF -translated to CR LF on output. Binmode has no effect under Unix; in DOS +translated to CR LF on output. Binmode has no effect under Unix; in MS-DOS and similarly archaic systems, it may be imperative--otherwise your -DOS-damaged C library may mangle your file. The key distinction between +MS-DOS-damaged C library may mangle your file. The key distinction between systems that need binmode and those that don't is their text file formats. Systems like Unix and Plan9 that delimit lines with a single character, and that encode that character in C as '\n', do not need @@ -357,10 +383,10 @@ is taken as the name of the filehandle. =item bless REF -This function tells the referenced object (passed as REF) that it is now +This function tells the thingy referenced by REF that it is now an object in the CLASSNAME package--or the current package if no CLASSNAME is specified, which is often the case. It returns the reference for -convenience, since a bless() is often the last thing in a constructor. 
+convenience, because a bless() is often the last thing in a constructor. Always use the two-argument version if the function doing the blessing might be inherited by a derived class. See L<perlobj> for more about the blessing (and blessings) of objects. @@ -370,8 +396,9 @@ blessing (and blessings) of objects. =item caller Returns the context of the current subroutine call. In a scalar context, -returns TRUE if there is a caller, that is, if we're in a subroutine or -eval() or require(), and FALSE otherwise. In a list context, returns +returns the caller's package name if there is a caller, that is, if +we're in a subroutine or eval() or require(), and the undefined value +otherwise. In a list context, returns ($package, $filename, $line) = caller; @@ -379,12 +406,21 @@ With EXPR, it returns some extra information that the debugger uses to print a stack trace. The value of EXPR indicates how many call frames to go back before the current one. - ($package, $filename, $line, - $subroutine, $hasargs, $wantargs) = caller($i); + ($package, $filename, $line, $subroutine, + $hasargs, $wantarray, $evaltext, $is_require) = caller($i); + +Here $subroutine may be C<"(eval)"> if the frame is not a subroutine +call, but an C<eval>. In such a case additional elements $evaltext and +$is_require are set: $is_require is true if the frame is created by a +C<require> or C<use> statement, $evaltext contains the text of the +C<eval EXPR> statement. In particular, for an C<eval BLOCK> statement, +$filename is C<"(eval)">, but $evaltext is undefined. (Note also that +each C<use> statement creates a C<require> frame inside an C<eval EXPR> +frame.) Furthermore, when called from within the DB package, caller returns more detailed information: it sets the list variable @DB::args to be the -arguments with which that subroutine was invoked. +arguments with which the subroutine was invoked. =item chdir EXPR @@ -396,10 +432,15 @@ otherwise. See example under die(). 
Changes the permissions of a list of files. The first element of the list must be the numerical mode, which should probably be an octal -number. Returns the number of files successfully changed. +number, and which definitely should I<not> be a string of octal digits: +C<0644> is okay, C<'0644'> is not. Returns the number of files +successfully changed. See also L</oct>, if all you have is a string. $cnt = chmod 0755, 'foo', 'bar'; chmod 0755, @executables; + $mode = '0644'; chmod $mode, 'foo'; # !!! sets mode to --w----r-T + $mode = '0644'; chmod oct($mode), 'foo'; # this is better + $mode = 0644; chmod $mode, 'foo'; # this is best =item chomp VARIABLE @@ -409,12 +450,12 @@ number. Returns the number of files successfully changed. This is a slightly safer version of chop (see below). It removes any line ending that corresponds to the current value of C<$/> (also known as -$INPUT_RECORD_SEPARATOR in the C<English> module). It returns the number -of characters removed. It's often used to remove the newline from the -end of an input record when you're worried that the final record may be -missing its newline. When in paragraph mode (C<$/ = "">), it removes all -trailing newlines from the string. If VARIABLE is omitted, it chomps -$_. Example: +$INPUT_RECORD_SEPARATOR in the C<English> module). It returns the total +number of characters removed from all its arguments. It's often used to +remove the newline from the end of an input record when you're worried +that the final record may be missing its newline. When in paragraph mode +(C<$/ = "">), it removes all trailing newlines from the string. If +VARIABLE is omitted, it chomps $_. Example: while (<>) { chomp; # avoid \n on last field @@ -468,7 +509,7 @@ Returns the number of files successfully changed. 
$cnt = chown $uid, $gid, 'foo', 'bar'; chown $uid, $gid, @filenames; -Here's an example that looks up non-numeric uids in the passwd file: +Here's an example that looks up nonnumeric uids in the passwd file: print "User: "; chop($user = <STDIN>); @@ -481,22 +522,28 @@ Here's an example that looks up non-numeric uids in the passwd file: @ary = <${pattern}>; # expand filenames chown $uid, $gid, @ary; -On most systems, you are not allowed to change the ownership of the +On most systems, you are not allowed to change the ownership of the file unless you're the superuser, although you should be able to change the group to any of your secondary groups. On insecure systems, these restrictions may be relaxed, but this is not a portable assumption. =item chr NUMBER +=item chr + Returns the character represented by that NUMBER in the character set. -For example, C<chr(65)> is "A" in ASCII. +For example, C<chr(65)> is "A" in ASCII. For the reverse, use L</ord>. + +If NUMBER is omitted, uses $_. =item chroot FILENAME +=item chroot + This function works as the system call by the same name: it makes the named directory the new root directory for all further pathnames that begin with a "/" by your process and all of its children. (It doesn't -change your current working directory is unaffected.) For security +change your current working directory, which is unaffected.) For security reasons, this call is restricted to the superuser. If FILENAME is omitted, does chroot to $_. @@ -504,8 +551,12 @@ omitted, does chroot to $_. Closes the file or pipe associated with the file handle, returning TRUE only if stdio successfully flushes buffers and closes the system file -descriptor. You don't have to close FILEHANDLE if you are immediately -going to do another open() on it, since open() will close it for you. (See +descriptor. 
If the file handle came from a piped open C<close> will +additionally return FALSE if one of the other system calls involved +fails or if the program exits with non-zero status. (If the problem was +that the program exited non-zero $! will be set to 0.) +You don't have to close FILEHANDLE if you are immediately +going to do another open() on it, because open() will close it for you. (See open().) However, an explicit close on an input file resets the line counter ($.), while the implicit close done by open() does not. Also, closing a pipe will wait for the process executing on the pipe to @@ -546,6 +597,11 @@ statement). Returns the cosine of EXPR (expressed in radians). If EXPR is omitted takes cosine of $_. +For the inverse cosine operation, you may use the POSIX::acos() +function, or use this relation: + + sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) } + =item crypt PLAINTEXT,SALT Encrypts a string exactly like the crypt(3) function in the C library @@ -554,6 +610,11 @@ extirpated as a potential munition). This can prove useful for checking the password file for lousy passwords, amongst other things. Only the guys wearing white hats should do this. +Note that crypt is intended to be a one-way function, much like breaking +eggs to make an omelette. There is no (known) corresponding decrypt +function. As a result, this function isn't all that useful for +cryptography. (For that, see your nearby CPAN mirror.) + Here's an example that makes sure that whoever runs this program knows their own password: @@ -570,36 +631,36 @@ their own password: die "Sorry...\n"; } else { print "ok\n"; - } + } -Of course, typing in your own password to whoever asks you +Of course, typing in your own password to whomever asks you for it is unwise. -=item dbmclose ASSOC_ARRAY +=item dbmclose HASH [This function has been superseded by the untie() function.] -Breaks the binding between a DBM file and an associative array. +Breaks the binding between a DBM file and a hash. 
-=item dbmopen ASSOC,DBNAME,MODE +=item dbmopen HASH,DBNAME,MODE [This function has been superseded by the tie() function.] -This binds a dbm(3), ndbm(3), sdbm(3), gdbm(), or Berkeley DB file to an -associative array. ASSOC is the name of the associative array. (Unlike -normal open, the first argument is I<NOT> a filehandle, even though it -looks like one). DBNAME is the name of the database (without the F<.dir> -or F<.pag> extension if any). If the database does not exist, it is -created with protection specified by MODE (as modified by the umask()). -If your system only supports the older DBM functions, you may perform only -one dbmopen() in your program. In older versions of Perl, if your system -had neither DBM nor ndbm, calling dbmopen() produced a fatal error; it now -falls back to sdbm(3). - -If you don't have write access to the DBM file, you can only read -associative array variables, not set them. If you want to test whether -you can write, either use file tests or try setting a dummy array entry -inside an eval(), which will trap the error. +This binds a dbm(3), ndbm(3), sdbm(3), gdbm(), or Berkeley DB file to a +hash. HASH is the name of the hash. (Unlike normal open, the first +argument is I<NOT> a filehandle, even though it looks like one). DBNAME +is the name of the database (without the F<.dir> or F<.pag> extension if +any). If the database does not exist, it is created with protection +specified by MODE (as modified by the umask()). If your system supports +only the older DBM functions, you may perform only one dbmopen() in your +program. In older versions of Perl, if your system had neither DBM nor +ndbm, calling dbmopen() produced a fatal error; it now falls back to +sdbm(3). + +If you don't have write access to the DBM file, you can only read hash +variables, not set them. If you want to test whether you can write, +either use file tests or try setting a dummy hash entry inside an eval(), +which will trap the error. 
Note that functions such as keys() and values() may return huge array values when used on large DBM files. You may prefer to use the each() @@ -613,22 +674,35 @@ function to iterate over large DBM files. Example: dbmclose(%HIST); See also L<AnyDBM_File> for a more general description of the pros and -cons of the various dbm apparoches, as well as L<DB_File> for a particularly +cons of the various dbm approaches, as well as L<DB_File> for a particularly rich implementation. =item defined EXPR -Returns a boolean value saying whether EXPR has a real value -or not. Many operations return the undefined value under exceptional -conditions, such as end of file, uninitialized variable, system error -and such. This function allows you to distinguish between an undefined -null scalar and a defined null scalar with operations that might return -a real null string, such as referencing elements of an array. You may -also check to see if arrays or subroutines exist. Use of defined on -predefined variables is not guaranteed to produce intuitive results. +=item defined -When used on a hash array element, it tells you whether the value -is defined, not whether the key exists in the hash. Use exists() for that. +Returns a Boolean value telling whether EXPR has a value other than +the undefined value C<undef>. If EXPR is not present, C<$_> will be +checked. + +Many operations return C<undef> to indicate failure, end of file, +system error, uninitialized variable, and other exceptional +conditions. This function allows you to distinguish C<undef> from +other values. (A simple Boolean test will not distinguish among +C<undef>, zero, the empty string, and "0", which are all equally +false.) Note that since C<undef> is a valid scalar, its presence +doesn't I<necessarily> indicate an exceptional condition: pop() +returns C<undef> when its argument is an empty array, I<or> when the +element to return happens to be C<undef>. 
+ +You may also use defined() to check whether a subroutine exists. On +the other hand, use of defined() upon aggregates (hashes and arrays) +is not guaranteed to produce intuitive results, and should probably be +avoided. + +When used on a hash element, it tells you whether the value is defined, +not whether the key exists in the hash. Use L</exists> for the latter +purpose. Examples: @@ -636,15 +710,12 @@ Examples: print "$val\n" while defined($val = pop(@ary)); die "Can't readlink $sym: $!" unless defined($value = readlink $sym); - eval '@foo = ()' if defined(@foo); - die "No XYZ package defined" unless defined %_XYZ; sub foo { defined &$bar ? &$bar(@_) : die "No bar"; } + $debugging = 0 unless defined $debugging; -See also undef(). - -Note: many folks tend to overuse defined(), and then are surprised to -discover that the number 0 and the null string are, in fact, defined -concepts. For example, if you say +Note: Many folks tend to overuse defined(), and then are surprised to +discover that the number 0 and "" (the zero-length string) are, in fact, +defined values. For example, if you say "ab" =~ /a(.*)b/; @@ -652,44 +723,69 @@ the pattern match succeeds, and $1 is defined, despite the fact that it matched "nothing". But it didn't really match nothing--rather, it matched something that happened to be 0 characters long. This is all very above-board and honest. When a function returns an undefined value, -it's an admission that it couldn't give you an honest answer. So -you should only use defined() when you're questioning the integrity -of what you're trying to do. At other times, a simple comparison to -0 or "" is what you want. +it's an admission that it couldn't give you an honest answer. So you +should use defined() only when you're questioning the integrity of what +you're trying to do. At other times, a simple comparison to 0 or "" is +what you want. 
+ +Currently, using defined() on an entire array or hash reports whether +memory for that aggregate has ever been allocated. So an array you set +to the empty list appears undefined initially, and one that once was full +and that you then set to the empty list still appears defined. You +should instead use a simple test for size: + + if (@an_array) { print "has array elements\n" } + if (%a_hash) { print "has hash members\n" } + +Using undef() on these, however, does clear their memory and then report +them as not defined anymore, but you shouldn't do that unless you don't +plan to use them again, because it saves time when you load them up +again to have memory already ready to be filled. + +This counterintuitive behaviour of defined() on aggregates may be +changed, fixed, or broken in a future release of Perl. + +See also L</undef>, L</exists>, L</ref>. =item delete EXPR -Deletes the specified value from its hash array. Returns the deleted -value, or the undefined value if nothing was deleted. Deleting from -C<$ENV{}> modifies the environment. Deleting from an array tied to a DBM -file deletes the entry from the DBM file. (But deleting from a tie()d -hash doesn't necessarily return anything.) +Deletes the specified key(s) and their associated values from a hash. +For each key, returns the deleted value associated with that key, or +the undefined value if there was no such key. Deleting from C<$ENV{}> +modifies the environment. Deleting from a hash tied to a DBM file +deletes the entry from the DBM file. (But deleting from a tie()d hash +doesn't necessarily return anything.) -The following deletes all the values of an associative array: +The following deletes all the values of a hash: - foreach $key (keys %ARRAY) { - delete $ARRAY{$key}; + foreach $key (keys %HASH) { + delete $HASH{$key}; } -(But it would be faster to use the undef() command.) 
Note that the -EXPR can be arbitrarily complicated as long as the final operation is -a hash key lookup: +And so does this: + + delete @HASH{keys %HASH} + +(But both of these are slower than the undef() command.) Note that the +EXPR can be arbitrarily complicated as long as the final operation is a +hash element lookup or hash slice: delete $ref->[$x][$y]{$key}; + delete @{$ref->[$x][$y]}{$key1, $key2, @morekeys}; =item die LIST Outside of an eval(), prints the value of LIST to C<STDERR> and exits with -the current value of $! (errno). If $! is 0, exits with the value of -C<($? E<gt>E<gt> 8)> (backtick `command` status). If C<($? E<gt>E<gt> 8)> is 0, -exits with 255. Inside an eval(), the error message is stuffed into C<$@>, -and the eval() is terminated with the undefined value; this makes die() -the way to raise an exception. +the current value of C<$!> (errno). If C<$!> is 0, exits with the value of +C<($? E<gt>E<gt> 8)> (backtick `command` status). If C<($? E<gt>E<gt> 8)> +is 0, exits with 255. Inside an eval(), the error message is stuffed into +C<$@>, and the eval() is terminated with the undefined value; this makes +die() the way to raise an exception. Equivalent examples: die "Can't cd to spool: $!\n" unless chdir '/usr/spool/news'; - chdir '/usr/spool/news' or die "Can't cd to spool: $!\n" + chdir '/usr/spool/news' or die "Can't cd to spool: $!\n" If the value of EXPR does not end in a newline, the current script line number and input line number (if any) are also printed, and a newline @@ -707,6 +803,12 @@ produce, respectively See also exit() and warn(). +You can arrange for a callback to be called just before the die() does +its deed, by setting the C<$SIG{__DIE__}> hook. The associated handler +will be called with the error text and can change the error message, if +it sees fit, by calling die() again. See L<perlvar> for details on +setting C<%SIG> entries, and eval() for some examples. + =item do BLOCK Not really a function. 
Returns the value of the last command in the @@ -774,19 +876,27 @@ Example: QUICKSTART: Getopt('f'); -=item each ASSOC_ARRAY +=item each HASH + +When called in a list context, returns a 2-element array consisting of the +key and value for the next element of a hash, so that you can iterate over +it. When called in a scalar context, returns the key for only the next +element in the hash. (Note: Keys may be "0" or "", which are logically +false; you may wish to avoid constructs like C<while ($k = each %foo) {}> +for this reason.) -Returns a 2-element array consisting of the key and value for the next -value of an associative array, so that you can iterate over it. -Entries are returned in an apparently random order. When the array is -entirely read, a null array is returned (which when assigned produces a -FALSE (0) value). The next call to each() after that will start -iterating again. The iterator can be reset only by reading all the -elements from the array. You should not add elements to an array while -you're iterating over it. There is a single iterator for each -associative array, shared by all each(), keys() and values() function -calls in the program. The following prints out your environment like -the printenv(1) program, only in a different order: +Entries are returned in an apparently random order. When the hash is +entirely read, a null array is returned in list context (which when +assigned produces a FALSE (0) value), and C<undef> is returned in a +scalar context. The next call to each() after that will start iterating +again. There is a single iterator for each hash, shared by all each(), +keys(), and values() function calls in the program; it can be reset by +reading all the elements from the hash, or by evaluating C<keys HASH> or +C<values HASH>. If you add or delete elements of a hash while you're +iterating over it, you may get entries skipped or duplicated, so don't. 
+ +The following prints out your environment like the printenv(1) program, +only in a different order: while (($key,$value) = each %ENV) { print "$key=$value\n"; @@ -809,11 +919,11 @@ C<eof(FILEHANDLE)> on it) after end-of-file is reached. Filetypes such as terminals may lose the end-of-file condition if you do. An C<eof> without an argument uses the last file read as argument. -Empty parentheses () may be used to indicate -the pseudofile formed of the files listed on the command line, i.e. -C<eof()> is reasonable to use inside a while (E<lt>E<gt>) loop to detect the end -of only the last file. Use C<eof(ARGV)> or eof without the parentheses to -test I<EACH> file in a while (E<lt>E<gt>) loop. Examples: +Empty parentheses () may be used to indicate the pseudo file formed of +the files listed on the command line, i.e., C<eof()> is reasonable to +use inside a C<while (E<lt>E<gt>)> loop to detect the end of only the +last file. Use C<eof(ARGV)> or eof without the parentheses to test +I<EACH> file in a while (E<lt>E<gt>) loop. Examples: # reset line numbering on each input file while (<>) { @@ -832,7 +942,7 @@ test I<EACH> file in a while (E<lt>E<gt>) loop. Examples: } Practical hint: you almost never need to use C<eof> in Perl, because the -input operators return undef when they run out of data. +input operators return undef when they run out of data. =item eval EXPR @@ -840,17 +950,22 @@ input operators return undef when they run out of data. EXPR is parsed and executed as if it were a little Perl program. It is executed in the context of the current Perl program, so that any -variable settings, subroutine or format definitions remain afterwards. +variable settings or subroutine and format definitions remain afterwards. The value returned is the value of the last expression evaluated, or a -return statement may be used, just as with subroutines. +return statement may be used, just as with subroutines. 
The last +expression is evaluated in scalar or array context, depending on the +context of the eval. If there is a syntax error or runtime error, or a die() statement is executed, an undefined value is returned by eval(), and C<$@> is set to the error message. If there was no error, C<$@> is guaranteed to be a null -string. If EXPR is omitted, evaluates $_. The final semicolon, if -any, may be omitted from the expression. +string. If EXPR is omitted, evaluates C<$_>. The final semicolon, if +any, may be omitted from the expression. Beware that using eval() +neither silences perl from printing warnings to STDERR, nor does it +stuff the text of warning messages into C<$@>. To do either of those, +you have to use the C<$SIG{__WARN__}> facility. See warn() and L<perlvar>. -Note that, since eval() traps otherwise-fatal errors, it is useful for +Note that, because eval() traps otherwise-fatal errors, it is useful for determining whether a particular feature (such as socket() or symlink()) is implemented. It is also Perl's exception trapping mechanism, where the die operator is used to raise exceptions. @@ -860,7 +975,7 @@ form to trap run-time errors without incurring the penalty of recompiling each time. The error, if any, is still returned in C<$@>. Examples: - # make divide-by-zero non-fatal + # make divide-by-zero nonfatal eval { $answer = $a / $b; }; warn $@ if $@; # same thing, but less efficient @@ -872,7 +987,25 @@ Examples: # a run-time error eval '$answer ='; # sets $@ -With an eval(), you should be especially careful to remember what's +When using the eval{} form as an exception trap in libraries, you may +wish not to trigger any C<__DIE__> hooks that user code may have +installed. 
You can use the C<local $SIG{__DIE__}> construct for this +purpose, as shown in this example: + + # a very private exception trap for divide-by-zero + eval { local $SIG{'__DIE__'}; $answer = $a / $b; }; warn $@ if $@; + +This is especially significant, given that C<__DIE__> hooks can call +die() again, which has the effect of changing their error messages: + + # __DIE__ hooks may modify error messages + { + local $SIG{'__DIE__'} = sub { (my $x = $_[0]) =~ s/foo/bar/g; die $x }; + eval { die "foo foofs here" }; + print $@ if $@; # prints "bar barfs here" + } + +With an eval(), you should be especially careful to remember what's being looked at when: eval $x; # CASE 1 @@ -884,19 +1017,23 @@ being looked at when: eval "\$$x++" # CASE 5 $$x++; # CASE 6 -Cases 1 and 2 above behave identically: they run the code contained in the -variable $x. (Although case 2 has misleading double quotes making the -reader wonder what else might be happening (nothing is).) Cases 3 and 4 -likewise behave in the same way: they run the code <$x>, which does -nothing at all. (Case 4 is preferred for purely visual reasons.) Case 5 -is a place where normally you I<WOULD> like to use double quotes, except -that in that particular situation, you can just use symbolic references -instead, as in case 6. +Cases 1 and 2 above behave identically: they run the code contained in +the variable $x. (Although case 2 has misleading double quotes making +the reader wonder what else might be happening (nothing is).) Cases 3 +and 4 likewise behave in the same way: they run the code '$x', which +does nothing but return the value of C<$x>. (Case 4 is preferred for +purely visual reasons, but it also has the advantage of compiling at +compile-time instead of at run-time.) Case 5 is a place where +normally you I<WOULD> like to use double quotes, except that in this +particular situation, you can just use symbolic references instead, as +in case 6. 
=item exec LIST -The exec() function executes a system command I<AND NEVER RETURNS>. Use -the system() function if you want it to return. +The exec() function executes a system command I<AND NEVER RETURNS>, +unless the command does not exist and is executed directly instead of +via C</bin/sh -c> (see below). Use system() instead of exec() if you +want it to return. If there is more than one argument in LIST, or if LIST is an array with more than one value, calls execvp(3) with the arguments in LIST. If @@ -914,7 +1051,7 @@ If you don't really want to execute the first argument, but want to lie to the program you are executing about its own name, you can specify the program you actually want to run as an "indirect object" (without a comma) in front of the LIST. (This always forces interpretation of the -LIST as a multi-valued list, even if there is only a single scalar in +LIST as a multivalued list, even if there is only a single scalar in the list.) Example: $shell = '/bin/csh'; @@ -933,7 +1070,7 @@ if the corresponding value is undefined. print "Defined\n" if defined $array{$key}; print "True\n" if $array{$key}; -A hash element can only be TRUE if it's defined, and defined if +A hash element can be TRUE only if it's defined, and defined if it exists, but the reverse doesn't necessarily hold true. Note that the EXPR can be arbitrarily complicated as long as the final @@ -951,11 +1088,20 @@ are called before exit.) Example: $ans = <STDIN>; exit 0 if $ans =~ /^[Xx]/; -See also die(). If EXPR is omitted, exits with 0 status. +See also die(). If EXPR is omitted, exits with 0 status. The only +universally portable values for EXPR are 0 for success and 1 for error; +all other values are subject to unpredictable interpretation depending +on the environment in which the Perl program is running. + +You shouldn't use exit() to abort a subroutine if there's any chance that +someone might want to trap whatever error happened. 
Use die() instead, +which can be trapped by an eval(). =item exp EXPR -Returns I<e> (the natural logarithm base) to the power of EXPR. +=item exp + +Returns I<e> (the natural logarithm base) to the power of EXPR. If EXPR is omitted, gives C<exp($_)>. =item fcntl FILEHANDLE,FUNCTION,SCALAR @@ -980,31 +1126,50 @@ value is taken as the name of the filehandle. =item flock FILEHANDLE,OPERATION -Calls flock(2) on FILEHANDLE. See L<flock(2)> for definition of -OPERATION. Returns TRUE for success, FALSE on failure. Will produce a -fatal error if used on a machine that doesn't implement either flock(2) or -fcntl(2). The fcntl(2) system call will be automatically used if flock(2) -is missing from your system. This makes flock() the portable file locking -strategy, although it will only lock entire files, not records. Note also -that some versions of flock() cannot lock things over the network; you -would need to use the more system-specific fcntl() for that. +Calls flock(2), or an emulation of it, on FILEHANDLE. Returns TRUE for +success, FALSE on failure. Produces a fatal error if used on a machine +that doesn't implement flock(2), fcntl(2) locking, or lockf(3). flock() +is Perl's portable file locking interface, although it locks only entire +files, not records. + +OPERATION is one of LOCK_SH, LOCK_EX, or LOCK_UN, possibly combined with +LOCK_NB. These constants are traditionally valued 1, 2, 8 and 4, but +you can use the symbolic names if you import them from the Fcntl module, +either individually, or as a group using the ':flock' tag. LOCK_SH +requests a shared lock, LOCK_EX requests an exclusive lock, and LOCK_UN +releases a previously requested lock. If LOCK_NB is added to LOCK_SH or +LOCK_EX then flock() will return immediately rather than blocking +waiting for the lock (check the return status to see if you got it). + +To avoid the possibility of mis-coordination, Perl flushes FILEHANDLE +before (un)locking it.
+ +Note that the emulation built with lockf(3) doesn't provide shared +locks, and it requires that FILEHANDLE be open with write intent. These +are the semantics that lockf(3) implements. Most (all?) systems +implement lockf(3) in terms of fcntl(2) locking, though, so the +differing semantics shouldn't bite too many people. + +Note also that some versions of flock() cannot lock things over the +network; you would need to use the more system-specific fcntl() for +that. If you like you can force Perl to ignore your system's flock(2) +function, and so provide its own fcntl(2)-based emulation, by passing +the switch C<-Ud_flock> to the F<Configure> program when you configure +perl. Here's a mailbox appender for BSD systems. - $LOCK_SH = 1; - $LOCK_EX = 2; - $LOCK_NB = 4; - $LOCK_UN = 8; + use Fcntl ':flock'; # import LOCK_* constants sub lock { - flock(MBOX,$LOCK_EX); + flock(MBOX,LOCK_EX); # and, in case someone appended # while we were waiting... seek(MBOX, 0, 2); } sub unlock { - flock(MBOX,$LOCK_UN); + flock(MBOX,LOCK_UN); } open(MBOX, ">>/usr/spool/mail/$ENV{'USER'}") @@ -1021,15 +1186,15 @@ See also L<DB_File> for other flock() examples. Does a fork(2) system call. Returns the child pid to the parent process and 0 to the child process, or C<undef> if the fork is unsuccessful. Note: unflushed buffers remain unflushed in both processes, which means -you may need to set C<$|> ($AUTOFLUSH in English) or call the -autoflush() FileHandle method to avoid duplicate output. +you may need to set C<$|> ($AUTOFLUSH in English) or call the autoflush() +method of IO::Handle to avoid duplicate output. 
If you fork() without ever waiting on your children, you will accumulate zombies: $SIG{CHLD} = sub { wait }; -There's also the double-fork trick (error checking on +There's also the double-fork trick (error checking on fork() returns omitted); unless ($pid = fork) { @@ -1047,25 +1212,30 @@ fork() returns omitted); See also L<perlipc> for more examples of forking and reaping moribund children. +Note that if your forked child inherits system file descriptors like +STDIN and STDOUT that are actually connected by a pipe or socket, even +if you exit, the remote server (such as, say, httpd or rsh) won't think +you're done. You should reopen those to /dev/null if it's any issue. + =item format Declare a picture format for use by the write() function. For example: - format Something = + format Something = Test: @<<<<<<<< @||||| @>>>>> $str, $%, '$' . int($num) . $str = "widget"; - $num = $cost/$quantiy; + $num = $cost/$quantity; $~ = 'Something'; write; See L<perlform> for many details and examples. -=item formline PICTURE, LIST +=item formline PICTURE,LIST This is an internal function used by C<format>s, though you may call it too. It formats (see L<perlform>) a list of values according to the @@ -1080,7 +1250,7 @@ that the C<~> and C<~~> tokens will treat the entire PICTURE as a single line. You may therefore need to use multiple formlines to implement a single record format, just like the format compiler. -Be careful if you put double quotes around the picture, since an "C<@>" +Be careful if you put double quotes around the picture, because an "C<@>" character may be taken to mean the beginning of an array name. formline() always returns TRUE. See L<perlform> for other examples. @@ -1097,7 +1267,7 @@ single-characters, however.
For that, try something more like: system "stty cbreak </dev/tty >/dev/tty 2>&1"; } else { - system "stty", '-icanon', 'eol', "\001"; + system "stty", '-icanon', 'eol', "\001"; } $key = getc(STDIN); @@ -1106,24 +1276,26 @@ single-characters, however. For that, try something more like: system "stty -cbreak </dev/tty >/dev/tty 2>&1"; } else { - system "stty", 'icanon', 'eol', '^@'; # ascii null + system "stty", 'icanon', 'eol', '^@'; # ASCII null } print "\n"; -Determination of whether to whether $BSD_STYLE should be set -is left as an exercise to the reader. +Determination of whether $BSD_STYLE should be set +is left as an exercise to the reader. +The POSIX::getattr() function can do this more portably on systems +alleging POSIX compliance. See also the C<Term::ReadKey> module from your nearest CPAN site; -details on CPAN can be found on L<perlmod/CPAN> +details on CPAN can be found on L<perlmod/CPAN>. =item getlogin Returns the current login from F</etc/utmp>, if any. If null, use -getpwuid(). +getpwuid(). - $login = getlogin || (getpwuid($<))[0] || "Kilroy"; + $login = getlogin || getpwuid($<) || "Kilroy"; -Do not consider getlogin() for authorentication: it is not as +Do not consider getlogin() for authentication: it is not as secure as getpwuid(). =item getpeername SOCKET @@ -1138,10 +1310,12 @@ Returns the packed sockaddr address of other end of the SOCKET connection. =item getpgrp PID -Returns the current process group for the specified PID, 0 for the +Returns the current process group for the specified PID. Use +a PID of 0 to get the current process group for the current process. Will raise an exception if used on a machine that doesn't implement getpgrp(2). If PID is omitted, returns process -group of current process. +group of current process. Note that the POSIX version of getpgrp() +does not accept a PID argument, so only PID==0 is truly portable. =item getppid @@ -1265,23 +1439,37 @@ Returns the socket option requested, or undefined if there is an error. 
=item glob EXPR -Returns the value of EXPR with filename expansions such as a shell -would do. This is the internal function implementing the <*.*> -operator, except it's easier to use. +=item glob + +Returns the value of EXPR with filename expansions such as a shell would +do. This is the internal function implementing the C<E<lt>*.cE<gt>> +operator, but you can use it directly. If EXPR is omitted, $_ is used. +The C<E<lt>*.cE<gt>> operator is discussed in more detail in +L<perlop/"I/O Operators">. =item gmtime EXPR Converts a time as returned by the time function to a 9-element array -with the time localized for the standard Greenwich timezone. +with the time localized for the standard Greenwich time zone. Typically used as follows: - + # 0 1 2 3 4 5 6 7 8 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time); All array elements are numeric, and come straight out of a struct tm. In particular this means that $mon has the range 0..11 and $wday has -the range 0..6. If EXPR is omitted, does C<gmtime(time())>. +the range 0..6 with Sunday as day 0. Also, $year is the number of +years since 1900, I<not> simply the last two digits of the year. + +If EXPR is omitted, does C<gmtime(time())>. + +In a scalar context, returns the ctime(3) value: + + $now_string = gmtime; # e.g., "Thu Oct 13 04:54:34 1994" + +Also see the timegm() function provided by the Time::Local module, +and the strftime(3) function available via the POSIX module. =item goto LABEL @@ -1292,8 +1480,9 @@ the range 0..6. If EXPR is omitted, does C<gmtime(time())>. The goto-LABEL form finds the statement labeled with LABEL and resumes execution there. It may not be used to go into any construct that requires initialization, such as a subroutine or a foreach loop. It -also can't be used to go into a construct that is optimized away.
It -can be used to go almost anywhere else within the dynamic scope, +also can't be used to go into a construct that is optimized away, +or to get out of a block or subroutine given to sort(). +It can be used to go almost anywhere else within the dynamic scope, including out of subroutines, but it's usually better to use some other construct such as last or die. The author of Perl has never felt the need to use this form of goto (in Perl, that is--C is another matter). @@ -1316,6 +1505,10 @@ will be able to tell that this routine was called first. =item grep EXPR,LIST +This is similar in spirit to, but not the same as, grep(1) +and its relatives. In particular, it is not limited to using +regular expressions. + Evaluates the BLOCK or EXPR for each element of LIST (locally setting $_ to each element) and returns the list value consisting of those elements for which the expression evaluated to TRUE. In a scalar @@ -1327,23 +1520,31 @@ or equivalently, @foo = grep {!/^#/} @bar; # weed out comments -Note that, since $_ is a reference into the list value, it can be used +Note that, because $_ is a reference into the list value, it can be used to modify the elements of the array. While this is useful and supported, it can cause bizarre results if the LIST is not a named -array. +array. Similarly, grep returns aliases into the original list, +much like the way that L<Foreach Loops>'s index variable aliases the list +elements. That is, modifying an element of a list returned by grep +actually modifies the element in the original list. =item hex EXPR -Interprets EXPR as a hex string and returns the corresponding decimal -value. (To convert strings that might start with 0 or 0x see -oct().) If EXPR is omitted, uses $_. +=item hex + +Interprets EXPR as a hex string and returns the corresponding +value. (To convert strings that might start with either 0 or 0x +see L</oct>.) If EXPR is omitted, uses $_. 
+ + print hex '0xAf'; # prints '175' + print hex 'aF'; # same =item import -There is no built-in import() function. It is merely an ordinary +There is no builtin import() function. It is merely an ordinary method (subroutine) defined (or inherited) by modules that wish to export names to another module. The use() function calls the import() method -for the package used. See also L</use>, L<perlmod>, and L<Exporter>. +for the package used. See also L</use()>, L<perlmod>, and L<Exporter>. =item index STR,SUBSTR,POSITION @@ -1351,12 +1552,14 @@ for the package used. See also L</use>, L<perlmod>, and L<Exporter>. Returns the position of the first occurrence of SUBSTR in STR at or after POSITION. If POSITION is omitted, starts searching from the beginning of -the string. The return value is based at 0 (or whatever you've set the $[ +the string. The return value is based at 0 (or whatever you've set the C<$[> variable to--but don't do that). If the substring is not found, returns one less than the base, ordinarily -1. =item int EXPR +=item int + Returns the integer portion of EXPR. If EXPR is omitted, uses $_. =item ioctl FILEHANDLE,FUNCTION,SCALAR @@ -1369,7 +1572,7 @@ first to get the correct function definitions. If F<ioctl.ph> doesn't exist or doesn't have the correct definitions you'll have to roll your own, based on your C header files such as F<E<lt>sys/ioctl.hE<gt>>. (There is a Perl script called B<h2ph> that comes with the Perl kit which -may help you in this, but it's non-trivial.) SCALAR will be read and/or +may help you in this, but it's nontrivial.) SCALAR will be read and/or written depending on the FUNCTION--a pointer to the string value of SCALAR will be passed as the third argument of the actual ioctl call. 
(If SCALAR has no string value but does have a numeric value, that value will be @@ -1406,7 +1609,7 @@ system: =item join EXPR,LIST -Joins the separate strings of LIST or ARRAY into a single string with +Joins the separate strings of LIST into a single string with fields separated by the value of EXPR, and returns the string. Example: @@ -1414,14 +1617,15 @@ Example: See L<perlfunc/split>. -=item keys ASSOC_ARRAY +=item keys HASH -Returns a normal array consisting of all the keys of the named -associative array. (In a scalar context, returns the number of keys.) -The keys are returned in an apparently random order, but it is the same -order as either the values() or each() function produces (given that -the associative array has not been modified). Here is yet another way -to print your environment: +Returns a normal array consisting of all the keys of the named hash. (In +a scalar context, returns the number of keys.) The keys are returned in +an apparently random order, but it is the same order as either the +values() or each() function produces (given that the hash has not been +modified). As a side effect, it resets HASH's iterator. + +Here is yet another way to print your environment: @keys = keys %ENV; @values = values %ENV; @@ -1435,17 +1639,31 @@ or how about sorted by key: print $key, '=', $ENV{$key}, "\n"; } -To sort an array by value, you'll need to use a C<sort{}> -function. Here's a descending numeric sort of a hash by its values: +To sort a hash by value, you'll need to use a C<sort> function. +Here's a descending numeric sort of a hash by its values: foreach $key (sort { $hash{$b} <=> $hash{$a} } keys %hash) { printf "%4d %s\n", $hash{$key}, $key; } +As an lvalue C<keys> allows you to increase the number of hash buckets +allocated for the given hash. This can gain you a measure of efficiency if +you know the hash is going to get big. (This is similar to pre-extending +an array by assigning a larger number to $#array.)
If you say + + keys %hash = 200; + +then C<%hash> will have at least 200 buckets allocated for it. These +buckets will be retained even if you do C<%hash = ()>; use C<undef +%hash> if you want to free the storage while C<%hash> is still in scope. +You can't shrink the number of buckets allocated for the hash using +C<keys> in this way (but you needn't worry about doing this by accident, +as trying has no effect). + =item kill LIST -Sends a signal to a list of processes. The first element of -the list must be the signal to send. Returns the number of +Sends a signal to a list of processes. The first element of +the list must be the signal to send. Returns the number of processes successfully signaled. $cnt = kill 1, $child1, $child2; @@ -1455,7 +1673,7 @@ Unlike in the shell, in Perl if the I<SIGNAL> is negative, it kills process groups instead of processes. (On System V, a negative I<PROCESS> number will also kill process groups, but that's not portable.) That means you usually want to use positive not negative signals. You may also -use a signal name in quotes. See the L<perlipc/"Signals"> man page for details. +use a signal name in quotes. See L<perlipc/"Signals"> for details. =item last LABEL @@ -1473,18 +1691,28 @@ C<continue> block, if any, is not executed: =item lc EXPR +=item lc + Returns a lowercased version of EXPR. This is the internal function -implementing the \L escape in double-quoted strings. -Should respect any POSIX setlocale() settings. +implementing the \L escape in double-quoted strings. +Respects current LC_CTYPE locale if C<use locale> is in force. See L<perllocale>. + +If EXPR is omitted, uses $_. =item lcfirst EXPR +=item lcfirst + Returns the value of EXPR with the first character lowercased. This is the internal function implementing the \l escape in double-quoted strings. -Should respect any POSIX setlocale() settings. +Respects current LC_CTYPE locale if C<use locale> is in force. See L<perllocale>. + +If EXPR is omitted, uses $_.
=item length EXPR +=item length + Returns the length in characters of the value of EXPR. If EXPR is omitted, returns length of $_. @@ -1501,8 +1729,8 @@ it succeeded, FALSE otherwise. See example in L<perlipc/"Sockets: Client/Server =item local EXPR A local modifies the listed variables to be local to the enclosing block, -subroutine, C<eval{}> or C<do>. If more than one value is listed, the -list must be placed in parens. See L<perlsub/"Temporary Values via +subroutine, C<eval{}>, or C<do>. If more than one value is listed, the +list must be placed in parentheses. See L<perlsub/"Temporary Values via local()"> for details. But you really probably want to be using my() instead, because local() isn't @@ -1512,25 +1740,31 @@ via my()"> for details. =item localtime EXPR Converts a time as returned by the time function to a 9-element array -with the time analyzed for the local timezone. Typically used as +with the time analyzed for the local time zone. Typically used as follows: + # 0 1 2 3 4 5 6 7 8 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); All array elements are numeric, and come straight out of a struct tm. In particular this means that $mon has the range 0..11 and $wday has -the range 0..6. If EXPR is omitted, does localtime(time). +the range 0..6 with Sunday as day 0. Also, $year is the number of +years since 1900, that is, $year is 123 in year 2023. -In a scalar context, prints out the ctime(3) value: +If EXPR is omitted, uses the current time (C<localtime(time)>). - $now_string = localtime; # e.g. "Thu Oct 13 04:54:34 1994" +In a scalar context, returns the ctime(3) value: -Also see the F<timelocal.pl> library, and the strftime(3) function available -via the POSIX modulie. + $now_string = localtime; # e.g., "Thu Oct 13 04:54:34 1994" + +Also see the Time::Local module, and the strftime(3) and mktime(3) +functions available via the POSIX module. =item log EXPR +=item log + Returns logarithm (base I<e>) of EXPR.
If EXPR is omitted, returns log of $_. @@ -1538,10 +1772,14 @@ of $_. =item lstat EXPR +=item lstat + Does the same thing as the stat() function, but stats a symbolic link instead of the file the symbolic link points to. If symbolic links are unimplemented on your system, a normal stat() is done. +If EXPR is omitted, stats $_. + =item m// The match operator. See L<perlop>. @@ -1572,7 +1810,7 @@ is just a funny way to write Creates the directory specified by FILENAME, with permissions specified by MODE (as modified by umask). If it succeeds it returns 1, otherwise -it returns 0 and sets $! (errno). +it returns 0 and sets C<$!> (errno). =item msgctl ID,CMD,ARG @@ -1606,7 +1844,7 @@ an error. A "my" declares the listed variables to be local (lexically) to the enclosing block, subroutine, C<eval>, or C<do/require/use>'d file. If -more than one value is listed, the list must be placed in parens. See +more than one value is listed, the list must be placed in parentheses. See L<perlsub/"Private Variables via my()"> for details. =item next LABEL @@ -1631,42 +1869,54 @@ See the "use" function, which "no" is the opposite of. =item oct EXPR +=item oct + Interprets EXPR as an octal string and returns the corresponding -decimal value. (If EXPR happens to start off with 0x, interprets it as +value. (If EXPR happens to start off with 0x, interprets it as a hex string instead.) The following will handle decimal, octal, and hex in the standard Perl or C notation: $val = oct($val) if $val =~ /^0/; -If EXPR is omitted, uses $_. +If EXPR is omitted, uses $_. This function is commonly used when +a string such as "644" needs to be converted into a file mode, for +example. (Although perl will automatically convert strings into +numbers as needed, this automatic conversion assumes base 10.) =item open FILEHANDLE,EXPR =item open FILEHANDLE Opens the file whose filename is given by EXPR, and associates it with -FILEHANDLE. 
If FILEHANDLE is an expression, its value is used as the name -of the real filehandle wanted. If EXPR is omitted, the scalar variable of -the same name as the FILEHANDLE contains the filename. If the filename -begins with "<" or nothing, the file is opened for input. If the filename -begins with ">", the file is opened for output. If the filename begins -with ">>", the file is opened for appending. You can put a '+' in front -of the '>' or '<' to indicate that you want both read and write access to -the file; thus '+<' is usually preferred for read/write updates--the '+>' -mode would clobber the file first. These correspond to the fopen(3) modes -of 'r', 'r+', 'w', 'w+', 'a', and 'a+'. - -If the filename begins with "|", the filename is interpreted -as a command to which output is to be piped, and if the filename ends with -a "|", the filename is interpreted See L<perlipc/"Using open() for IPC"> -for more examples of this. as command which pipes input to us. (You may -not have a raw open() to a command that pipes both in I<and> out, but see See L<open2>, -L<open3>, and L<perlipc/"Bidirectional Communication"> for alternatives.) - -Opening '-' opens STDIN and opening '>-' opens STDOUT. Open returns -non-zero upon success, the undefined value otherwise. If the open +FILEHANDLE. If FILEHANDLE is an expression, its value is used as the +name of the real filehandle wanted. If EXPR is omitted, the scalar +variable of the same name as the FILEHANDLE contains the filename. +(Note that lexical variables--those declared with C<my>--will not work +for this purpose; so if you're using C<my>, specify EXPR in your call +to open.) + +If the filename begins with '<' or nothing, the file is opened for input. +If the filename begins with '>', the file is truncated and opened for +output. If the filename begins with '>>', the file is opened for +appending. 
You can put a '+' in front of the '>' or '<' to indicate that +you want both read and write access to the file; thus '+<' is almost +always preferred for read/write updates--the '+>' mode would clobber the +file first. The prefix and the filename may be separated with spaces. +These various prefixes correspond to the fopen(3) modes of 'r', 'r+', 'w', +'w+', 'a', and 'a+'. + +If the filename begins with "|", the filename is interpreted as a command +to which output is to be piped, and if the filename ends with a "|", the +filename is interpreted as a command which pipes input to us. See +L<perlipc/"Using open() for IPC"> for more examples of this. (You may not have +a raw open() to a command that pipes both in I<and> out, but see +L<IPC::Open2>, L<IPC::Open3>, and L<perlipc/"Bidirectional Communication"> +for alternatives.) + +Opening '-' opens STDIN and opening 'E<gt>-' opens STDOUT. Open returns +nonzero upon success, the undefined value otherwise. If the open involved a pipe, the return value happens to be the pid of the -subprocess. +subprocess. If you're unfortunate enough to be running Perl on a system that distinguishes between text files and binary files (modern operating @@ -1714,11 +1964,12 @@ Examples: } You may also, in the Bourne shell tradition, specify an EXPR beginning -with ">&", in which case the rest of the string is interpreted as the +with "E<gt>&", in which case the rest of the string is interpreted as the name of a filehandle (or file descriptor, if numeric) which is to be -duped and opened. You may use & after >, >>, <, +>, +>> and +<. The +duped and opened. You may use & after E<gt>, E<gt>E<gt>, E<lt>, +E<gt>, ++E<gt>E<gt>, and +E<lt>. The mode you specify should match the mode of the original filehandle. -(Duping a filehandle does not take into acount any existing contents of +(Duping a filehandle does not take into account any existing contents of stdio buffers.)
Here is a script that saves, redirects, and restores STDOUT and STDERR: @@ -1746,23 +1997,23 @@ STDERR: print STDERR "stderr 2\n"; -If you specify "<&=N", where N is a number, then Perl will do an +If you specify "E<lt>&=N", where N is a number, then Perl will do an equivalent of C's fdopen() of that file descriptor; this is more parsimonious of file descriptors. For example: open(FILEHANDLE, "<&=$fd") -If you open a pipe on the command "-", i.e. either "|-" or "-|", then +If you open a pipe on the command "-", i.e., either "|-" or "-|", then there is an implicit fork done, and the return value of open is the pid of the child within the parent process, and 0 within the child -process. (Use defined($pid) to determine whether the open was successful.) +process. (Use C<defined($pid)> to determine whether the open was successful.) The filehandle behaves normally for the parent, but i/o to that filehandle is piped from/to the STDOUT/STDIN of the child process. In the child process the filehandle isn't opened--i/o happens from/to the new STDOUT or STDIN. Typically this is used like the normal piped open when you want to exercise more control over just how the pipe command gets executed, such as when you are running setuid, and -don't want to have to scan shell commands for metacharacters. +don't want to have to scan shell commands for metacharacters. The following pairs are more or less equivalent: open(FOO, "|tr '[a-z]' '[A-Z]'"); @@ -1773,22 +2024,24 @@ The following pairs are more or less equivalent: See L<perlipc/"Safe Pipe Opens"> for more examples of this. -Explicitly closing any piped filehandle causes the parent process to -wait for the child to finish, and returns the status value in $?. 
-Note: on any operation which may do a fork, unflushed buffers remain -unflushed in both processes, which means you may need to set $| to +NOTE: On any operation which may do a fork, unflushed buffers remain +unflushed in both processes, which means you may need to set C<$|> to avoid duplicate output. -Using the FileHandle constructor from the FileHandle package, +Closing any piped filehandle causes the parent process to wait for the +child to finish, and returns the status value in C<$?>. + +Using the constructor from the IO::Handle package (or one of its +subclasses, such as IO::File or IO::Socket), you can generate anonymous filehandles which have the scope of whatever variables hold references to them, and automatically close whenever and however you leave that scope: - use FileHandle; + use IO::File; ... sub read_myfile_munged { my $ALL = shift; - my $handle = new FileHandle; + my $handle = new IO::File; open($handle, "myfile") or die "myfile: $!"; $first = <$handle> or return (); # Automatically closed here. @@ -1798,7 +2051,7 @@ and however you leave that scope: } The filename that is passed to open will have leading and trailing -whitespace deleted. In order to open a file with arbitrary weird +whitespace deleted. To open a file with arbitrary weird characters in it, it's necessary to protect any leading and trailing whitespace thusly: @@ -1809,7 +2062,7 @@ If you want a "real" C open() (see L<open(2)> on your system), then you should use the sysopen() function. This is another way to protect your filenames from interpretation. For example: - use FileHandle; + use IO::Handle; sysopen(HANDLE, $path, O_RDWR|O_CREAT|O_EXCL, 0700) or die "sysopen $path: $!"; HANDLE->autoflush(1); @@ -1822,13 +2075,15 @@ See L</seek()> for some details about mixing reading and writing. =item opendir DIRHANDLE,EXPR Opens a directory named EXPR for processing by readdir(), telldir(), -seekdir(), rewinddir() and closedir(). Returns TRUE if successful. 
+seekdir(), rewinddir(), and closedir(). Returns TRUE if successful. DIRHANDLEs have their own namespace separate from FILEHANDLEs. =item ord EXPR +=item ord + Returns the numeric ascii value of the first character of EXPR. If -EXPR is omitted, uses $_. +EXPR is omitted, uses $_. For the reverse, see L</chr>. =item pack TEMPLATE,LIST @@ -1846,17 +2101,29 @@ follows: c A signed char value. C An unsigned char value. + s A signed short value. S An unsigned short value. + (This 'short' is _exactly_ 16 bits, which may differ from + what a local C compiler calls 'short'.) + i A signed integer value. I An unsigned integer value. + (This 'integer' is _at_least_ 32 bits wide. Its exact size + depends on what a local C compiler calls 'int', and may + even be larger than the 'long' described in the next item.) + l A signed long value. L An unsigned long value. + (This 'long' is _exactly_ 32 bits, which may differ from + what a local C compiler calls 'long'.) - n A short in "network" order. - N A long in "network" order. + n A short in "network" (big-endian) order. + N A long in "network" (big-endian) order. v A short in "VAX" (little-endian) order. V A long in "VAX" (little-endian) order. + (These 'shorts' and 'longs' are _exactly_ 16 bits and + _exactly_ 32 bits, respectively.) f A single-precision float in the native format. d A double-precision float in the native format. @@ -1866,12 +2133,17 @@ follows: u A uuencoded string. + w A BER compressed integer. Its bytes represent an unsigned + integer in base 128, most significant digit first, with as few + digits as possible. Bit eight (the high bit) is set on each + byte except the last. + x A null byte. X Back up a byte. @ Null fill to absolute position. Each letter may optionally be followed by a number which gives a repeat -count. With all types except "a", "A", "b", "B", "h" and "H", and "P" the +count. 
With all types except "a", "A", "b", "B", "h", "H", and "P" the pack function will gobble up that many values from the LIST. A * for the repeat count means to use however many items are left. The "a" and "A" types gobble just one value, but pack it as a string of length count, @@ -1887,7 +2159,7 @@ point data written on one machine may not be readable on another - even if both use IEEE floating point arithmetic (as the endian-ness of the memory representation is not part of the IEEE spec). Note that Perl uses doubles internally for all numeric calculation, and converting from double into -float and thence back to double again will lose precision (i.e. +float and thence back to double again will lose precision (i.e., C<unpack("f", pack("f", $foo))> will not in general equal $foo). Examples: @@ -1928,11 +2200,11 @@ Declares the compilation unit as being in the given namespace. The scope of the package declaration is from the declaration itself through the end of the enclosing block (the same scope as the local() operator). All further unqualified dynamic identifiers will be in this namespace. A package -statement only affects dynamic variables--including those you've used +statement affects only dynamic variables--including those you've used local() on--but I<not> lexical variables created with my(). Typically it would be the first declaration in a file to be included by the C<require> or C<use> operator. You can switch into a package in more than one place; -it merely influences which symbol table is used by the compiler for the +it influences merely which symbol table is used by the compiler for the rest of that block. You can refer to variables and filehandles in other packages by prefixing the identifier with the package name and a double colon: C<$Package::Variable>. If the package name is null, the C<main> @@ -1946,14 +2218,16 @@ and classes. See L<perlsub> for other scoping issues. Opens a pair of connected pipes like the corresponding system call. 
Note that if you set up a loop of piped processes, deadlock can occur unless you are very careful. In addition, note that Perl's pipes use -stdio buffering, so you may need to set $| to flush your WRITEHANDLE +stdio buffering, so you may need to set C<$|> to flush your WRITEHANDLE after each command, depending on the application. -See L<open2>, L<open3>, and L<perlipc/"Bidirectional Communication"> +See L<IPC::Open2>, L<IPC::Open3>, and L<perlipc/"Bidirectional Communication"> for examples of such things. =item pop ARRAY +=item pop + Pops and returns the last value of the array, shortening the array by 1. Has a similar effect to @@ -1966,8 +2240,13 @@ like shift(). =item pos SCALAR +=item pos + Returns the offset of where the last C<m//g> search left off for the variable -in question. May be modified to change that offset. +in question ($_ is used when the variable is not specified). May be +modified to change that offset. Such modification will also influence +the C<\G> zero-width assertion in regular expressions. See L<perlre> and +L<perlop>. =item print FILEHANDLE LIST @@ -1980,9 +2259,9 @@ if successful. FILEHANDLE may be a scalar variable name, in which case the variable contains the name of or a reference to the filehandle, thus introducing one level of indirection. (NOTE: If FILEHANDLE is a variable and the next token is a term, it may be misinterpreted as an operator unless you -interpose a + or put parens around the arguments.) If FILEHANDLE is +interpose a + or put parentheses around the arguments.) If FILEHANDLE is omitted, prints by default to standard output (or to the last selected -output channel--see select()). If LIST is also omitted, prints $_ to +output channel--see L</select>). If LIST is also omitted, prints $_ to STDOUT. To set the default output channel to something other than STDOUT use the select operation. 
Note that, because print takes a LIST, anything in the LIST is evaluated in a list context, and any @@ -1990,20 +2269,32 @@ subroutine that you call will have one or more of its expressions evaluated in a list context. Also be careful not to follow the print keyword with a left parenthesis unless you want the corresponding right parenthesis to terminate the arguments to the print--interpose a + or -put parens around all the arguments. +put parentheses around all the arguments. Note that if you're storing FILEHANDLES in an array or other expression, -you will have to use a block returning its value instead +you will have to use a block returning its value instead: print { $files[$i] } "stuff\n"; print { $OK ? STDOUT : STDERR } "stuff\n"; -=item printf FILEHANDLE LIST +=item printf FILEHANDLE FORMAT, LIST -=item printf LIST +=item printf FORMAT, LIST -Equivalent to a "print FILEHANDLE sprintf(LIST)". The first argument -of the list will be interpreted as the printf format. +Equivalent to C<print FILEHANDLE sprintf(FORMAT, LIST)>. The first argument +of the list will be interpreted as the printf format. If C<use locale> is +in effect, the character used for the decimal point in formatted real numbers +is affected by the LC_NUMERIC locale. See L<perllocale>. + +Don't fall into the trap of using a printf() when a simple +print() would do. The print() is more efficient, and less +error prone. + +=item prototype FUNCTION + +Returns the prototype of a function as a string (or C<undef> if the +function has no prototype). FUNCTION is a reference to, or the name of, +the function whose prototype you want to retrieve. =item push ARRAY,LIST @@ -2029,25 +2320,29 @@ Generalized quotes. See L<perlop>. =item quotemeta EXPR -Returns the value of EXPR with with all regular expression -metacharacters backslashed. This is the internal function implementing +=item quotemeta + +Returns the value of EXPR with all non-alphanumeric +characters backslashed. 
(That is, all characters not matching +C</[A-Za-z_0-9]/> will be preceded by a backslash in the +returned string, regardless of any locale settings.) +This is the internal function implementing the \Q escape in double-quoted strings. +If EXPR is omitted, uses $_. + =item rand EXPR =item rand Returns a random fractional number between 0 and the value of EXPR. -(EXPR should be positive.) If EXPR is omitted, returns a value between -0 and 1. This function produces repeatable sequences unless srand() -is invoked. See also srand(). +(EXPR should be positive.) If EXPR is omitted, returns a value between +0 and 1. Automatically calls srand() unless srand() has already been +called. See also srand(). -(Note: if your rand function consistently returns numbers that are too +(Note: If your rand function consistently returns numbers that are too large or too small, then your version of Perl was probably compiled -with the wrong number of RANDBITS. As a workaround, you can usually -multiply EXPR by the correct power of 2 to get the range you want. -This will make your script unportable, however. It's better to recompile -if you can.) +with the wrong number of RANDBITS.) =item read FILEHANDLE,SCALAR,LENGTH,OFFSET @@ -2069,7 +2364,7 @@ directory. If there are no more entries, returns an undefined value in a scalar context or a null list in a list context. If you're planning to filetest the return values out of a readdir(), you'd -better prepend the directory in question. Otherwise, since we didn't +better prepend the directory in question. Otherwise, because we didn't chdir() there, it would have been testing the wrong file. opendir(DIR, $some_dir) || die "can't opendir $some_dir: $!"; @@ -2078,9 +2373,11 @@ chdir() there, it would have been testing the wrong file. =item readlink EXPR +=item readlink + Returns the value of a symbolic link, if symbolic links are implemented. If not, gives a fatal error. If there is some system -error, returns the undefined value and sets $! 
(errno). If EXPR is +error, returns the undefined value and sets C<$!> (errno). If EXPR is omitted, uses $_. =item recv SOCKET,SCALAR,LEN,FLAGS @@ -2090,7 +2387,7 @@ data into variable SCALAR from the specified SOCKET filehandle. Actually does a C recvfrom(), so that it can return the address of the sender. Returns the undefined value if there's an error. SCALAR will be grown or shrunk to the length actually read. Takes the same flags -as the system call of the same name. +as the system call of the same name. See L<perlipc/"UDP: Message Passing"> for examples. =item redo LABEL @@ -2122,8 +2419,11 @@ themselves about what was just input: =item ref EXPR -Returns a TRUE value if EXPR is a reference, FALSE otherwise. The value -returned depends on the type of thing the reference is a reference to. +=item ref + +Returns a TRUE value if EXPR is a reference, FALSE otherwise. If EXPR +is not specified, $_ will be used. The value returned depends on the +type of thing the reference is a reference to. Builtin types include: REF @@ -2133,22 +2433,22 @@ Builtin types include: CODE GLOB -If the referenced object has been blessed into a package, then that package +If the referenced object has been blessed into a package, then that package name is returned instead. You can think of ref() as a typeof() operator. if (ref($r) eq "HASH") { - print "r is a reference to an associative array.\n"; - } + print "r is a reference to a hash.\n"; + } if (!ref ($r)) { print "r is not a reference at all.\n"; - } + } See also L<perlref>. =item rename OLDNAME,NEWNAME Changes the name of a file. Returns 1 for success, 0 otherwise. Will -not work across filesystem boundaries. +not work across file system boundaries. =item require EXPR @@ -2156,7 +2456,7 @@ not work across filesystem boundaries. Demands some semantics specified by EXPR, or by $_ if EXPR is not supplied. If EXPR is numeric, demands that the current version of Perl -($] or $PERL_VERSION) be equal or greater than EXPR. 
+(C<$]> or $PERL_VERSION) be equal or greater than EXPR. Otherwise, demands that a library file be included if it hasn't already been included. The file is included via the do-FILE mechanism, which is @@ -2190,11 +2490,12 @@ end such a file with "1;" unless you're sure it'll return TRUE otherwise. But it's better just to put the "C<1;>", in case you add more statements. -If EXPR is a bare word, the require assumes a "F<.pm>" extension for you, -to make it easy to load standard modules. This form of loading of +If EXPR is a bareword, the require assumes a "F<.pm>" extension and +replaces "F<::>" with "F</>" in the filename for you, +to make it easy to load standard modules. This form of loading of modules does not risk altering your namespace. -For a yet-more-powerful import facility, see the L</use()> and +For a yet-more-powerful import facility, see L</use> and L<perlmod>. =item reset EXPR @@ -2206,36 +2507,52 @@ variables and reset ?? searches so that they work again. The expression is interpreted as a list of single characters (hyphens allowed for ranges). All variables and arrays beginning with one of those letters are reset to their pristine state. If the expression is -omitted, one-match searches (?pattern?) are reset to match again. Only -resets variables or searches in the current package. Always returns +omitted, one-match searches (?pattern?) are reset to match again. Resets +only variables or searches in the current package. Always returns 1. Examples: reset 'X'; # reset all X variables reset 'a-z'; # reset lower case variables reset; # just reset ?? searches -Resetting "A-Z" is not recommended since you'll wipe out your -ARGV and ENV arrays. Only resets package variables--lexical variables +Resetting "A-Z" is not recommended because you'll wipe out your +ARGV and ENV arrays. Resets only package variables--lexical variables are unaffected, but they clean themselves up on scope exit anyway, -so anymore you probably want to use them instead. See L</my>. 
+so you'll probably want to use them instead. See L</my>. -=item return LIST +=item return EXPR -Returns from a subroutine or eval with the value specified. (Note that -in the absence of a return a subroutine or eval() will automatically -return the value of the last expression evaluated.) +=item return + +Returns from a subroutine, eval(), or do FILE with the value of the +given EXPR. Evaluation of EXPR may be in a list, scalar, or void +context, depending on how the return value will be used, and the context +may vary from one execution to the next (see wantarray()). If no EXPR +is given, returns an empty list in a list context, an undefined value in +a scalar context, or nothing in a void context. + +(Note that in the absence of a return, a subroutine, eval, or do FILE +will automatically return the value of the last expression evaluated.) =item reverse LIST In a list context, returns a list value consisting of the elements -of LIST in the opposite order. In a scalar context, returns a string -value consisting of the bytes of the first element of LIST in the -opposite order. +of LIST in the opposite order. In a scalar context, concatenates the +elements of LIST, and returns a string value consisting of those bytes, +but in the opposite order. - print reverse <>; # line tac + print reverse <>; # line tac, last line first - undef $/; - print scalar reverse scalar <>; # byte tac + undef $/; # for efficiency of <> + print scalar reverse <>; # byte tac, last line tsrif + +This operator is also handy for inverting a hash, although there are some +caveats. If a value is duplicated in the original hash, only one of those +can be represented as a key in the inverted hash. Also, this has to +unwind one hash and build a whole new one, which may take some time +on a large hash. + + %by_name = reverse %by_address; # Invert the hash =item rewinddir DIRHANDLE @@ -2252,8 +2569,10 @@ last occurrence at or before that position. 
=item rmdir FILENAME +=item rmdir + Deletes the directory specified by FILENAME if it is empty. If it -succeeds it returns 1, otherwise it returns 0 and sets $! (errno). If +succeeds it returns 1, otherwise it returns 0 and sets C<$!> (errno). If FILENAME is omitted, uses $_. =item s/// @@ -2263,11 +2582,11 @@ The substitution operator. See L<perlop>. =item scalar EXPR Forces EXPR to be interpreted in a scalar context and returns the value -of EXPR. +of EXPR. @counts = ( scalar @a, scalar @b, scalar @c ); -There is no equivalent operator to force an expression to +There is no equivalent operator to force an expression to be interpolated in a list context because it's in practice never needed. If you really wanted to do so, however, you could use the construction C<@{[ (some expression) ]}>, but usually a simple @@ -2275,26 +2594,30 @@ C<(some expression)> suffices. =item seek FILEHANDLE,POSITION,WHENCE -Randomly positions the file pointer for FILEHANDLE, just like the fseek() -call of stdio. FILEHANDLE may be an expression whose value gives the name -of the filehandle. The values for WHENCE are 0 to set the file pointer to -POSITION, 1 to set the it to current plus POSITION, and 2 to set it to EOF -plus offset. You may use the values SEEK_SET, SEEK_CUR, and SEEK_END for -this from POSIX module. Returns 1 upon success, 0 otherwise. +Sets FILEHANDLE's position, just like the fseek() call of stdio. +FILEHANDLE may be an expression whose value gives the name of the +filehandle. The values for WHENCE are 0 to set the new position to +POSITION, 1 to set it to the current position plus POSITION, and 2 to +set it to EOF plus POSITION (typically negative). For WHENCE you may +use the constants SEEK_SET, SEEK_CUR, and SEEK_END from either the +IO::Seekable or the POSIX module. Returns 1 upon success, 0 otherwise. 
+ +If you want to position file for sysread() or syswrite(), don't use +seek() -- buffering makes its effect on the file's system position +unpredictable and non-portable. Use sysseek() instead. On some systems you have to do a seek whenever you switch between reading and writing. Amongst other things, this may have the effect of calling -stdio's clearerr(3). A "whence" of 1 (SEEK_CUR) is useful for not moving -the file pointer: +stdio's clearerr(3). A WHENCE of 1 (SEEK_CUR) is useful for not moving +the file position: seek(TEST,0,1); This is also useful for applications emulating C<tail -f>. Once you hit EOF on your read, and then sleep for a while, you might have to stick in a -seek() to reset things. First the simple trick listed above to clear the -filepointer. The seek() doesn't change the current position, but it -I<does> clear the end-of-file condition on the handle, so that the next -C<E<lt>FILEE<gt>> makes Perl try again to read something. Hopefully. +seek() to reset things. The seek() doesn't change the current position, +but it I<does> clear the end-of-file condition on the handle, so that the +next C<E<lt>FILEE<gt>> makes Perl try again to read something. We hope. If that doesn't work (some stdios are particularly cantankerous), then you may need something more like this: @@ -2339,12 +2662,12 @@ actual filehandle. 
Thus: Some programmers may prefer to think of filehandles as objects with methods, preferring to write the last example as: - use FileHandle; + use IO::Handle; STDERR->autoflush(1); =item select RBITS,WBITS,EBITS,TIMEOUT -This calls the select(2) system call with the bitmasks specified, which +This calls the select(2) system call with the bit masks specified, which can be constructed using fileno() and vec(), along these lines: $rin = $win = $ein = ''; @@ -2370,23 +2693,23 @@ The usual idiom is: ($nfound,$timeleft) = select($rout=$rin, $wout=$win, $eout=$ein, $timeout); -or to block until something becomes ready just do this +or to block until something becomes ready just do this $nfound = select($rout=$rin, $wout=$win, $eout=$ein, undef); -Most systems do not both to return anything useful in $timeleft, so +Most systems do not bother to return anything useful in $timeleft, so calling select() in a scalar context just returns $nfound. -Any of the bitmasks can also be undef. The timeout, if specified, is +Any of the bit masks can also be undef. The timeout, if specified, is in seconds, which may be fractional. Note: not all implementations are capable of returning the $timeleft. If not, they always return $timeleft equal to the supplied $timeout. -You can effect a 250-microsecond sleep this way: +You can effect a sleep of 250 milliseconds this way: select(undef, undef, undef, 0.25); -B<WARNING>: Do not attempt to mix buffered I/O (like read() or <FH>) +B<WARNING>: Do not attempt to mix buffered I/O (like read() or E<lt>FHE<gt>) with select(). You have to use sysread() instead. =item semctl ID,SEMNUM,CMD,ARG @@ -2432,7 +2755,9 @@ See L<perlipc/"UDP: Message Passing"> for examples. Sets the current process group for the specified PID, 0 for the current process. Will produce a fatal error if used on a machine that doesn't -implement setpgrp(2). +implement setpgrp(2). If the arguments are omitted, it defaults to +0,0. 
Note that the POSIX version of setpgrp() does not accept any +arguments, so only setpgrp 0,0 is portable. =item setpriority WHICH,WHO,PRIORITY @@ -2456,7 +2781,7 @@ array, returns the undefined value. If ARRAY is omitted, shifts the @ARGV array in the main program, and the @_ array in subroutines. (This is determined lexically.) See also unshift(), push(), and pop(). Shift() and unshift() do the same thing to the left end of an array -that push() and pop() do to the right end. +that pop() and push() do to the right end. =item shmctl ID,CMD,ARG @@ -2488,9 +2813,16 @@ has the same interpretation as in the system call of the same name. =item sin EXPR +=item sin + Returns the sine of EXPR (expressed in radians). If EXPR is omitted, returns sine of $_. +For the inverse sine operation, you may use the POSIX::asin() +function, or use this relation: + + sub asin { atan2($_[0], sqrt(1 - $_[0] * $_[0])) } + =item sleep EXPR =item sleep @@ -2498,27 +2830,29 @@ returns sine of $_. Causes the script to sleep for EXPR seconds, or forever if no EXPR. May be interrupted by sending the process a SIGALRM. Returns the number of seconds actually slept. You probably cannot mix alarm() and -sleep() calls, since sleep() is often implemented using alarm(). +sleep() calls, because sleep() is often implemented using alarm(). On some older systems, it may sleep up to a full second less than what you requested, depending on how it counts seconds. Most modern systems always sleep the full amount. For delays of finer granularity than one second, you may use Perl's -syscall() interface to access setitimer(2) if your system supports it, -or else see L</select()> below. +syscall() interface to access setitimer(2) if your system supports it, +or else see L</select()> below. + +See also the POSIX module's sigpause() function. =item socket SOCKET,DOMAIN,TYPE,PROTOCOL Opens a socket of the specified kind and attaches it to filehandle -SOCKET. 
DOMAIN, TYPE and PROTOCOL are specified the same as for the +SOCKET. DOMAIN, TYPE, and PROTOCOL are specified the same as for the system call of the same name. You should "use Socket;" first to get the proper definitions imported. See the example in L<perlipc/"Sockets: Client/Server Communication">. =item socketpair SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL Creates an unnamed pair of sockets in the specified domain, of the -specified type. DOMAIN, TYPE and PROTOCOL are specified the same as +specified type. DOMAIN, TYPE, and PROTOCOL are specified the same as for the system call of the same name. If unimplemented, yields a fatal error. Returns TRUE if successful. @@ -2528,16 +2862,15 @@ error. Returns TRUE if successful. =item sort LIST -Sorts the LIST and returns the sorted list value. Nonexistent values -of arrays are stripped out. If SUBNAME or BLOCK is omitted, sorts -in standard string comparison order. If SUBNAME is specified, it -gives the name of a subroutine that returns an integer less than, equal -to, or greater than 0, depending on how the elements of the array are -to be ordered. (The <=> and cmp operators are extremely useful in such -routines.) SUBNAME may be a scalar variable name, in which case the -value provides the name of the subroutine to use. In place of a -SUBNAME, you can provide a BLOCK as an anonymous, in-line sort -subroutine. +Sorts the LIST and returns the sorted list value. If SUBNAME or BLOCK +is omitted, sorts in standard string comparison order. If SUBNAME is +specified, it gives the name of a subroutine that returns an integer +less than, equal to, or greater than 0, depending on how the elements +of the array are to be ordered. (The C<E<lt>=E<gt>> and C<cmp> +operators are extremely useful in such routines.) SUBNAME may be a +scalar variable name, in which case the value provides the name of the +subroutine to use. In place of a SUBNAME, you can provide a BLOCK as +an anonymous, in-line sort subroutine. 
In the interests of efficiency the normal calling code for subroutines is bypassed, with the following effects: the subroutine may not be a @@ -2546,6 +2879,12 @@ the subroutine not via @_ but as the package global variables $a and $b (see example below). They are passed by reference, so don't modify $a and $b. And don't try to declare them as lexicals either. +You also cannot exit out of the sort block or subroutine using any of the +loop control operators described in L<perlsyn> or with goto(). + +When C<use locale> is in effect, C<sort LIST> sorts LIST according to the +current collation locale. See L<perllocale>. + Examples: # sort lexically @@ -2555,7 +2894,7 @@ Examples: @articles = sort {$a cmp $b} @files; # now case-insensitively - @articles = sort { uc($a) cmp uc($b)} @files; + @articles = sort {uc($a) cmp uc($b)} @files; # same thing in reversed order @articles = sort {$b cmp $a} @files; @@ -2568,12 +2907,12 @@ Examples: # sort using explicit subroutine name sub byage { - $age{$a} <=> $age{$b}; # presuming integers + $age{$a} <=> $age{$b}; # presuming numeric } @sortedclass = sort byage @class; - # this sorts the %age associative arrays by value - # instead of key using an inline function + # this sorts the %age hash by value instead of key + # using an in-line function @eldest = sort { $age{$b} <=> $age{$a} } keys %age; sub backwards { $b cmp $a; } @@ -2586,8 +2925,8 @@ Examples: print sort @george, 'to', @harry; # prints AbelAxedCainPunishedcatchaseddoggonetoxyz - # inefficiently sort by descending numeric compare using - # the first integer after the first = sign, or the + # inefficiently sort by descending numeric compare using + # the first integer after the first = sign, or the # whole record case-insensitively otherwise @new = sort { @@ -2600,10 +2939,10 @@ Examples: # we'll build auxiliary indices instead # for speed @nums = @caps = (); - for (@old) { + for (@old) { push @nums, /=(\d+)/; push @caps, uc($_); - } + } @new = @old[ sort { $nums[$b] 
<=> $nums[$a] @@ -2619,7 +2958,7 @@ Examples: $a->[2] cmp $b->[2] } map { [$_, /=(\d+)/, uc($_)] } @old; -If you're and using strict, you I<MUST NOT> declare $a +If you're using strict, you I<MUST NOT> declare $a and $b as lexicals. They are package globals. That means if you're in the C<main> package, it's @@ -2633,6 +2972,13 @@ but if you're in the C<FooPack> package, it's @articles = sort {$FooPack::b <=> $FooPack::a} @files; +The comparison function is required to behave. If it returns +inconsistent results (sometimes saying $x[1] is less than $x[2] and +sometimes saying the opposite, for example) the Perl interpreter will +probably crash and dump core. This is entirely due to and dependent +upon your system's qsort(3) library routine; this routine often avoids +sanity checks in the interest of speed. + =item splice ARRAY,OFFSET,LENGTH,LIST =item splice ARRAY,OFFSET,LENGTH @@ -2643,7 +2989,7 @@ Removes the elements designated by OFFSET and LENGTH from an array, and replaces them with the elements of LIST, if any. Returns the elements removed from the array. The array grows or shrinks as necessary. If LENGTH is omitted, removes everything from OFFSET onward. The -following equivalencies hold (assuming $[ == 0): +following equivalences hold (assuming C<$[ == 0>): push(@a,$x,$y) splice(@a,$#a+1,0,$x,$y) pop(@a) splice(@a,-1) @@ -2698,7 +3044,7 @@ characters at each point it matches that way. For example: produces the output 'h:i:t:h:e:r:e'. -The LIMIT parameter can be used to partially split a line +The LIMIT parameter can be used to split a line partially ($login, $passwd, $remainder) = split(/:/, $_, 3); @@ -2711,13 +3057,13 @@ into more fields than you really need. If the PATTERN contains parentheses, additional array elements are created from each matching substring in the delimiter. 
- split(/([,-])/, "1-10,20"); + split(/([,-])/, "1-10,20", 3); produces the list value (1, '-', 10, ',', 20) -If you had the entire header of a normal Unix email message in $header, +If you had the entire header of a normal Unix email message in $header, you could split it up into fields and their values this way: $header =~ s/\n\s+/ /g; # fix continuation lines @@ -2739,66 +3085,167 @@ Example: open(passwd, '/etc/passwd'); while (<passwd>) { - ($login, $passwd, $uid, $gid, $gcos, + ($login, $passwd, $uid, $gid, $gcos, $home, $shell) = split(/:/); ... } -(Note that $shell above will still have a newline on it. See L</chop>, +(Note that $shell above will still have a newline on it. See L</chop>, L</chomp>, and L</join>.) -=item sprintf FORMAT,LIST - -Returns a string formatted by the usual printf conventions of the C -language. See L<sprintf(3)> or L<printf(3)> on your system for details. -(The * character for an indirectly specified length is not -supported, but you can get the same effect by interpolating a variable -into the pattern.) Some C libraries' implementations of sprintf() can -dump core when fed ludicrous arguments. +=item sprintf FORMAT, LIST + +Returns a string formatted by the usual printf conventions of the +C library function sprintf(). See L<sprintf(3)> or L<printf(3)> +on your system for an explanation of the general principles. + +Perl does all of its own sprintf() formatting -- it emulates the C +function sprintf(), but it doesn't use it (except for floating-point +numbers, and even then only the standard modifiers are allowed). As a +result, any non-standard extensions in your local sprintf() are not +available from Perl. 
+ +Perl's sprintf() permits the following universally-known conversions: + + %% a percent sign + %c a character with the given number + %s a string + %d a signed integer, in decimal + %u an unsigned integer, in decimal + %o an unsigned integer, in octal + %x an unsigned integer, in hexadecimal + %e a floating-point number, in scientific notation + %f a floating-point number, in fixed decimal notation + %g a floating-point number, in %e or %f notation + +In addition, Perl permits the following widely-supported conversions: + + %X like %x, but using upper-case letters + %E like %e, but using an upper-case "E" + %G like %g, but with an upper-case "E" (if applicable) + %p a pointer (outputs the Perl value's address in hexadecimal) + %n special: *stores* the number of characters output so far + into the next variable in the parameter list + +Finally, for backward (and we do mean "backward") compatibility, Perl +permits these unnecessary but widely-supported conversions: + + %i a synonym for %d + %D a synonym for %ld + %U a synonym for %lu + %O a synonym for %lo + %F a synonym for %f + +Perl permits the following universally-known flags between the C<%> +and the conversion letter: + + space prefix positive number with a space + + prefix positive number with a plus sign + - left-justify within the field + 0 use zeros, not spaces, to right-justify + # prefix octal with "0", hex with "0x" + number minimum field width + .number "precision": digits after decimal point for floating-point, + max length for string, minimum length for integer + l interpret integer as C type "long" or "unsigned long" + h interpret integer as C type "short" or "unsigned short" + +There is also one Perl-specific flag: + + V interpret integer as Perl's standard integer type + +Where a number would appear in the flags, an asterisk ("*") may be +used instead, in which case Perl uses the next item in the parameter +list as the given number (that is, as the field width or precision). 
+If a field width obtained through "*" is negative, it has the same +effect as the '-' flag: left-justification. + +If C<use locale> is in effect, the character used for the decimal +point in formatted real numbers is affected by the LC_NUMERIC locale. +See L<perllocale>. =item sqrt EXPR +=item sqrt + Return the square root of EXPR. If EXPR is omitted, returns square root of $_. =item srand EXPR -Sets the random number seed for the C<rand> operator. If EXPR is omitted, -does C<srand(time)>. Many folks use an explicit C<srand(time ^ $$)> -instead. Of course, you'd need something much more random than that for -cryptographic purposes, since it's easy to guess the current time. -Checksumming the compressed output of rapidly changing operating system -status programs is the usual method. Examples are posted regularly to -the comp.security.unix newsgroup. +=item srand + +Sets the random number seed for the C<rand> operator. If EXPR is +omitted, uses a semi-random value based on the current time and process +ID, among other things. In versions of Perl prior to 5.004 the default +seed was just the current time(). This isn't a particularly good seed, +so many old programs supply their own seed value (often C<time ^ $$> or +C<time ^ ($$ + ($$ << 15))>), but that isn't necessary any more. + +In fact, it's usually not necessary to call srand() at all, because if +it is not called explicitly, it is called implicitly at the first use of +the C<rand> operator. However, this was not the case in versions of Perl +before 5.004, so if your script will run under older Perl versions, it +should call srand(). + +Note that you need something much more random than the default seed for +cryptographic purposes. Checksumming the compressed output of one or more +rapidly changing operating system status programs is the usual method. For +example: + + srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`); + +If you're particularly concerned with this, see the Math::TrulyRandom +module in CPAN. 
+ +Do I<not> call srand() multiple times in your program unless you know +exactly what you're doing and why you're doing it. The point of the +function is to "seed" the rand() function so that rand() can produce +a different sequence each time you run your program. Just do it once at the +top of your program, or you I<won't> get random numbers out of rand()! + +Frequently called programs (like CGI scripts) that simply use + + time ^ $$ + +for a seed can fall prey to the mathematical property that + + a^b == (a+1)^(b+1) + +one-third of the time. So don't do that. =item stat FILEHANDLE =item stat EXPR +=item stat + Returns a 13-element array giving the status info for a file, either the -file opened via FILEHANDLE, or named by EXPR. Returns a null list if -the stat fails. Typically used as follows: +file opened via FILEHANDLE, or named by EXPR. If EXPR is omitted, it +stats $_. Returns a null list if the stat fails. Typically used as +follows: + ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat($filename); -Not all fields are supported on all filesystem types. Here are the +Not all fields are supported on all filesystem types. Here are the meaning of the fields: - dev device number of filesystem - ino inode number - mode file mode (type and permissions) - nlink number of (hard) links to the file - uid numeric user ID of file's owner - gid numer group ID of file's owner - rdev the device identifier (special files only) - size total size of file, in bytes - atime last access time since the epoch - mtime last modify time since the epoch - ctime inode change time (NOT creation type!) 
since the epoch - blksize preferred blocksize for file system I/O - blocks actual number of blocks allocated + 0 dev device number of filesystem + 1 ino inode number + 2 mode file mode (type and permissions) + 3 nlink number of (hard) links to the file + 4 uid numeric user ID of file's owner + 5 gid numeric group ID of file's owner + 6 rdev the device identifier (special files only) + 7 size total size of file, in bytes + 8 atime last access time since the epoch + 9 mtime last modify time since the epoch + 10 ctime inode change time (NOT creation time!) since the epoch + 11 blksize preferred block size for file system I/O + 12 blocks actual number of blocks allocated (The epoch was at 00:00 January 1, 1970 GMT.) @@ -2810,21 +3257,21 @@ last stat or filetest are returned. Example: print "$file is executable NFS file\n"; } -(This only works on machines for which the device number is negative under NFS.) +(This works on machines only for which the device number is negative under NFS.) =item study SCALAR =item study -Takes extra time to study SCALAR ($_ if unspecified) in anticipation of +Takes extra time to study SCALAR (C<$_> if unspecified) in anticipation of doing many pattern matches on the string before it is next modified. This may or may not save time, depending on the nature and number of patterns you are searching on, and on the distribution of character -frequencies in the string to be searched--you probably want to compare -runtimes with and without it to see which runs faster. Those loops +frequencies in the string to be searched -- you probably want to compare +run times with and without it to see which runs faster. Those loops which scan for many short constant strings (including the constant parts of more complex patterns) will benefit most. You may have only -one study active at a time--if you study a different scalar the first +one study active at a time -- if you study a different scalar the first is "unstudied". 
(The way study works is this: a linked list of every character in the string to be searched is made, so we know, for example, where all the 'k' characters are. From each search string, @@ -2855,7 +3302,7 @@ runtime, you can build an entire loop as a string and eval that to avoid recompiling all your patterns all the time. Together with undefining $/ to input entire files as one record, this can be very fast, often faster than specialized programs like fgrep(1). The following -scans a list of files (@files) for a list of words (@words), and prints +scans a list of files (C<@files>) for a list of words (C<@words>), and prints out the names of those files that contain a match: $search = 'while (<>) { study;'; @@ -2866,7 +3313,7 @@ out the names of those files that contain a match: @ARGV = @files; undef $/; eval $search; # this screams - $/ = "\n"; # put back to normal input delim + $/ = "\n"; # put back to normal input delimiter foreach $file (sort keys(%seen)) { print $file, "\n"; } @@ -2880,7 +3327,7 @@ out the names of those files that contain a match: This is subroutine definition, not a real function I<per se>. With just a NAME (and possibly prototypes), it's just a forward declaration. Without a NAME, it's an anonymous function declaration, and does actually return a -value: the CODE ref of the closure you just created. See L<perlsub> and +value: the CODE ref of the closure you just created. See L<perlsub> and L<perlref> for details. =item substr EXPR,OFFSET,LEN @@ -2888,7 +3335,8 @@ L<perlref> for details. =item substr EXPR,OFFSET Extracts a substring out of EXPR and returns it. First character is at -offset 0, or whatever you've set $[ to. If OFFSET is negative, starts +offset 0, or whatever you've set C<$[> to (but don't do that). +If OFFSET is negative, starts that far from the end of the string. If LEN is omitted, returns everything to the end of the string. If LEN is negative, leaves that many characters off the end of the string. 
@@ -2907,7 +3355,7 @@ Returns 1 for success, 0 otherwise. On systems that don't support symbolic links, produces a fatal error at run time. To check for that, use eval: - $symlink_exists = (eval 'symlink("","");', $@ eq ''); + $symlink_exists = (eval {symlink("","")}, $@ eq ''); =item syscall LIST @@ -2925,7 +3373,7 @@ like numbers. require 'syscall.ph'; # may need to run h2ph syscall(&SYS_write, fileno(STDOUT), "hi there\n", 9); -Note that Perl only supports passing of up to 14 arguments to your system call, +Note that Perl supports passing of up to only 14 arguments to your system call, which in practice should usually suffice. =item sysopen FILEHANDLE,FILENAME,MODE @@ -2949,17 +3397,44 @@ the value of PERMS specifies the permissions of the newly created file. If PERMS is omitted, the default value is 0666, which allows read and write for all. This default is reasonable: see C<umask>. +The IO::File module provides a more object-oriented approach, if you're +into that kind of thing. + =item sysread FILEHANDLE,SCALAR,LENGTH,OFFSET =item sysread FILEHANDLE,SCALAR,LENGTH Attempts to read LENGTH bytes of data into variable SCALAR from the -specified FILEHANDLE, using the system call read(2). It bypasses -stdio, so mixing this with other kinds of reads may cause confusion. -Returns the number of bytes actually read, or undef if there was an -error. SCALAR will be grown or shrunk to the length actually read. An -OFFSET may be specified to place the read data at some other place than -the beginning of the string. +specified FILEHANDLE, using the system call read(2). It bypasses stdio, +so mixing this with other kinds of reads, print(), write(), seek(), or +tell() can cause confusion. Returns the number of bytes actually read, +or undef if there was an error. SCALAR will be grown or shrunk so that +the last byte actually read is the last byte of the scalar after the +read. 
+ +An OFFSET may be specified to place the read data at some place in the +string other than the beginning. A negative OFFSET specifies +placement at that many bytes counting backwards from the end of the +string. A positive OFFSET greater than the length of SCALAR results +in the string being padded to the required size with "\0" bytes before +the result of the read is appended. + +=item sysseek FILEHANDLE,POSITION,WHENCE + +Sets FILEHANDLE's system position using the system call lseek(2). It +bypasses stdio, so mixing this with reads (other than sysread()), +print(), write(), seek(), or tell() may cause confusion. FILEHANDLE may +be an expression whose value gives the name of the filehandle. The +values for WHENCE are 0 to set the new position to POSITION, 1 to set +it to the current position plus POSITION, and 2 to set it to EOF +plus POSITION (typically negative). For WHENCE, you may use the +constants SEEK_SET, SEEK_CUR, and SEEK_END from either the IO::Seekable +or the POSIX module. + +Returns the new position, or the undefined value on failure. A position +of zero is returned as the string "0 but true"; thus sysseek() returns +TRUE on success and FALSE on failure, yet you can still easily determine +the new position. =item system LIST @@ -2968,9 +3443,42 @@ first, and the parent process waits for the child process to complete. Note that argument processing varies depending on the number of arguments. The return value is the exit status of the program as returned by the wait() call. To get the actual exit value divide by -256. See also L</exec>. This is I<NOT> what you want to use to capture -the output from a command, for that you should merely use backticks, as -described in L<perlop/"`STRING`">. +256. See also L</exec>. This is I<NOT> what you want to use to capture +the output from a command, for that you should use merely backticks or +qx//, as described in L<perlop/"`STRING`">. 
+ +Because system() and backticks block SIGINT and SIGQUIT, killing the +program they're running doesn't actually interrupt your program. + + @args = ("command", "arg1", "arg2"); + system(@args) == 0 + or die "system @args failed: $?" + +Here's a more elaborate example of analysing the return value from +system() on a Unix system to check for all possibilities, including for +signals and core dumps. + + $rc = 0xffff & system @args; + printf "system(%s) returned %#04x: ", "@args", $rc; + if ($rc == 0) { + print "ran with normal exit\n"; + } + elsif ($rc == 0xff00) { + print "command failed: $!\n"; + } + elsif ($rc > 0x80) { + $rc >>= 8; + print "ran with non-zero exit status $rc\n"; + } + else { + print "ran with "; + if ($rc & 0x80) { + $rc &= ~0x80; + print "core dump from "; + } + print "signal $rc\n" + } + $ok = ($rc != 0); =item syswrite FILEHANDLE,SCALAR,LENGTH,OFFSET @@ -2978,16 +3486,21 @@ described in L<perlop/"`STRING`">. Attempts to write LENGTH bytes of data from variable SCALAR to the specified FILEHANDLE, using the system call write(2). It bypasses -stdio, so mixing this with prints may cause confusion. Returns the -number of bytes actually written, or undef if there was an error. An -OFFSET may be specified to get the write data from some other place than -the beginning of the string. +stdio, so mixing this with reads (other than sysread()), print(), +write(), seek(), or tell() may cause confusion. Returns the number of +bytes actually written, or undef if there was an error. If the length +is greater than the available data, only as much data as is available +will be written. + +An OFFSET may be specified to write the data from some part of the +string other than the beginning. A negative OFFSET specifies writing +that many bytes counting backwards from the end of the string. =item tell FILEHANDLE =item tell -Returns the current file position for FILEHANDLE. FILEHANDLE may be an +Returns the current position for FILEHANDLE. 
FILEHANDLE may be an expression whose value gives the name of the actual filehandle. If FILEHANDLE is omitted, assumes the file last read. @@ -3016,14 +3529,13 @@ use the each() function to iterate over such. Example: # print out history file offsets use NDBM_File; - tie(%HIST, NDBM_File, '/usr/lib/news/history', 1, 0); + tie(%HIST, 'NDBM_File', '/usr/lib/news/history', 1, 0); while (($key,$val) = each %HIST) { print $key, ' = ', unpack('L',$val), "\n"; } untie(%HIST); -A class implementing an associative array should have the following -methods: +A class implementing a hash should have the following methods: TIEHASH classname, LIST DESTROY this @@ -3046,7 +3558,7 @@ A class implementing a scalar should have the following methods: TIESCALAR classname, LIST DESTROY this - FETCH this, + FETCH this, STORE this, value Unlike dbmopen(), the tie() function will not use or require a module @@ -3062,8 +3574,10 @@ package. =item time -Returns the number of non-leap seconds since 00:00:00 UTC, January 1, -1970. Suitable for feeding to gmtime() and localtime(). +Returns the number of non-leap seconds since whatever time the system +considers to be the epoch (that's 00:00:00, January 1, 1904 for MacOS, +and 00:00:00 UTC, January 1, 1970 for most other systems). +Suitable for feeding to gmtime() and localtime(). =item times @@ -3074,7 +3588,7 @@ seconds, for this process and the children of this process. =item tr/// -The translation operator. See L<perlop>. +The translation operator. Same as y///. See L<perlop>. =item truncate FILEHANDLE,LENGTH @@ -3086,44 +3600,59 @@ on your system. =item uc EXPR +=item uc + Returns an uppercased version of EXPR. This is the internal function implementing the \U escape in double-quoted strings. -Should respect any POSIX setlocale() settings. +Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale>. + +If EXPR is omitted, uses $_. 
=item ucfirst EXPR +=item ucfirst + Returns the value of EXPR with the first character uppercased. This is the internal function implementing the \u escape in double-quoted strings. -Should respect any POSIX setlocale() settings. +Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale>. + +If EXPR is omitted, uses $_. =item umask EXPR =item umask -Sets the umask for the process and returns the old one. If EXPR is -omitted, merely returns current umask. +Sets the umask for the process to EXPR and returns the previous value. +If EXPR is omitted, merely returns the current umask. Remember that a +umask is a number, usually given in octal; it is I<not> a string of octal +digits. See also L</oct>, if all you have is a string. =item undef EXPR =item undef Undefines the value of EXPR, which must be an lvalue. Use only on a -scalar value, an entire array, or a subroutine name (using "&"). (Using undef() -will probably not do what you expect on most predefined variables or -DBM list values, so don't do that.) Always returns the undefined value. You can omit -the EXPR, in which case nothing is undefined, but you still get an -undefined value that you could, for instance, return from a -subroutine. Examples: +scalar value, an entire array, an entire hash, or a subroutine name (using +"&"). (Using undef() will probably not do what you expect on most +predefined variables or DBM list values, so don't do that.) Always +returns the undefined value. You can omit the EXPR, in which case +nothing is undefined, but you still get an undefined value that you +could, for instance, return from a subroutine, assign to a variable or +pass as a parameter. Examples: undef $foo; - undef $bar{'blurfl'}; + undef $bar{'blurfl'}; # Compare to: delete $bar{'blurfl'}; undef @ary; - undef %assoc; + undef %hash; undef &mysub; - return (wantarray ? () : undef) if $they_blew_it; + return (wantarray ? 
(undef, $errmsg) : undef) if $they_blew_it; + select undef, undef, undef, 0.25; + ($a, $b, undef, $c) = &foo; # Ignore third value returned =item unlink LIST +=item unlink + Deletes a list of files. Returns the number of files successfully deleted. @@ -3136,11 +3665,13 @@ the B<-U> flag is supplied to Perl. Even if these conditions are met, be warned that unlinking a directory can inflict damage on your filesystem. Use rmdir instead. +If LIST is omitted, uses $_. + =item unpack TEMPLATE,EXPR Unpack does the reverse of pack: it takes a string representing a structure and expands it out into a list value, returning the array -value. (In a scalar context, it merely returns the first value +value. (In a scalar context, it returns merely the first value produced.) The TEMPLATE has the same format as in the pack function. Here's a subroutine that does substring: @@ -3153,8 +3684,8 @@ and then there's sub ordinal { unpack("c",$_[0]); } # same as ord() -In addition, you may prefix a field with a %<number> to indicate that -you want a <number>-bit checksum of the items instead of the items +In addition, you may prefix a field with a %E<lt>numberE<gt> to indicate that +you want a E<lt>numberE<gt>-bit checksum of the items instead of the items themselves. Default is a 16-bit checksum. For example, the following computes the same number as the System V sum program: @@ -3187,12 +3718,26 @@ reverse. =item use Module +=item use Module VERSION LIST + +=item use VERSION + Imports some semantics into the current package from the named module, generally by aliasing certain subroutine or variable names into your package. It is exactly equivalent to BEGIN { require Module; import Module LIST; } +except that Module I<must> be a bareword. + +If the first argument to C<use> is a number, it is treated as a version +number instead of a module name. If the version of the Perl interpreter +is less than VERSION, then an error message is printed and Perl exits +immediately. 
This is often useful if you need to check the current +Perl version before C<use>ing library modules which have changed in +incompatible ways from older versions of Perl. (We try not to do +this more than we have to.) + The BEGIN forces the require and import to happen at compile time. The require makes sure the module is loaded into memory if it hasn't been yet. The import is not a builtin--it's just an ordinary static method @@ -3200,7 +3745,9 @@ call into the "Module" package to tell the module to import the list of features back into the current package. The module can implement its import method any way it likes, though most modules just choose to derive their import method via inheritance from the Exporter class that -is defined in the Exporter module. See L<Exporter>. +is defined in the Exporter module. See L<Exporter>. If no import +method can be found then the error is currently silently ignored. This +may change to a fatal error in a future version. If you don't want your namespace altered, explicitly supply an empty list: @@ -3210,6 +3757,13 @@ That is exactly equivalent to BEGIN { require Module; } +If the VERSION argument is present between Module and LIST, then the +C<use> will call the VERSION method in class Module with the given +version as an argument. The default VERSION method, inherited from +the Universal class, croaks if the given version is larger than the +value of the variable $Module::VERSION. (Note that there is not a +comma after VERSION!) + Because this is a wide-open interface, pragmas (compiler directives) are also implemented this way. Currently implemented pragmas are: @@ -3219,16 +3773,18 @@ are also implemented this way. 
Currently implemented pragmas are: use strict qw(subs vars refs); use subs qw(afunc blurfl); -These pseudomodules import semantics into the current block scope, unlike +These pseudo-modules import semantics into the current block scope, unlike ordinary modules, which import symbols into the current package (which are effective through the end of the file). There's a corresponding "no" command that unimports meanings imported -by use. +by use, i.e., it calls C<unimport Module LIST> instead of C<import>. no integer; no strict 'refs'; +If no unimport method can be found the call fails with a fatal error. + See L<perlmod> for a list of standard modules and pragmas. =item utime LIST @@ -3243,27 +3799,27 @@ to the current time. Example of a "touch" command: $now = time; utime $now, $now, @ARGV; -=item values ASSOC_ARRAY +=item values HASH -Returns a normal array consisting of all the values of the named -associative array. (In a scalar context, returns the number of -values.) The values are returned in an apparently random order, but it -is the same order as either the keys() or each() function would produce -on the same array. See also keys(), each(), and sort(). +Returns a normal array consisting of all the values of the named hash. +(In a scalar context, returns the number of values.) The values are +returned in an apparently random order, but it is the same order as either +the keys() or each() function would produce on the same hash. As a side +effect, it resets HASH's iterator. See also keys(), each(), and sort(). =item vec EXPR,OFFSET,BITS Treats the string in EXPR as a vector of unsigned integers, and -returns the value of the bitfield specified by OFFSET. BITS specifies +returns the value of the bit field specified by OFFSET. BITS specifies the number of bits that are reserved for each entry in the bit -vector. This must be a power of two from 1 to 32. vec() may also be -assigned to, in which case parens are needed to give the expression +vector. 
This must be a power of two from 1 to 32. vec() may also be +assigned to, in which case parentheses are needed to give the expression the correct precedence as in vec($image, $max_x * $x + $y, 8) = 3; Vectors created with vec() can also be manipulated with the logical -operators |, & and ^, which will assume a bit vector operation is +operators |, &, and ^, which will assume a bit vector operation is desired when both operands are strings. To transform a bit vector into a string or array of 0's and 1's, use these: @@ -3277,20 +3833,20 @@ If you know the exact length in bits, it can be used in place of the *. Waits for a child process to terminate and returns the pid of the deceased process, or -1 if there are no child processes. The status is -returned in $?. +returned in C<$?>. =item waitpid PID,FLAGS Waits for a particular child process to terminate and returns the pid of the deceased process, or -1 if there is no such child process. The -status is returned in $?. If you say +status is returned in C<$?>. If you say - use POSIX "wait_h"; + use POSIX ":sys_wait_h"; ... waitpid(-1,&WNOHANG); then you can do a non-blocking wait for any process. Non-blocking wait -is only available on machines supporting either the waitpid(2) or +is available on machines supporting either the waitpid(2) or wait4(2) system calls. However, waiting for a particular pid with FLAGS of 0 is implemented everywhere. (Perl emulates the system call by remembering the status values of processes that have exited but have @@ -3300,14 +3856,47 @@ not been harvested by the Perl script yet.) Returns TRUE if the context of the currently executing subroutine is looking for a list value. Returns FALSE if the context is looking -for a scalar. +for a scalar. Returns the undefined value if the context is looking +for no value (void context). - return wantarray ? () : undef; + return unless defined wantarray; # don't bother doing more + my @a = complex_calculation(); + return wantarray ? 
@a : "@a"; =item warn LIST -Produces a message on STDERR just like die(), but doesn't exit or -on an exception. +Produces a message on STDERR just like die(), but doesn't exit or throw +an exception. + +No message is printed if there is a C<$SIG{__WARN__}> handler +installed. It is the handler's responsibility to deal with the message +as it sees fit (like, for instance, converting it into a die()). Most +handlers must therefore make arrangements to actually display the +warnings that they are not prepared to deal with, by calling warn() +again in the handler. Note that this is quite safe and will not +produce an endless loop, since C<__WARN__> hooks are not called from +inside one. + +You will find this behavior is slightly different from that of +C<$SIG{__DIE__}> handlers (which don't suppress the error text, but can +instead call die() again to change it). + +Using a C<__WARN__> handler provides a powerful way to silence all +warnings (even the so-called mandatory ones). An example: + + # wipe out *all* compile-time warnings + BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } } + my $foo = 10; + my $foo = 20; # no warning about duplicate my $foo, + # but hey, you asked for it! + # no compile-time or run-time warnings before here + $DOWARN = 1; + + # run-time warnings enabled after here + warn "\$foo is alive and $foo!"; # does show up + +See L<perlvar> for details on setting C<%SIG> entries, and for more +examples. =item write FILEHANDLE @@ -3317,9 +3906,9 @@ on an exception. Writes a formatted record (possibly multi-line) to the specified file, using the format associated with that file. By default the format for -a file is the one having the same name is the filehandle, but the +a file is the one having the same name as the filehandle, but the format for the current output channel (see the select() function) may be set -explicitly by assigning the name of the format to the $~ variable. 
+explicitly by assigning the name of the format to the C<$~> variable. Top of form processing is handled automatically: if there is insufficient room on the current page for the formatted record, the @@ -3327,9 +3916,9 @@ page is advanced by writing a form feed, a special top-of-page format is used to format the new page header, and then the record is written. By default the top-of-page format is the name of the filehandle with "_TOP" appended, but it may be dynamically set to the format of your -choice by assigning the name to the $^ variable while the filehandle is +choice by assigning the name to the C<$^> variable while the filehandle is selected. The number of lines remaining on the current page is in -variable $-, which can be set to 0 to force a new page. +variable C<$->, which can be set to 0 to force a new page. If FILEHANDLE is unspecified, output goes to the current default output channel, which starts out as STDOUT but may be changed by the @@ -3341,6 +3930,6 @@ Note that write is I<NOT> the opposite of read. Unfortunately. =item y/// -The translation operator. See L<perlop>. +The translation operator. Same as tr///. See L<perlop>. =back diff --git a/pod/perlguts.pod b/pod/perlguts.pod index 07509bcc04..2eb5229060 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -8,7 +8,9 @@ This document attempts to describe some of the internal functions of the Perl executable. It is far from complete and probably contains many errors. Please refer any questions or comments to the author below. -=head1 Datatypes +=head1 Variables + +=head2 Datatypes Perl has three typedefs that handle Perl's three main data types: @@ -20,31 +22,33 @@ Each typedef has specific routines that manipulate the various data types. =head2 What is an "IV"? -Perl uses a special typedef IV which is large enough to hold either an -integer or a pointer. 
+Perl uses a special typedef IV which is a simple integer type that is +guaranteed to be large enough to hold a pointer (as well as an integer). Perl also uses two special typedefs, I32 and I16, which will always be at least 32-bits and 16-bits long, respectively. -=head2 Working with SV's +=head2 Working with SVs An SV can be created and loaded with one command. There are four types of values that can be loaded: an integer value (IV), a double (NV), a string, (PV), and another scalar (SV). -The four routines are: +The five routines are: SV* newSViv(IV); SV* newSVnv(double); SV* newSVpv(char*, int); + SV* newSVpvf(const char*, ...); SV* newSVsv(SV*); -To change the value of an *already-existing* SV, there are five routines: +To change the value of an *already-existing* SV, there are six routines: void sv_setiv(SV*, IV); void sv_setnv(SV*, double); - void sv_setpvn(SV*, char*, int) void sv_setpv(SV*, char*); + void sv_setpvn(SV*, char*, int) + void sv_setpvf(SV*, const char*, ...); void sv_setsv(SV*, SV*); Notice that you can choose to specify the length of the string to be @@ -52,7 +56,16 @@ assigned by using C<sv_setpvn> or C<newSVpv>, or you may allow Perl to calculate the length by using C<sv_setpv> or by specifying 0 as the second argument to C<newSVpv>. Be warned, though, that Perl will determine the string's length by using C<strlen>, which depends on the string terminating -with a NUL character. +with a NUL character. The arguments of C<sv_setpvf> are processed like +C<sprintf>, and the formatted output becomes the value. + +All SVs that will contain strings should, but need not, be terminated +with a NUL character. If it is not NUL-terminated there is a risk of +core dumps and corruptions from code which passes the string to C +functions or system calls which expect a NUL-terminated string. +Perl's own functions typically add a trailing NUL for this reason. 
+Nevertheless, you should be very careful when you pass a string stored +in an SV to a C function or system call. To access the actual value that an SV points to, you can use the macros: @@ -67,9 +80,9 @@ In the C<SvPV> macro, the length of the string returned is placed into the variable C<len> (this is a macro, so you do I<not> use C<&len>). If you do not care what the length of the data is, use the global variable C<na>. Remember, however, that Perl allows arbitrary strings of data that may both contain -NUL's and not be terminated by a NUL. +NULs and might not be terminated by a NUL. -If you simply want to know if the scalar value is TRUE, you can use: +If you want to know if the scalar value is TRUE, you can use: SvTRUE(SV*) @@ -80,7 +93,9 @@ Perl to allocate more memory for your SV, you can use the macro which will determine if more memory needs to be allocated. If so, it will call the function C<sv_grow>. Note that C<SvGROW> can only increase, not -decrease, the allocated memory of an SV. +decrease, the allocated memory of an SV and that it does not automatically +add a byte for a trailing NUL (perl's own string functions typically do +C<SvGROW(sv, len + 1)>). If you have an SV and want to know what kind of data Perl thinks is stored in it, you can use the following macros to check the type of SV you have. @@ -107,18 +122,20 @@ you can use the following functions: void sv_catpv(SV*, char*); void sv_catpvn(SV*, char*, int); + void sv_catpvf(SV*, const char*, ...); void sv_catsv(SV*, SV*); The first function calculates the length of the string to be appended by using C<strlen>. In the second, you specify the length of the string -yourself. The third function extends the string stored in the first SV -with the string stored in the second SV. It also forces the second SV to -be interpreted as a string. +yourself. The third function processes its arguments like C<sprintf> and +appends the formatted output. 
The fourth function extends the string +stored in the first SV with the string stored in the second SV. It also +forces the second SV to be interpreted as a string. If you know the name of a scalar variable, you can get a pointer to its SV by using the following: - SV* perl_get_sv("varname", FALSE); + SV* perl_get_sv("package::varname", FALSE); This returns NULL if the variable does not exist. @@ -146,16 +163,16 @@ Take this code: This code tries to return a new SV (which contains the value 42) if it should return a real value, or undef otherwise. Instead it has returned a null pointer which, somewhere down the line, will cause a segmentation violation, -or just weird results. Change the zero to C<&sv_undef> in the first line and -all will be well. +bus error, or just weird results. Change the zero to C<&sv_undef> in the first +line and all will be well. To free an SV that you've created, call C<SvREFCNT_dec(SV*)>. Normally this -call is not necessary. See the section on B<MORTALITY>. +call is not necessary (see L<Reference Counts and Mortality>). =head2 What's Really Stored in an SV? Recall that the usual method of determining the type of scalar you have is -to use C<Sv*OK> macros. Since a scalar can be both a number and a string, +to use C<Sv*OK> macros. Because a scalar can be both a number and a string, usually these macros will always return TRUE and calling the C<Sv*V> macros will do the appropriate conversion of string to integer/double or integer/double to string. @@ -170,23 +187,23 @@ pointer in an SV, you can use the following three macros instead: These will tell you if you truly have an integer, double, or string pointer stored in your SV. The "p" stands for private. -In general, though, it's best to just use the C<Sv*V> macros. +In general, though, it's best to use the C<Sv*V> macros. -=head2 Working with AV's +=head2 Working with AVs -There are two ways to create and load an AV. 
The first method just creates -an empty AV: +There are two ways to create and load an AV. The first method creates an +empty AV: AV* newAV(); -The second method both creates the AV and initially populates it with SV's: +The second method both creates the AV and initially populates it with SVs: AV* av_make(I32 num, SV **ptr); The second argument points to an array containing C<num> C<SV*>'s. Once the -AV has been created, the SV's can be destroyed, if so desired. +AV has been created, the SVs can be destroyed, if so desired. -Once the AV has been created, the following operations are possible on AV's: +Once the AV has been created, the following operations are possible on AVs: void av_push(AV*, SV*); SV* av_pop(AV*); @@ -200,63 +217,77 @@ to these new elements. Here are some other functions: - I32 av_len(AV*); /* Returns highest index value in array */ - + I32 av_len(AV*); SV** av_fetch(AV*, I32 key, I32 lval); - /* Fetches value at key offset, but it stores an undef value - at the offset if lval is non-zero */ SV** av_store(AV*, I32 key, SV* val); - /* Stores val at offset key */ -Take note that C<av_fetch> and C<av_store> return C<SV**>'s, not C<SV*>'s. +The C<av_len> function returns the highest index value in the array (just +like $#array in Perl). If the array is empty, -1 is returned. The +C<av_fetch> function returns the value at index C<key>, but if C<lval> +is non-zero, then C<av_fetch> will store an undef value at that index. +The C<av_store> function stores the value C<val> at index C<key>. +Note that C<av_fetch> and C<av_store> both return C<SV**>'s, not C<SV*>'s +as their return value. void av_clear(AV*); - /* Clear out all elements, but leave the array */ void av_undef(AV*); - /* Undefines the array, removing all elements */ void av_extend(AV*, I32 key); - /* Extend the array to a total of key elements */ + +The C<av_clear> function deletes all the elements in the AV* array, but +does not actually delete the array itself. 
The C<av_undef> function will +delete all the elements in the array plus the array itself. The +C<av_extend> function extends the array so that it contains C<key> +elements. If C<key> is less than the current length of the array, then +nothing is done. If you know the name of an array variable, you can get a pointer to its AV by using the following: - AV* perl_get_av("varname", FALSE); + AV* perl_get_av("package::varname", FALSE); This returns NULL if the variable does not exist. -=head2 Working with HV's +=head2 Working with HVs To create an HV, you use the following routine: HV* newHV(); -Once the HV has been created, the following operations are possible on HV's: +Once the HV has been created, the following operations are possible on HVs: SV** hv_store(HV*, char* key, U32 klen, SV* val, U32 hash); SV** hv_fetch(HV*, char* key, U32 klen, I32 lval); -The C<klen> parameter is the length of the key being passed in. The C<val> -argument contains the SV pointer to the scalar being stored, and C<hash> is -the pre-computed hash value (zero if you want C<hv_store> to calculate it -for you). The C<lval> parameter indicates whether this fetch is actually a -part of a store operation. +The C<klen> parameter is the length of the key being passed in (Note that +you cannot pass 0 in as a value of C<klen> to tell Perl to measure the +length of the key). The C<val> argument contains the SV pointer to the +scalar being stored, and C<hash> is the precomputed hash value (zero if +you want C<hv_store> to calculate it for you). The C<lval> parameter +indicates whether this fetch is actually a part of a store operation, in +which case a new undefined value will be added to the HV with the supplied +key and C<hv_fetch> will return as if the value had already existed. Remember that C<hv_store> and C<hv_fetch> return C<SV**>'s and not just -C<SV*>. In order to access the scalar value, you must first dereference -the return value. 
However, you should check to make sure that the return -value is not NULL before dereferencing it. +C<SV*>. To access the scalar value, you must first dereference the return +value. However, you should check to make sure that the return value is +not NULL before dereferencing it. These two functions check if a hash table entry exists, and deletes it. bool hv_exists(HV*, char* key, U32 klen); SV* hv_delete(HV*, char* key, U32 klen, I32 flags); +If C<flags> does not include the C<G_DISCARD> flag then C<hv_delete> will +create and return a mortal copy of the deleted value. + And more miscellaneous functions: void hv_clear(HV*); - /* Clears all entries in hash table */ void hv_undef(HV*); - /* Undefines the hash table */ + +Like their AV counterparts, C<hv_clear> deletes all the entries in the hash +table but does not actually delete the hash table. The C<hv_undef> deletes +both the entries and the hash table itself. Perl keeps the actual data in linked list of structures with a typedef of HE. These contain the actual key and value pointers (plus extra administrative @@ -284,11 +315,11 @@ specified below. If you know the name of a hash variable, you can get a pointer to its HV by using the following: - HV* perl_get_hv("varname", FALSE); + HV* perl_get_hv("package::varname", FALSE); This returns NULL if the variable does not exist. 
-The hash algorithm, for those who are interested, is: +The hash algorithm is defined in the C<PERL_HASH(hash, key, klen)> macro: i = klen; hash = 0; @@ -296,18 +327,65 @@ The hash algorithm, for those who are interested, is: while (i--) hash = hash * 33 + *s++; +=head2 Hash API Extensions + +Beginning with version 5.004, the following functions are also supported: + + HE* hv_fetch_ent (HV* tb, SV* key, I32 lval, U32 hash); + HE* hv_store_ent (HV* tb, SV* key, SV* val, U32 hash); + + bool hv_exists_ent (HV* tb, SV* key, U32 hash); + SV* hv_delete_ent (HV* tb, SV* key, I32 flags, U32 hash); + + SV* hv_iterkeysv (HE* entry); + +Note that these functions take C<SV*> keys, which simplifies writing +of extension code that deals with hash structures. These functions +also allow passing of C<SV*> keys to C<tie> functions without forcing +you to stringify the keys (unlike the previous set of functions). + +They also return and accept whole hash entries (C<HE*>), making their +use more efficient (since the hash number for a particular string +doesn't have to be recomputed every time). See L<API LISTING> later in +this document for detailed descriptions. + +The following macros must always be used to access the contents of hash +entries. Note that the arguments to these macros must be simple +variables, since they may get evaluated more than once. See +L<API LISTING> later in this document for detailed descriptions of these +macros. + + HePV(HE* he, STRLEN len) + HeVAL(HE* he) + HeHASH(HE* he) + HeSVKEY(HE* he) + HeSVKEY_force(HE* he) + HeSVKEY_set(HE* he, SV* sv) + +These two lower level macros are defined, but must only be used when +dealing with keys that are not C<SV*>s: + + HeKEY(HE* he) + HeKLEN(HE* he) + + =head2 References References are a special type of scalar that point to other data types (including references). 
-To create a reference, use the following command: +To create a reference, use either of the following functions: - SV* newRV((SV*) thing); + SV* newRV_inc((SV*) thing); + SV* newRV_noinc((SV*) thing); -The C<thing> argument can be any of an C<SV*>, C<AV*>, or C<HV*>. Once -you have a reference, you can use the following macro to dereference the -reference: +The C<thing> argument can be any of an C<SV*>, C<AV*>, or C<HV*>. The +functions are identical except that C<newRV_inc> increments the reference +count of the C<thing>, while C<newRV_noinc> does not. For historical +reasons, C<newRV> is a synonym for C<newRV_inc>. + +Once you have a reference, you can use the following macro to dereference +the reference: SvRV(SV*) @@ -318,8 +396,8 @@ To determine if an SV is a reference, you can use the following macro: SvROK(SV*) -To actually discover what the reference refers to, you must use the following -macro and then check the value returned. +To discover what type of value the reference refers to, use the following +macro and then check the return value. SvTYPE(SvRV(SV*)) @@ -328,10 +406,14 @@ The most useful types that will be returned are: SVt_IV Scalar SVt_NV Scalar SVt_PV Scalar + SVt_RV Scalar SVt_PVAV Array SVt_PVHV Hash SVt_PVCV Code - SVt_PVMG Blessed Scalar + SVt_PVGV Glob (possibly a file handle) + SVt_PVMG Blessed or Magical Scalar + + See the sv.h header file for more details. =head2 Blessed References and Class Objects @@ -345,134 +427,113 @@ A reference can be blessed into a package with the following function: SV* sv_bless(SV* sv, HV* stash); The C<sv> argument must be a reference. The C<stash> argument specifies -which class the reference will belong to. See the section on L<Stashes> -for information on converting class names into stashes. +which class the reference will belong to. See +L<Stashes and Globs> for information on converting class names into stashes. /* Still under construction */ Upgrades rv to reference if not already one. 
Creates new SV for rv to -point to. -If classname is non-null, the SV is blessed into the specified class. -SV is returned. +point to. If C<classname> is non-null, the SV is blessed into the specified +class. SV is returned. SV* newSVrv(SV* rv, char* classname); -Copies integer or double into an SV whose reference is rv. SV is blessed -if classname is non-null. +Copies integer or double into an SV whose reference is C<rv>. SV is blessed +if C<classname> is non-null. SV* sv_setref_iv(SV* rv, char* classname, IV iv); SV* sv_setref_nv(SV* rv, char* classname, NV iv); -Copies pointer (I<not a string!>) into an SV whose reference is rv. -SV is blessed if classname is non-null. +Copies the pointer value (I<the address, not the string!>) into an SV whose +reference is rv. SV is blessed if C<classname> is non-null. SV* sv_setref_pv(SV* rv, char* classname, PV iv); -Copies string into an SV whose reference is rv. -Set length to 0 to let Perl calculate the string length. -SV is blessed if classname is non-null. +Copies string into an SV whose reference is C<rv>. Set length to 0 to let +Perl calculate the string length. SV is blessed if C<classname> is non-null. SV* sv_setref_pvn(SV* rv, char* classname, PV iv, int length); int sv_isa(SV* sv, char* name); int sv_isobject(SV* sv); -=head1 Creating New Variables +=head2 Creating New Variables -To create a new Perl variable, which can be accessed from your Perl script, -use the following routines, depending on the variable type. +To create a new Perl variable with an undef value which can be accessed from +your Perl script, use the following routines, depending on the variable type. - SV* perl_get_sv("varname", TRUE); - AV* perl_get_av("varname", TRUE); - HV* perl_get_hv("varname", TRUE); + SV* perl_get_sv("package::varname", TRUE); + AV* perl_get_av("package::varname", TRUE); + HV* perl_get_hv("package::varname", TRUE); Notice the use of TRUE as the second parameter. 
The new variable can now be set, using the routines appropriate to the data type. -There are additional bits that may be OR'ed with the TRUE argument to enable -certain extra features. Those bits are: - - 0x02 Marks the variable as multiply defined, thus preventing the - "Indentifier <varname> used only once: possible typo" warning. - 0x04 Issues a "Had to create <varname> unexpectedly" warning if - the variable didn't actually exist. This is useful if - you expected the variable to already exist and want to propagate - this warning back to the user. - -If the C<varname> argument does not contain a package specifier, it is -created in the current package. - -=head1 XSUB's and the Argument Stack - -The XSUB mechanism is a simple way for Perl programs to access C subroutines. -An XSUB routine will have a stack that contains the arguments from the Perl -program, and a way to map from the Perl data structures to a C equivalent. - -The stack arguments are accessible through the C<ST(n)> macro, which returns -the C<n>'th stack argument. Argument 0 is the first argument passed in the -Perl subroutine call. These arguments are C<SV*>, and can be used anywhere -an C<SV*> is used. - -Most of the time, output from the C routine can be handled through use of -the RETVAL and OUTPUT directives. However, there are some cases where the -argument stack is not already long enough to handle all the return values. -An example is the POSIX tzname() call, which takes no arguments, but returns -two, the local timezone's standard and summer time abbreviations. - -To handle this situation, the PPCODE directive is used and the stack is -extended using the macro: - - EXTEND(sp, num); - -where C<sp> is the stack pointer, and C<num> is the number of elements the -stack should be extended by. 
- -Now that there is room on the stack, values can be pushed on it using the -macros to push IV's, doubles, strings, and SV pointers respectively: - - PUSHi(IV) - PUSHn(double) - PUSHp(char*, I32) - PUSHs(SV*) - -And now the Perl program calling C<tzname>, the two values will be assigned -as in: - - ($standard_abbrev, $summer_abbrev) = POSIX::tzname; - -An alternate (and possibly simpler) method to pushing values on the stack is -to use the macros: - - XPUSHi(IV) - XPUSHn(double) - XPUSHp(char*, I32) - XPUSHs(SV*) - -These macros automatically adjust the stack for you, if needed. - -For more information, consult L<perlxs>. - -=head1 Mortality - -In Perl, values are normally "immortal" -- that is, they are not freed unless -explicitly done so (via the Perl C<undef> call or other routines in Perl -itself). - -Add cruft about reference counts. - int SvREFCNT(SV* sv); - void SvREFCNT_inc(SV* sv); - void SvREFCNT_dec(SV* sv); - -In the above example with C<tzname>, we needed to create two new SV's to push -onto the argument stack, that being the two strings. However, we don't want -these new SV's to stick around forever because they will eventually be -copied into the SV's that hold the two scalar variables. - -An SV (or AV or HV) that is "mortal" acts in all ways as a normal "immortal" -SV, AV, or HV, but is only valid in the "current context". When the Perl -interpreter leaves the current context, the mortal SV, AV, or HV is -automatically freed. Generally the "current context" means a single -Perl statement. +There are additional macros whose values may be bitwise OR'ed with the +C<TRUE> argument to enable certain extra features. Those bits are: + + GV_ADDMULTI Marks the variable as multiply defined, thus preventing the + "Name <varname> used only once: possible typo" warning. + GV_ADDWARN Issues the warning "Had to create <varname> unexpectedly" if + the variable did not exist before the function was called. 
+ +If you do not specify a package name, the variable is created in the current +package. + +=head2 Reference Counts and Mortality + +Perl uses a reference count-driven garbage collection mechanism. SVs, +AVs, or HVs (xV for short in the following) start their life with a +reference count of 1. If the reference count of an xV ever drops to 0, +then it will be destroyed and its memory made available for reuse. + +This normally doesn't happen at the Perl level unless a variable is +undef'ed or the last variable holding a reference to it is changed or +overwritten. At the internal level, however, reference counts can be +manipulated with the following macros: + + int SvREFCNT(SV* sv); + SV* SvREFCNT_inc(SV* sv); + void SvREFCNT_dec(SV* sv); + +However, there is one other function which manipulates the reference +count of its argument. The C<newRV_inc> function, you will recall, +creates a reference to the specified argument. As a side effect, +it increments the argument's reference count. If this is not what +you want, use C<newRV_noinc> instead. + +For example, imagine you want to return a reference from an XSUB function. +Inside the XSUB routine, you create an SV which initially has a reference +count of one. Then you call C<newRV_inc>, passing it the just-created SV. +This returns the reference as a new SV, but the reference count of the +SV you passed to C<newRV_inc> has been incremented to two. Now you +return the reference from the XSUB routine and forget about the SV. +But Perl hasn't! Whenever the returned reference is destroyed, the +reference count of the original SV is decreased to one and nothing happens. +The SV will hang around without any way to access it until Perl itself +terminates. This is a memory leak. + +The correct procedure, then, is to use C<newRV_noinc> instead of +C<newRV_inc>. Then, if and when the last reference is destroyed, +the reference count of the SV will go to zero and it will be destroyed, +stopping any memory leak. 
+ +There are some convenience functions available that can help with the +destruction of xVs. These functions introduce the concept of "mortality". +An xV that is mortal has had its reference count marked to be decremented, +but not actually decremented, until "a short time later". Generally the +term "short time later" means a single Perl statement, such as a call to +an XSUB function. The actual determinant for when mortal xVs have their +reference count decremented depends on two macros, SAVETMPS and FREETMPS. +See L<perlcall> and L<perlxs> for more details on these macros. + +"Mortalization" then is at its simplest a deferred C<SvREFCNT_dec>. +However, if you mortalize a variable twice, the reference count will +later be decremented twice. + +You should be careful about creating mortal variables. Strange things +can happen if you make the same value mortal within multiple contexts, +or if you make a variable mortal multiple times. To create a mortal variable, use the functions: @@ -480,34 +541,22 @@ To create a mortal variable, use the functions: SV* sv_2mortal(SV*) SV* sv_mortalcopy(SV*) -The first call creates a mortal SV, the second converts an existing SV to -a mortal SV, the third creates a mortal copy of an existing SV. +The first call creates a mortal SV, the second converts an existing +SV to a mortal SV (and thus defers a call to C<SvREFCNT_dec>), and the +third creates a mortal copy of an existing SV. -The mortal routines are not just for SV's -- AV's and HV's can be made mortal -by passing their address (and casting them to C<SV*>) to the C<sv_2mortal> or -C<sv_mortalcopy> routines. +The mortal routines are not just for SVs -- AVs and HVs can be +made mortal by passing their address (type-casted to C<SV*>) to the +C<sv_2mortal> or C<sv_mortalcopy> routines. ->From Ilya: -Beware that the sv_2mortal() call is eventually equivalent to -svREFCNT_dec(). 
A value can happily be mortal in two different contexts, -and it will be svREFCNT_dec()ed twice, once on exit from these -contexts. It can also be mortal twice in the same context. This means -that you should be very careful to make a value mortal exactly as many -times as it is needed. The value that go to the Perl stack I<should> -be mortal. +=head2 Stashes and Globs -You should be careful about creating mortal variables. It is possible for -strange things to happen should you make the same value mortal within -multiple contexts. - -=head1 Stashes - -A stash is a hash table (associative array) that contains all of the -different objects that are contained within a package. Each key of the -stash is a symbol name (shared by all the different types of objects -that have the same name), and each value in the hash table is called a -GV (for Glob Value). This GV in turn contains references to the various -objects of that name, including (but not limited to) the following: +A "stash" is a hash that contains all of the different objects that +are contained within a package. Each key of the stash is a symbol +name (shared by all the different types of objects that have the same +name), and each value in the hash table is a GV (Glob Value). This GV +in turn contains references to the various objects of that name, +including (but not limited to) the following: Scalar Value Array Value @@ -517,11 +566,11 @@ objects of that name, including (but not limited to) the following: Format Subroutine -Perl stores various stashes in a separate GV structure (for global -variable) but represents them with an HV structure. The keys in this -larger GV are the various package names; the values are the C<GV*>'s -which are stashes. It may help to think of a stash purely as an HV, -and that the term "GV" means the global variable hash. +There is a single stash called "defstash" that holds the items that exist +in the "main" package. 
To get at the items in other packages, append the +string "::" to the package name. The items in the "Foo" package are in +the stash "Foo::" in defstash. The items in the "Bar::Baz" package are +in the stash "Baz::" in "Bar::"'s stash. To get the stash pointer for a particular package, use the function: @@ -546,8 +595,8 @@ then use the following to get the package name itself: char* HvNAME(HV* stash); -If you need to return a blessed value to your Perl script, you can use the -following function: +If you need to bless or re-bless an object you can use the following +function: SV* sv_bless(SV*, HV* stash) @@ -557,13 +606,51 @@ as any other SV. For more information on references and blessings, consult L<perlref>. -=head1 Magic +=head2 Double-Typed SVs + +Scalar variables normally contain only one type of value, an integer, +double, pointer, or reference. Perl will automatically convert the +actual scalar data from the stored type into the requested type. + +Some scalar variables contain more than one type of scalar data. For +example, the variable C<$!> contains either the numeric value of C<errno> +or its string equivalent from either C<strerror> or C<sys_errlist[]>. + +To force multiple data values into an SV, you must do two things: use the +C<sv_set*v> routines to add the additional scalar type, then set a flag +so that Perl will believe it contains more than one type of data. The +four macros to set the flags are: + + SvIOK_on + SvNOK_on + SvPOK_on + SvROK_on + +The particular macro you must use depends on which C<sv_set*v> routine +you called first. This is because every C<sv_set*v> routine turns on +only the bit for the particular type of data being set, and turns off +all the rest. 
+ +For example, to create a new Perl variable called "dberror" that contains +both the numeric and descriptive string error values, you could use the +following code: + + extern int dberror; + extern char *dberror_list; + + SV* sv = perl_get_sv("dberror", TRUE); + sv_setiv(sv, (IV) dberror); + sv_setpv(sv, dberror_list[dberror]); + SvIOK_on(sv); + +If the order of C<sv_setiv> and C<sv_setpv> had been reversed, then the +macro C<SvPOK_on> would need to be called instead of C<SvIOK_on>. + +=head2 Magic Variables [This section still under construction. Ignore everything here. Post no bills. Everything not permitted is forbidden.] -# Version 6, 1995/1/27 - Any SV may be magical, that is, it has special features that a normal SV does not have. These features are stored in the SV structure in a linked list of C<struct magic>'s, typedef'ed to C<MAGIC>. @@ -594,12 +681,12 @@ If C<sv> is not already magical, Perl uses the C<SvUPGRADE> macro to set the C<SVt_PVMG> flag for the C<sv>. Perl then continues by adding it to the beginning of the linked list of magical features. Any prior entry of the same type of magic is deleted. Note that this can be -overriden, and multiple instances of the same type of magic can be +overridden, and multiple instances of the same type of magic can be associated with an SV. -The C<name> and C<namlem> arguments are used to associate a string with -the magic, typically the name of a variable. C<namlem> is stored in the -C<mg_len> field and if C<name> is non-null and C<namlem> >= 0 a malloc'd +The C<name> and C<namlen> arguments are used to associate a string with +the magic, typically the name of a variable. C<namlen> is stored in the +C<mg_len> field and if C<name> is non-null and C<namlen> >= 0 a malloc'd copy of the name is stored in C<mg_ptr> field. The sv_magic function uses C<how> to determine which, if any, predefined @@ -665,8 +752,8 @@ the various routines for the various magical types begin with C<magic_>. 
The current kinds of Magic Virtual Tables are: - mg_type MGVTBL Type of magicalness - ------- ------ ------------------- + mg_type MGVTBL Type of magical + ------- ------ ---------------------------- \0 vtbl_sv Regexp??? A vtbl_amagic Operator Overloading a vtbl_amagicelem Operator Overloading @@ -679,6 +766,7 @@ The current kinds of Magic Virtual Tables are: i vtbl_isaelem @ISA array element L 0 (but sets RMAGICAL) Perl Module/Debugger??? l vtbl_dbline Debugger? + o vtbl_collxfrm Locale transformation P vtbl_pack Tied Array or Hash p vtbl_packelem Tied Array or Hash element q vtbl_packelem Tied Scalar or Handle @@ -688,71 +776,97 @@ The current kinds of Magic Virtual Tables are: U vtbl_uvar ??? v vtbl_vec Vector x vtbl_substr Substring??? + y vtbl_itervar Shadow "foreach" iterator variable * vtbl_glob GV??? # vtbl_arylen Array Length . vtbl_pos $. scalar variable - ~ Reserved for extensions, but multiple extensions may clash + ~ None Used by certain extensions -When an upper-case and lower-case letter both exist in the table, then the -upper-case letter is used to represent some kind of composite type (a list -or a hash), and the lower-case letter is used to represent an element of +When an uppercase and lowercase letter both exist in the table, then the +uppercase letter is used to represent some kind of composite type (a list +or a hash), and the lowercase letter is used to represent an element of that composite type. +The '~' magic type is defined specifically for use by extensions and +will not be used by perl itself. Extensions can use ~ magic to 'attach' +private information to variables (typically objects). This is especially +useful because there is no way for normal perl code to corrupt this +private information (unlike using extra elements of a hash object). + +Note that because multiple extensions may be using ~ magic it is +important for extensions to take extra care with it. 
Typically only +using it on objects blessed into the same class as the extension +is sufficient. It may also be appropriate to add an I32 'signature' +at the top of the private data area and check that. + =head2 Finding Magic MAGIC* mg_find(SV*, int type); /* Finds the magic pointer of that type */ This routine returns a pointer to the C<MAGIC> structure stored in the SV. If the SV does not have that magical feature, C<NULL> is returned. Also, -if the SV is not of type SVt_PVMG, Perl may core-dump. +if the SV is not of type SVt_PVMG, Perl may core dump. int mg_copy(SV* sv, SV* nsv, char* key, STRLEN klen); This routine checks to see what types of magic C<sv> has. If the mg_type -field is an upper-case letter, then the mg_obj is copied to C<nsv>, but -the mg_type field is changed to be the lower-case letter. +field is an uppercase letter, then the mg_obj is copied to C<nsv>, but +the mg_type field is changed to be the lowercase letter. -=head1 Double-Typed SV's +=head1 Subroutines -Scalar variables normally contain only one type of value, an integer, -double, pointer, or reference. Perl will automatically convert the -actual scalar data from the stored type into the requested type. +=head2 XSUBs and the Argument Stack -Some scalar variables contain more than one type of scalar data. For -example, the variable C<$!> contains either the numeric value of C<errno> -or its string equivalent from either C<strerror> or C<sys_errlist[]>. +The XSUB mechanism is a simple way for Perl programs to access C subroutines. +An XSUB routine will have a stack that contains the arguments from the Perl +program, and a way to map from the Perl data structures to a C equivalent. -To force multiple data values into an SV, you must do two things: use the -C<sv_set*v> routines to add the additional scalar type, then set a flag -so that Perl will believe it contains more than one type of data. 
The -four macros to set the flags are: +The stack arguments are accessible through the C<ST(n)> macro, which returns +the C<n>'th stack argument. Argument 0 is the first argument passed in the +Perl subroutine call. These arguments are C<SV*>, and can be used anywhere +an C<SV*> is used. - SvIOK_on - SvNOK_on - SvPOK_on - SvROK_on +Most of the time, output from the C routine can be handled through use of +the RETVAL and OUTPUT directives. However, there are some cases where the +argument stack is not already long enough to handle all the return values. +An example is the POSIX tzname() call, which takes no arguments, but returns +two, the local time zone's standard and summer time abbreviations. -The particular macro you must use depends on which C<sv_set*v> routine -you called first. This is because every C<sv_set*v> routine turns on -only the bit for the particular type of data being set, and turns off -all the rest. +To handle this situation, the PPCODE directive is used and the stack is +extended using the macro: -For example, to create a new Perl variable called "dberror" that contains -both the numeric and descriptive string error values, you could use the -following code: + EXTEND(sp, num); - extern int dberror; - extern char *dberror_list; +where C<sp> is the stack pointer, and C<num> is the number of elements the +stack should be extended by. - SV* sv = perl_get_sv("dberror", TRUE); - sv_setiv(sv, (IV) dberror); - sv_setpv(sv, dberror_list[dberror]); - SvIOK_on(sv); +Now that there is room on the stack, values can be pushed on it using the +macros to push IVs, doubles, strings, and SV pointers respectively: -If the order of C<sv_setiv> and C<sv_setpv> had been reversed, then the -macro C<SvPOK_on> would need to be called instead of C<SvIOK_on>. 
+ PUSHi(IV) + PUSHn(double) + PUSHp(char*, I32) + PUSHs(SV*) + +And now the Perl program calling C<tzname>, the two values will be assigned +as in: + + ($standard_abbrev, $summer_abbrev) = POSIX::tzname; + +An alternate (and possibly simpler) method to pushing values on the stack is +to use the macros: -=head1 Calling Perl Routines from within C Programs + XPUSHi(IV) + XPUSHn(double) + XPUSHp(char*, I32) + XPUSHs(SV*) + +These macros automatically adjust the stack for you, if needed. Thus, you +do not need to call C<EXTEND> to extend the stack. + +For more information, consult L<perlxs> and L<perlxstut>. + +=head2 Calling Perl Routines from within C Programs There are four routines that can be used to call a Perl subroutine from within a C program. These four are: @@ -787,26 +901,30 @@ functions: XPUSH*() POP*() -For more information, consult L<perlcall>. +For a detailed description of calling conventions from C to Perl, +consult L<perlcall>. -=head1 Memory Allocation +=head2 Memory Allocation -It is strongly suggested that you use the version of malloc that is distributed -with Perl. It keeps pools of various sizes of unallocated memory in order to -more quickly satisfy allocation requests. -However, on some platforms, it may cause spurious malloc or free errors. +It is suggested that you use the version of malloc that is distributed +with Perl. It keeps pools of various sizes of unallocated memory in +order to satisfy allocation requests more quickly. However, on some +platforms, it may cause spurious malloc or free errors. New(x, pointer, number, type); Newc(x, pointer, number, type, cast); Newz(x, pointer, number, type); -These three macros are used to initially allocate memory. The first argument -C<x> was a "magic cookie" that was used to keep track of who called the macro, -to help when debugging memory problems. 
However, the current code makes no -use of this feature (Larry has switched to using a run-time memory checker), -so this argument can be any number. +These three macros are used to initially allocate memory. + +The first argument C<x> was a "magic cookie" that was used to keep track +of who called the macro, to help when debugging memory problems. However, +the current code makes no use of this feature (most Perl developers now +use run-time memory checkers), so this argument can be any number. + +The second argument C<pointer> should be the name of a variable that will +point to the newly allocated memory. -The second argument C<pointer> will point to the newly allocated memory. The third and fourth arguments C<number> and C<type> specify how many of the specified type of data structure should be allocated. The argument C<type> is passed to C<sizeof>. The final argument to C<Newc>, C<cast>, @@ -835,6 +953,212 @@ destination starting points. Perl will move, copy, or zero out C<number> instances of the size of the C<type> data structure (using the C<sizeof> function). +=head2 PerlIO + +The most recent development releases of Perl have been experimenting with +removing Perl's dependency on the "normal" standard I/O suite and allowing +other stdio implementations to be used. This involves creating a new +abstraction layer that then calls whichever implementation of stdio Perl +was compiled with. All XSUBs should now use the functions in the PerlIO +abstraction layer and not make any assumptions about what kind of stdio +is being used. + +For a complete description of the PerlIO abstraction, consult L<perlapio>. + +=head2 Putting a C value on Perl stack + +A lot of opcodes (an opcode is an elementary operation in the internal perl +stack machine) put an SV* on the stack. However, as an optimization +the corresponding SV is (usually) not recreated each time. The opcodes +reuse specially assigned SVs (I<target>s) which are (as a corollary) +not constantly freed/created. 
+ +Each of the targets is created only once (but see +L<Scratchpads and recursion> below), and when an opcode needs to put +an integer, a double, or a string on stack, it just sets the +corresponding parts of its I<target> and puts the I<target> on stack. + +The macro to put this target on stack is C<PUSHTARG>, and it is +directly used in some opcodes, as well as indirectly in zillions of +others, which use it via C<(X)PUSH[pni]>. + +=head2 Scratchpads + +The question remains as to when the SVs which are I<target>s for opcodes +are created. The answer is that they are created when the current unit -- +a subroutine or a file (for opcodes for statements outside of +subroutines) -- is compiled. During this time a special anonymous Perl +array is created, which is called a scratchpad for the current +unit. + +A scratchpad keeps SVs which are lexicals for the current unit and are +targets for opcodes. One can deduce that an SV lives on a scratchpad +by looking at its flags: lexicals have C<SVs_PADMY> set, and +I<target>s have C<SVs_PADTMP> set. + +The correspondence between OPs and I<target>s is not 1-to-1. Different +OPs in the compile tree of the unit can use the same target, if this +would not conflict with the expected life of the temporary. + +=head2 Scratchpads and recursion + +In fact it is not 100% true that a compiled unit contains a pointer to +the scratchpad AV. In fact it contains a pointer to an AV of +(initially) one element, and this element is the scratchpad AV. Why do +we need an extra level of indirection? + +The answer is B<recursion>, and maybe (sometime soon) B<threads>. Both +of these can create several execution pointers going into the same +subroutine. For the subroutine-child not to write over the temporaries +for the subroutine-parent (lifespan of which covers the call to the +child), the parent and the child should have different +scratchpads. (I<And> the lexicals should be separate anyway!) 
+ +So each subroutine is born with an array of scratchpads (of length 1). +On each entry to the subroutine it is checked that the current +depth of the recursion is not more than the length of this array, and +if it is, a new scratchpad is created and pushed into the array. + +The I<target>s on this scratchpad are C<undef>s, but they are already +marked with the correct flags. + +=head1 Compiled code + +=head2 Code tree + +Here we describe the internal form your code is converted to by +Perl. Start with a simple example: + + $a = $b + $c; + +This is converted to a tree similar to this one: + + assign-to + / \ + + $a + / \ + $b $c + +(but slightly more complicated). This tree reflects the way Perl +parsed your code, but has nothing to do with the execution order. +There is an additional "thread" going through the nodes of the tree +which shows the order of execution of the nodes. In our simplified +example above it looks like: + + $b ---> $c ---> + ---> $a ---> assign-to + +But with the actual compile tree for C<$a = $b + $c> it is different: +some nodes are I<optimized away>. As a corollary, though the actual tree +contains more nodes than our simplified example, the execution order +is the same as in our example. + +=head2 Examining the tree + +If you have your perl compiled for debugging (usually done with C<-D +optimize=-g> on C<Configure> command line), you may examine the +compiled tree by specifying C<-Dx> on the Perl command line. The +output takes several lines per node, and for C<$b+$c> it looks like +this: + + 5 TYPE = add ===> 6 + TARG = 1 + FLAGS = (SCALAR,KIDS) + { + TYPE = null ===> (4) + (was rv2sv) + FLAGS = (SCALAR,KIDS) + { + 3 TYPE = gvsv ===> 4 + FLAGS = (SCALAR) + GV = main::b + } + } + { + TYPE = null ===> (5) + (was rv2sv) + FLAGS = (SCALAR,KIDS) + { + 4 TYPE = gvsv ===> 5 + FLAGS = (SCALAR) + GV = main::c + } + } + +This tree has 5 nodes (one per C<TYPE> specifier), only 3 of them are +not optimized away (one per number in the left column). 
The immediate +children of the given node correspond to C<{}> pairs on the same level +of indentation, thus this listing corresponds to the tree: + + add + / \ + null null + | | + gvsv gvsv + +The execution order is indicated by C<===E<gt>> marks, thus it is C<3 +4 5 6> (node C<6> is not included in the above listing), i.e., +C<gvsv gvsv add whatever>. + +=head2 Compile pass 1: check routines + +The tree is created by the I<pseudo-compiler> while yacc code feeds it +the constructions it recognizes. Since yacc works bottom-up, so does +the first pass of perl compilation. + +What makes this pass interesting for perl developers is that some +optimization may be performed on this pass. This is optimization by +so-called I<check routines>. The correspondence between node names +and corresponding check routines is described in F<opcode.pl> (do not +forget to run C<make regen_headers> if you modify this file). + +A check routine is called when the node is fully constructed except +for the execution-order thread. Since at this time there are no +back-links to the currently constructed node, one can do almost any +operation to the top-level node, including freeing it and/or creating +new nodes above/below it. + +The check routine returns the node which should be inserted into the +tree (if the top-level node was not modified, the check routine returns +its argument). + +By convention, check routines have names C<ck_*>. They are usually +called from C<new*OP> subroutines (or C<convert>) (which in turn are +called from F<perly.y>). + +=head2 Compile pass 1a: constant folding + +Immediately after the check routine is called the returned node is +checked for being compile-time executable. If it is (the value is +judged to be constant) it is immediately executed, and a I<constant> +node with the "return value" of the corresponding subtree is +substituted instead. The subtree is deleted. + +If constant folding was not performed, the execution-order thread is +created. 
+ +=head2 Compile pass 2: context propagation + +When a context for a part of the compile tree is known, it is propagated +down through the tree. At this time the context can have 5 values +(instead of 2 for runtime context): void, boolean, scalar, list, and +lvalue. In contrast with pass 1 this pass is processed from top +to bottom: a node's context determines the context for its children. + +Additional context-dependent optimizations are performed at this time. +Since at this moment the compile tree contains back-references (via +"thread" pointers), nodes cannot be free()d now. To allow +optimized-away nodes at this stage, such nodes are null()ified instead +of free()ing (i.e. their type is changed to OP_NULL). + +=head2 Compile pass 3: peephole optimization + +After the compile tree for a subroutine (or for an C<eval> or a file) +is created, an additional pass over the code is performed. This pass +is neither top-down nor bottom-up, but in the execution order (with +additional complications for conditionals). These optimizations are +done in the subroutine peep(). Optimizations performed at this stage +are subject to the same restrictions as in pass 2. + =head1 API LISTING This is a listing of functions, macros, flags, and variables that may be @@ -849,7 +1173,8 @@ See C<av_len>. =item av_clear -Clears an array, making it empty. +Clears an array, making it empty. Does not free the memory used by the +array itself. void av_clear _((AV* ar)); @@ -876,8 +1201,9 @@ Returns the highest index in the array. Returns -1 if the array is empty. =item av_make -Creats a new AV and populates it with a list of SVs. The SVs are copied -into the array, so they may be freed after the call to av_make. +Creates a new AV and populates it with a list of SVs. The SVs are copied +into the array, so they may be freed after the call to av_make. The new AV +will have a reference count of 1. AV* av_make _((I32 size, SV** svp)); @@ -890,7 +1216,8 @@ empty. 
=item av_push -Pushes an SV onto the end of the array. +Pushes an SV onto the end of the array. The array will grow automatically +to accommodate the addition. void av_push _((AV* ar, SV* val)); @@ -910,26 +1237,29 @@ dereferenced to get the original C<SV*>. =item av_undef -Undefines the array. +Undefines the array. Frees the memory used by the array itself. void av_undef _((AV* ar)); =item av_unshift -Unshift an SV onto the beginning of the array. +Unshift the given number of C<undef> values onto the beginning of the +array. The array will grow automatically to accommodate the addition. +You must then use C<av_store> to assign values to these new elements. void av_unshift _((AV* ar, I32 num)); =item CLASS Variable which is setup by C<xsubpp> to indicate the class name for a C++ XS -constructor. This is always a C<char*>. See C<THIS> and L<perlxs>. +constructor. This is always a C<char*>. See C<THIS> and +L<perlxs/"Using XS With C++">. =item Copy The XSUB-writer's interface to the C C<memcpy> function. The C<s> is the source, C<d> is the destination, C<n> is the number of items, and C<t> is -the type. +the type. May fail on overlapping copies. See also C<Move>. (void) Copy( s, d, n, t ); @@ -948,27 +1278,40 @@ Returns the stash of the CV. When Perl is run in debugging mode, with the B<-d> switch, this SV is a boolean which indicates whether subs are being single-stepped. -Single-stepping is automatically turned on after every step. See C<DBsub>. +Single-stepping is automatically turned on after every step. This is the C +variable which corresponds to Perl's $DB::single variable. See C<DBsub>. =item DBsub When Perl is run in debugging mode, with the B<-d> switch, this GV contains -the SV which holds the name of the sub being debugged. See C<DBsingle>. +the SV which holds the name of the sub being debugged. This is the C +variable which corresponds to Perl's $DB::sub variable. See C<DBsingle>. 
The sub name can be found by SvPV( GvSV( DBsub ), na ) +=item DBtrace + +Trace variable used when Perl is run in debugging mode, with the B<-d> +switch. This is the C variable which corresponds to Perl's $DB::trace +variable. See C<DBsingle>. + =item dMARK -Declare a stack marker for the XSUB. See C<MARK> and C<dORIGMARK>. +Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and +C<dORIGMARK>. =item dORIGMARK Saves the original stack mark for the XSUB. See C<ORIGMARK>. +=item dowarn + +The C variable which corresponds to Perl's $^W warning variable. + =item dSP -Declares a stack pointer for the XSUB. See C<SP>. +Declares a stack pointer variable, C<sp>, for the XSUB. See C<SP>. =item dXSARGS @@ -976,6 +1319,16 @@ Sets up stack and mark pointers for an XSUB, calling dSP and dMARK. This is usually handled automatically by C<xsubpp>. Declares the C<items> variable to indicate the number of items on the stack. +=item dXSI32 + +Sets up the C<ix> variable for an XSUB which has aliases. This is usually +handled automatically by C<xsubpp>. + +=item dXSI32 + +Sets up the C<ix> variable for an XSUB which has aliases. This is usually +handled automatically by C<xsubpp>. + =item ENTER Opening bracket on a callback. See C<LEAVE> and L<perlcall>. @@ -997,7 +1350,7 @@ L<perlcall>. =item G_ARRAY -Used to indicate array context. See C<GIMME> and L<perlcall>. +Used to indicate array context. See C<GIMME_V>, C<GIMME> and L<perlcall>. =item G_DISCARD @@ -1010,8 +1363,14 @@ Used to force a Perl C<eval> wrapper around a callback. See L<perlcall>. =item GIMME -The XSUB-writer's equivalent to Perl's C<wantarray>. Returns C<G_SCALAR> or -C<G_ARRAY> for scalar or array context. +A backward-compatible version of C<GIMME_V> which can only return +C<G_SCALAR> or C<G_ARRAY>; in a void context, it returns C<G_SCALAR>. + +=item GIMME_V + +The XSUB-writer's equivalent to Perl's C<wantarray>. 
Returns +C<G_VOID>, C<G_SCALAR> or C<G_ARRAY> for void, scalar or array +context, respectively. =item G_NOARGS @@ -1019,7 +1378,63 @@ Indicates that no arguments are being sent to a callback. See L<perlcall>. =item G_SCALAR -Used to indicate scalar context. See C<GIMME> and L<perlcall>. +Used to indicate scalar context. See C<GIMME_V>, C<GIMME>, and L<perlcall>. + +=item G_VOID + +Used to indicate void context. See C<GIMME_V> and L<perlcall>. + +=item gv_fetchmeth + +Returns the glob with the given C<name> and a defined subroutine or +C<NULL>. The glob lives in the given C<stash>, or in the stashes +accessible via @ISA and C<UNIVERSAL>. + +The argument C<level> should be either 0 or -1. If C<level==0>, as a +side-effect it creates a glob with the given C<name> in the given +C<stash> which in the case of success contains an alias for the +subroutine, and sets up caching info for this glob. Similarly for all +the searched stashes. + +This function grants C<"SUPER"> token as a postfix of the stash name. + +The GV returned from C<gv_fetchmeth> may be a method cache entry, +which is not visible to Perl code. So when calling C<perl_call_sv>, +you should not use the GV directly; instead, you should use the +method's CV, which can be obtained from the GV with the C<GvCV> macro. + + GV* gv_fetchmeth _((HV* stash, char* name, STRLEN len, I32 level)); + +=item gv_fetchmethod + +=item gv_fetchmethod_autoload + +Returns the glob which contains the subroutine to call to invoke the +method on the C<stash>. In fact in the presence of autoloading this may +be the glob for "AUTOLOAD". In this case the corresponding variable +$AUTOLOAD is already set up. + +The third parameter of C<gv_fetchmethod_autoload> determines whether AUTOLOAD +lookup is performed if the given method is not present: non-zero means +yes, look for AUTOLOAD; zero means no, don't look for AUTOLOAD. Calling +C<gv_fetchmethod> is equivalent to calling C<gv_fetchmethod_autoload> with a +non-zero C<autoload> parameter. 
+ +These functions grant C<"SUPER"> token as a prefix of the method name. + +Note that if you want to keep the returned glob for a long time, you +need to check for it being "AUTOLOAD", since at a later time the call +may load a different subroutine due to $AUTOLOAD changing its value. +Use the glob created via a side effect to do this. + +These functions have the same side-effects as C<gv_fetchmeth> with +C<level==0>. C<name> should be writable if it contains C<':'> or C<'\''>. +The warning against passing the GV returned by C<gv_fetchmeth> to +C<perl_call_sv> applies equally to these functions. + + GV* gv_fetchmethod _((HV* stash, char* name)); + GV* gv_fetchmethod_autoload _((HV* stash, char* name, + I32 autoload)); =item gv_stashpv @@ -1039,9 +1454,76 @@ Returns a pointer to the stash for a specified package. See C<gv_stashpv>. Return the SV from the GV. -=item he_free +=item HEf_SVKEY + +This flag, used in the length slot of hash entries and magic +structures, specifies the structure contains a C<SV*> pointer where a +C<char*> pointer is to be expected. (For information only--not to be used). + +=item HeHASH + +Returns the computed hash (type C<U32>) stored in the hash entry. + + HeHASH(HE* he) + +=item HeKEY + +Returns the actual pointer stored in the key slot of the hash entry. +The pointer may be either C<char*> or C<SV*>, depending on the value of +C<HeKLEN()>. Can be assigned to. The C<HePV()> or C<HeSVKEY()> macros +are usually preferable for finding the value of a key. + + HeKEY(HE* he) + +=item HeKLEN + +If this is negative, and amounts to C<HEf_SVKEY>, it indicates the entry +holds an C<SV*> key. Otherwise, holds the actual length of the key. +Can be assigned to. The C<HePV()> macro is usually preferable for finding +key lengths. + + HeKLEN(HE* he) + +=item HePV + +Returns the key slot of the hash entry as a C<char*> value, doing any +necessary dereferencing of possibly C<SV*> keys. 
The length of +the string is placed in C<len> (this is a macro, so do I<not> use +C<&len>). If you do not care about what the length of the key is, +you may use the global variable C<na>. Remember though, that hash +keys in perl are free to contain embedded nulls, so using C<strlen()> +or similar is not a good way to find the length of hash keys. +This is very similar to the C<SvPV()> macro described elsewhere in +this document. + + HePV(HE* he, STRLEN len) + +=item HeSVKEY + +Returns the key as an C<SV*>, or C<Nullsv> if the hash entry +does not contain an C<SV*> key. -Releases a hash entry from an iterator. See C<hv_iternext>. + HeSVKEY(HE* he) + +=item HeSVKEY_force + +Returns the key as an C<SV*>. Will create and return a temporary +mortal C<SV*> if the hash entry contains only a C<char*> key. + + HeSVKEY_force(HE* he) + +=item HeSVKEY_set + +Sets the key to a given C<SV*>, taking care to set the appropriate flags +to indicate the presence of an C<SV*> key, and returns the same C<SV*>. + + HeSVKEY_set(HE* he, SV* sv) + +=item HeVAL + +Returns the value slot (type C<SV*>) stored in the hash entry. + + HeVAL(HE* he) =item hv_clear @@ -1049,31 +1531,75 @@ Clears a hash, making it empty. void hv_clear _((HV* tb)); +=item hv_delayfree_ent + +Releases a hash entry, such as while iterating through the hash, but +delays actual freeing of key and value until the end of the current +statement (or thereabouts) with C<sv_2mortal>. See C<hv_iternext> +and C<hv_free_ent>. + + void hv_delayfree_ent _((HV* hv, HE* entry)); + =item hv_delete Deletes a key/value pair in the hash. The value SV is removed from the hash -and returned to the caller. The C<lken> is the length of the key. The +and returned to the caller. The C<klen> is the length of the key. The C<flags> value will normally be zero; if set to G_DISCARD then null will be returned. SV* hv_delete _((HV* tb, char* key, U32 klen, I32 flags)); +=item hv_delete_ent + +Deletes a key/value pair in the hash. 
The value SV is removed from the hash +and returned to the caller. The C<flags> value will normally be zero; if set +to G_DISCARD then null will be returned. C<hash> can be a valid precomputed +hash value, or 0 to ask for it to be computed. + + SV* hv_delete_ent _((HV* tb, SV* key, I32 flags, U32 hash)); + =item hv_exists Returns a boolean indicating whether the specified hash key exists. The -C<lken> is the length of the key. +C<klen> is the length of the key. bool hv_exists _((HV* tb, char* key, U32 klen)); +=item hv_exists_ent + +Returns a boolean indicating whether the specified hash key exists. C<hash> +can be a valid precomputed hash value, or 0 to ask for it to be computed. + + bool hv_exists_ent _((HV* tb, SV* key, U32 hash)); + =item hv_fetch Returns the SV which corresponds to the specified key in the hash. The -C<lken> is the length of the key. If C<lval> is set then the fetch will be +C<klen> is the length of the key. If C<lval> is set then the fetch will be part of a store. Check that the return value is non-null before dereferencing it to a C<SV*>. SV** hv_fetch _((HV* tb, char* key, U32 klen, I32 lval)); +=item hv_fetch_ent + +Returns the hash entry which corresponds to the specified key in the hash. +C<hash> must be a valid precomputed hash number for the given C<key>, or +0 if you want the function to compute it. If C<lval> is set then the +fetch will be part of a store. Make sure the return value is non-null +before accessing it. The return value when C<tb> is a tied hash +is a pointer to a static location, so be sure to make a copy of the +structure if you need to store it somewhere. + + HE* hv_fetch_ent _((HV* tb, SV* key, I32 lval, U32 hash)); + +=item hv_free_ent + +Releases a hash entry, such as while iterating through the hash. See +C<hv_iternext> and C<hv_delayfree_ent>. + + void hv_free_ent _((HV* hv, HE* entry)); + =item hv_iterinit Prepares a starting point to traverse a hash table. @@ -1087,6 +1613,14 @@ C<hv_iterinit>. 
char* hv_iterkey _((HE* entry, I32* retlen)); +=item hv_iterkeysv + +Returns the key as an C<SV*> from the current position of the hash +iterator. The return value will always be a mortal copy of the +key. Also see C<hv_iterinit>. + + SV* hv_iterkeysv _((HE* entry)); + =item hv_iternext Returns entries from a hash iterator. See C<hv_iterinit>. @@ -1122,13 +1656,24 @@ Returns the package name of a stash. See C<SvSTASH>, C<CvSTASH>. =item hv_store Stores an SV in a hash. The hash key is specified as C<key> and C<klen> is -the length of the key. The C<hash> parameter is the pre-computed hash +the length of the key. The C<hash> parameter is the precomputed hash value; if it is zero then Perl will compute it. The return value will be null if the operation failed, otherwise it can be dereferenced to get the original C<SV*>. SV** hv_store _((HV* tb, char* key, U32 klen, SV* val, U32 hash)); +=item hv_store_ent + +Stores C<val> in a hash. The hash key is specified as C<key>. The C<hash> +parameter is the precomputed hash value; if it is zero then Perl will +compute it. The return value is the new hash entry so created. It will be +null if the operation failed or if the entry was stored in a tied hash. +Otherwise the contents of the return value can be accessed using the +C<He???> macros described here. + + HE* hv_store_ent _((HV* tb, SV* key, SV* val, U32 hash)); + =item hv_undef Undefines the hash. @@ -1144,7 +1689,7 @@ character or digit. =item isALPHA -Returns a boolean indicating whether the C C<char> is an ascii alphanumeric +Returns a boolean indicating whether the C C<char> is an ascii alphabetic character. int isALPHA (char c) @@ -1176,7 +1721,12 @@ Returns a boolean indicating whether the C C<char> is an uppercase character. =item items Variable which is setup by C<xsubpp> to indicate the number of items on the -stack. See L<perlxs>. +stack. See L<perlxs/"Variable-length Parameter Lists">. 
+ +=item ix + +Variable which is setup by C<xsubpp> to indicate which of an XSUB's aliases +was used to invoke it. See L<perlxs/"The ALIAS: Keyword">. =item LEAVE @@ -1186,7 +1736,7 @@ Closing bracket on a callback. See C<ENTER> and L<perlcall>. =item MARK -Stack marker for the XSUB. See C<dMARK>. +Stack marker variable for the XSUB. See C<dMARK>. =item mg_clear @@ -1240,7 +1790,7 @@ Do magic after a value is assigned to the SV. See C<sv_magic>. The XSUB-writer's interface to the C C<memmove> function. The C<s> is the source, C<d> is the destination, C<n> is the number of items, and C<t> is -the type. +the type. Can do overlapping moves. See also C<Copy>. (void) Move( s, d, n, t ); @@ -1270,64 +1820,73 @@ memory is zeroed with C<memzero>. =item newAV -Creates a new AV. The refcount is set to 1. +Creates a new AV. The reference count is set to 1. AV* newAV _((void)); =item newHV -Creates a new HV. The refcount is set to 1. +Creates a new HV. The reference count is set to 1. HV* newHV _((void)); -=item newRV +=item newRV_inc -Creates an RV wrapper for an SV. The refcount for the original SV is +Creates an RV wrapper for an SV. The reference count for the original SV is incremented. - SV* newRV _((SV* ref)); + SV* newRV_inc _((SV* ref)); + +For historical reasons, "newRV" is a synonym for "newRV_inc". + +=item newRV_noinc + +Creates an RV wrapper for an SV. The reference count for the original +SV is B<not> incremented. + + SV* newRV_noinc _((SV* ref)); =item newSV Creates a new SV. The C<len> parameter indicates the number of bytes of -pre-allocated string space the SV should have. The refcount for the new SV -is set to 1. +preallocated string space the SV should have. The reference count for the +new SV is set to 1. SV* newSV _((STRLEN len)); =item newSViv -Creates a new SV and copies an integer into it. The refcount for the SV is -set to 1. +Creates a new SV and copies an integer into it. The reference count for the +SV is set to 1. 
SV* newSViv _((IV i)); =item newSVnv -Creates a new SV and copies a double into it. The refcount for the SV is -set to 1. +Creates a new SV and copies a double into it. The reference count for the +SV is set to 1. SV* newSVnv _((NV i)); =item newSVpv -Creates a new SV and copies a string into it. The refcount for the SV is -set to 1. If C<len> is zero then Perl will compute the length. +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. If C<len> is zero then Perl will compute the length. SV* newSVpv _((char* s, STRLEN len)); =item newSVrv Creates a new SV for the RV, C<rv>, to point to. If C<rv> is not an RV then -it will be upgraded one. If C<classname> is non-null then the new SV will +it will be upgraded to one. If C<classname> is non-null then the new SV will be blessed in the specified package. The new SV is returned and its -refcount is 1. +reference count is 1. SV* newSVrv _((SV* rv, char* classname)); =item newSVsv -Creates a new SV which is an exact duplicate of the orignal SV. +Creates a new SV which is an exact duplicate of the original SV. SV* newSVsv _((SV* old)); @@ -1408,6 +1967,12 @@ Tells Perl to C<eval> the string in the SV. I32 perl_eval_sv _((SV* sv, I32 flags)); +=item perl_eval_pv + +Tells Perl to C<eval> the given string and return an SV* result. + + SV* perl_eval_pv _((char* p, I32 croak_on_error)); + =item perl_free Releases a Perl interpreter. See L<perlembed>. @@ -1544,7 +2109,8 @@ The XSUB-writer's interface to the C C<realloc> function, with cast. =item RETVAL Variable which is setup by C<xsubpp> to hold the return value for an XSUB. -This is always the proper type for the XSUB. See L<perlxs>. +This is always the proper type for the XSUB. +See L<perlxs/"The RETVAL Variable">. =item safefree @@ -1659,8 +2225,8 @@ ends. =item sv_bless Blesses an SV into a specified package. The SV must be an RV. The package -must be designated by its stash (see C<gv_stashpv()>). 
The refcount of the -SV is unaffected. +must be designated by its stash (see C<gv_stashpv()>). The reference count +of the SV is unaffected. SV* sv_bless _((SV* sv, HV* stash)); @@ -1677,13 +2243,36 @@ C<len> indicates number of bytes to copy. void sv_catpvn _((SV* sv, char* ptr, STRLEN len)); +=item sv_catpvf + +Processes its arguments like C<sprintf> and appends the formatted output +to an SV. + + void sv_catpvf _((SV* sv, const char* pat, ...)); + =item sv_catsv -Concatentates the string from SV C<ssv> onto the end of the string in SV +Concatenates the string from SV C<ssv> onto the end of the string in SV C<dsv>. void sv_catsv _((SV* dsv, SV* ssv)); +=item sv_cmp + +Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the +string in C<sv1> is less than, equal to, or greater than the string in +C<sv2>. + + I32 sv_cmp _((SV* sv1, SV* sv2)); + +=item sv_cmp + +Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the +string in C<sv1> is less than, equal to, or greater than the string in +C<sv2>. + + I32 sv_cmp _((SV* sv1, SV* sv2)); + =item SvCUR Returns the length of the string which is in the SV. See C<SvLEN>. @@ -1696,6 +2285,18 @@ Set the length of the string which is in the SV. See C<SvCUR>. SvCUR_set (SV* sv, int val ) +=item sv_dec + +Auto-decrement of the value in the SV. + + void sv_dec _((SV* sv)); + +=item sv_dec + +Auto-decrement of the value in the SV. + + void sv_dec _((SV* sv)); + =item SvEND Returns a pointer to the last character in the string which is in the SV. @@ -1703,12 +2304,32 @@ See C<SvCUR>. Access the character as *SvEND(sv) +=item sv_eq + +Returns a boolean indicating whether the strings in the two SVs are +identical. + + I32 sv_eq _((SV* sv1, SV* sv2)); + =item SvGROW -Expands the character buffer in the SV. +Expands the character buffer in the SV. Calls C<sv_grow> to perform the +expansion if necessary. Returns a pointer to the character buffer. 
char * SvGROW( SV* sv, int len ) +=item sv_grow + +Expands the character buffer in the SV. This will use C<sv_unref> and will +upgrade the SV to C<SVt_PV>. Returns a pointer to the character buffer. +Use C<SvGROW>. + +=item sv_inc + +Auto-increment of the value in the SV. + + void sv_inc _((SV* sv)); + =item SvIOK Returns a boolean indicating whether the SV contains an integer. @@ -1727,6 +2348,18 @@ Tells an SV that it is an integer. SvIOK_on (SV* sv) +=item SvIOK_only + +Tells an SV that it is an integer and disables all other OK bits. + + SvIOK_only (SV* sv) + +=item SvIOK_only + +Tells an SV that it is an integer and disables all other OK bits. + + SvIOK_only (SV* sv) + =item SvIOKp Returns a boolean indicating whether the SV contains an integer. Checks the @@ -1768,6 +2401,18 @@ Returns the size of the string buffer in the SV. See C<SvCUR>. int SvLEN (SV* sv) +=item sv_len + +Returns the length of the string in the SV. Use C<SvCUR>. + + STRLEN sv_len _((SV* sv)); + +=item sv_len + +Returns the length of the string in the SV. Use C<SvCUR>. + + STRLEN sv_len _((SV* sv)); + =item sv_magic Adds magic to an SV. @@ -1789,7 +2434,7 @@ Returns a boolean indicating whether the value is an SV. =item sv_newmortal -Creates a new SV which is mortal. The refcount of the SV is set to 1. +Creates a new SV which is mortal. The reference count of the SV is set to 1. SV* sv_newmortal _((void)); @@ -1835,6 +2480,18 @@ Tells an SV that it is a double. SvNOK_on (SV* sv) +=item SvNOK_only + +Tells an SV that it is a double and disables all other OK bits. + + SvNOK_only (SV* sv) + +=item SvNOK_only + +Tells an SV that it is a double and disables all other OK bits. + + SvNOK_only (SV* sv) + =item SvNOKp Returns a boolean indicating whether the SV contains a double. Checks the @@ -1872,6 +2529,18 @@ Tells an SV that it is a string. SvPOK_on (SV* sv) +=item SvPOK_only + +Tells an SV that it is a string and disables all other OK bits.
+ + SvPOK_only (SV* sv) + +=item SvPOK_only + +Tells an SV that it is a string and disables all other OK bits. + + SvPOK_only (SV* sv) + =item SvPOKp Returns a boolean indicating whether the SV contains a character string. @@ -1895,19 +2564,19 @@ Returns a pointer to the string in the SV. The SV must contain a string. =item SvREFCNT -Returns the value of the object's refcount. +Returns the value of the object's reference count. int SvREFCNT (SV* sv); =item SvREFCNT_dec -Decrements the refcount of the given SV. +Decrements the reference count of the given SV. void SvREFCNT_dec (SV* sv) =item SvREFCNT_inc -Increments the refcount of the given SV. +Increments the reference count of the given SV. void SvREFCNT_inc (SV* sv) @@ -1960,31 +2629,41 @@ bytes to be copied. void sv_setpvn _((SV* sv, char* ptr, STRLEN len)); +=item sv_setpvf + +Processes its arguments like C<sprintf> and sets an SV to the formatted +output. + + void sv_setpvf _((SV* sv, const char* pat, ...)); + =item sv_setref_iv -Copies an integer into an SV, optionally blessing the SV. The SV must be an -RV. The C<classname> argument indicates the package for the blessing. Set -C<classname> to C<Nullch> to avoid the blessing. The new SV will be -returned and will have a refcount of 1. +Copies an integer into a new SV, optionally blessing the SV. The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. SV* sv_setref_iv _((SV *rv, char *classname, IV iv)); =item sv_setref_nv -Copies a double into an SV, optionally blessing the SV. The SV must be an -RV. The C<classname> argument indicates the package for the blessing. Set -C<classname> to C<Nullch> to avoid the blessing. The new SV will be -returned and will have a refcount of 1. +Copies a double into a new SV, optionally blessing the SV.
The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. SV* sv_setref_nv _((SV *rv, char *classname, double nv)); =item sv_setref_pv -Copies a pointer into an SV, optionally blessing the SV. The SV must be an -RV. If the C<pv> argument is NULL then C<sv_undef> will be placed into the -SV. The C<classname> argument indicates the package for the blessing. Set -C<classname> to C<Nullch> to avoid the blessing. The new SV will be -returned and will have a refcount of 1. +Copies a pointer into a new SV, optionally blessing the SV. The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. If the C<pv> argument is NULL then C<sv_undef> will be placed +into the SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. SV* sv_setref_pv _((SV *rv, char *classname, void* pv)); @@ -1995,11 +2674,12 @@ Note that C<sv_setref_pvn> copies the string while this copies the pointer. =item sv_setref_pvn -Copies a string into an SV, optionally blessing the SV. The lenth of the -string must be specified with C<n>. The SV must be an RV. The C<classname> +Copies a string into a new SV, optionally blessing the SV. The length of the +string must be specified with C<n>. The C<rv> argument will be upgraded to +an RV. That RV will be modified to point to the new SV. The C<classname> argument indicates the package for the blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV will be returned and will have -a refcount of 1. +a reference count of 1. 
SV* sv_setref_pvn _((SV *rv, char *classname, char* pv, I32 n)); @@ -2008,9 +2688,7 @@ Note that C<sv_setref_pv> copies the pointer while this copies the string. =item sv_setsv Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. -(B<NOTE:> If C<ssv> has the C<SVs_TEMP> bit set, C<sv_setsv> may simply steal -the string from C<ssv> and give it to C<dsv>, leaving C<ssv> empty. -Caveat caller.) +The source SV may be destroyed if it is mortal. void sv_setsv _((SV* dsv, SV* ssv)); @@ -2068,16 +2746,32 @@ C<svtype> enum. Test these flags with the C<SvTYPE> macro. =item SvUPGRADE -Used to upgrade an SV to a more complex form. See C<svtype>. +Used to upgrade an SV to a more complex form. Uses C<sv_upgrade> to perform +the upgrade if necessary. See C<svtype>. + + bool SvUPGRADE _((SV* sv, svtype mt)); + +=item sv_upgrade + +Upgrade an SV to a more complex form. Use C<SvUPGRADE>. See C<svtype>. =item sv_undef This is the C<undef> SV. Always refer to this as C<&sv_undef>. +=item sv_unref + +Unsets the RV status of the SV, and decrements the reference count of +whatever was being referenced by the RV. This can almost be thought of +as a reversal of C<newSVrv>. See C<SvROK_off>. + + void sv_unref _((SV* sv)); + =item sv_usepvn Tells an SV to use C<ptr> to find its string value. Normally the string is -stored inside the SV; this allows the SV to use an outside string. The +stored inside the SV but sv_usepvn allows the SV to use an outside string. +The C<ptr> should point to memory that was allocated by C<malloc>. The string length, C<len>, must be supplied. This function will realloc the memory pointed to by C<ptr>, so that pointer should not be freed or used by the programmer after giving it to sv_usepvn. @@ -2092,7 +2786,7 @@ This is the C<true> SV. See C<sv_no>. Always refer to this as C<&sv_yes>. Variable which is setup by C<xsubpp> to designate the object in a C++ XSUB. This is always the proper type for the C++ object. See C<CLASS> and -L<perlxs>. 
+L<perlxs/"Using XS With C++">. =item toLOWER @@ -2138,37 +2832,110 @@ Push an SV onto the stack, extending the stack if necessary. See C<PUSHs>. XPUSHs(sv) +=item XS + +Macro to declare an XSUB and its C parameter list. This is handled by +C<xsubpp>. + =item XSRETURN Return from XSUB, indicating number of items on the stack. This is usually handled by C<xsubpp>. - XSRETURN(x); + XSRETURN(int x); =item XSRETURN_EMPTY -Return from an XSUB immediately. +Return an empty list from an XSUB immediately. XSRETURN_EMPTY; +=item XSRETURN_IV + +Return an integer from an XSUB immediately. Uses C<XST_mIV>. + + XSRETURN_IV(IV v); + =item XSRETURN_NO -Return C<false> from an XSUB immediately. +Return C<&sv_no> from an XSUB immediately. Uses C<XST_mNO>. XSRETURN_NO; +=item XSRETURN_NV + +Return a double from an XSUB immediately. Uses C<XST_mNV>. + + XSRETURN_NV(NV v); + +=item XSRETURN_PV + +Return a copy of a string from an XSUB immediately. Uses C<XST_mPV>. + + XSRETURN_PV(char *v); + =item XSRETURN_UNDEF -Return C<undef> from an XSUB immediately. +Return C<&sv_undef> from an XSUB immediately. Uses C<XST_mUNDEF>. XSRETURN_UNDEF; =item XSRETURN_YES -Return C<true> from an XSUB immediately. +Return C<&sv_yes> from an XSUB immediately. Uses C<XST_mYES>. XSRETURN_YES; +=item XST_mIV + +Place an integer into the specified position C<i> on the stack. The value is +stored in a new mortal SV. + + XST_mIV( int i, IV v ); + +=item XST_mNV + +Place a double into the specified position C<i> on the stack. The value is +stored in a new mortal SV. + + XST_mNV( int i, NV v ); + +=item XST_mNO + +Place C<&sv_no> into the specified position C<i> on the stack. + + XST_mNO( int i ); + +=item XST_mPV + +Place a copy of a string into the specified position C<i> on the stack. The +value is stored in a new mortal SV. + + XST_mPV( int i, char *v ); + +=item XST_mUNDEF + +Place C<&sv_undef> into the specified position C<i> on the stack.
+ + XST_mUNDEF( int i ); + +=item XST_mYES + +Place C<&sv_yes> into the specified position C<i> on the stack. + + XST_mYES( int i ); + +=item XS_VERSION + +The version identifier for an XS module. This is usually handled +automatically by C<ExtUtils::MakeMaker>. See C<XS_VERSION_BOOTCHECK>. + +=item XS_VERSION_BOOTCHECK + +Macro to verify that a PM module's $VERSION variable matches the XS module's +C<XS_VERSION> variable. This is usually handled automatically by +C<xsubpp>. See L<perlxs/"The VERSIONCHECK: Keyword">. + =item Zero The XSUB-writer's interface to the C C<memzero> function. The C<d> is the @@ -2178,17 +2945,16 @@ destination, C<n> is the number of items, and C<t> is the type. =back -=head1 AUTHOR +=head1 EDITOR -Jeff Okamoto <okamoto@corp.hp.com> +Jeff Okamoto <F<okamoto@corp.hp.com>> With lots of help and suggestions from Dean Roehrich, Malcolm Beattie, Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil -Bowers, Matthew Green, Tim Bunce, and Spider Boardman. +Bowers, Matthew Green, Tim Bunce, Spider Boardman, and Ulrich Pfeifer. -API Listing by Dean Roehrich <roehrich@cray.com>. +API Listing by Dean Roehrich <F<roehrich@cray.com>>. =head1 DATE -Version 20: 1995/12/14 - +Version 31.7: 1997/5/1 diff --git a/pod/perlipc.pod b/pod/perlipc.pod index ac2c5fd584..6b1f2ab335 100644 --- a/pod/perlipc.pod +++ b/pod/perlipc.pod @@ -1,6 +1,6 @@ =head1 NAME -perlipc - Perl interprocess communication (signals, fifos, pipes, safe subprocceses, sockets, and semaphores) +perlipc - Perl interprocess communication (signals, fifos, pipes, safe subprocesses, sockets, and semaphores) =head1 DESCRIPTION @@ -14,23 +14,23 @@ Perl uses a simple signal handling model: the %SIG hash contains names or references of user-installed signal handlers. These handlers will be called with an argument which is the name of the signal that triggered it. 
A signal may be generated intentionally from a particular keyboard sequence like -control-C or control-Z, sent to you from an another process, or +control-C or control-Z, sent to you from another process, or triggered automatically by the kernel when special events transpire, like -a child process exiting, your process running out of stack space, or +a child process exiting, your process running out of stack space, or hitting file size limit. For example, to trap an interrupt signal, set up a handler like this. -Notice how all we do is set with a global variable and then raise an -exception. That's because on most systems libraries are not -re-entrant, so calling any print() functions (or even anything that needs to -malloc(3) more memory) could in theory trigger a memory fault -and subsequent core dump. +Do as little as you possibly can in your handler; notice how all we do is +set a global variable and then raise an exception. That's because on most +systems, libraries are not re-entrant; particularly, memory allocation and +I/O routines are not. That means that doing nearly I<anything> in your +handler could in theory trigger a memory fault and subsequent core dump. sub catch_zap { my $signame = shift; $shucks++; die "Somebody sent me a SIG$signame"; - } + } $SIG{INT} = 'catch_zap'; # could fail in modules $SIG{INT} = \&catch_zap; # best strategy @@ -45,14 +45,14 @@ indexed by name to get the number: $signo{$name} = $i; $signame[$i] = $name; $i++; - } + } -So to check whether signal 17 and SIGALRM were the same, just do this: +So to check whether signal 17 and SIGALRM were the same, do just this: print "signal #17 = $signame[17]\n"; - if ($signo{ALRM}) { + if ($signo{ALRM}) { print "SIGALRM is $signo{ALRM}\n"; - } + } You may also choose to assign the strings C<'IGNORE'> or C<'DEFAULT'> as the handler, in which case Perl will try to discard the signal or do the @@ -65,10 +65,10 @@ values are "inherited" by functions called from within that block.) 
sub precious { local $SIG{INT} = 'IGNORE'; &more_functions; - } + } sub more_functions { # interrupts still ignored, for now... - } + } Sending a signal to a negative process ID means that you send the signal to the entire Unix process-group. This code send a hang-up signal to all @@ -83,11 +83,11 @@ itself: Another interesting signal to send is signal number zero. This doesn't actually affect another process, but instead checks whether it's alive -or has changed its UID. +or has changed its UID. unless (kill 0 => $kid_pid) { warn "something wicked happened to $kid_pid"; - } + } You might also want to employ anonymous functions for simple signal handlers: @@ -95,29 +95,31 @@ handlers: $SIG{INT} = sub { die "\nOutta here!\n" }; But that will be problematic for the more complicated handlers that need -to re-install themselves. Because Perl's signal mechanism is currently -based on the signal(3) function from the C library, you may somtimes be so +to reinstall themselves. Because Perl's signal mechanism is currently +based on the signal(3) function from the C library, you may sometimes be so misfortunate as to run on systems where that function is "broken", that is, it behaves in the old unreliable SysV way rather than the newer, more reasonable BSD and POSIX fashion. So you'll see defensive people writing signal handlers like this: - sub REAPER { - $SIG{CHLD} = \&REAPER; # loathe sysV + sub REAPER { $waitedpid = wait; + # loathe sysV: it makes us not only reinstate + # the handler, but place it after the wait + $SIG{CHLD} = \&REAPER; } $SIG{CHLD} = \&REAPER; # now do something that forks... or even the more elaborate: - use POSIX "wait_h"; - sub REAPER { + use POSIX ":sys_wait_h"; + sub REAPER { my $child; - $SIG{CHLD} = \&REAPER; # loathe sysV while ($child = waitpid(-1,WNOHANG)) { $Kid_Status{$child} = $?; - } + } + $SIG{CHLD} = \&REAPER; # still loathe sysV } $SIG{CHLD} = \&REAPER; # do something that forks... 
@@ -132,11 +134,11 @@ using longjmp() or throw() in other languages. Here's an example: - eval { + eval { local $SIG{ALRM} = sub { die "alarm clock restart" }; - alarm 10; + alarm 10; flock(FH, 2); # blocking write lock - alarm 0; + alarm 0; }; if ($@ and $@ !~ /alarm clock restart/) { die } @@ -149,7 +151,7 @@ examples in it. A named pipe (often referred to as a FIFO) is an old Unix IPC mechanism for processes communicating on the same machine. It works -just like a regular, connected anonymous pipes, except that the +just like a regular, connected anonymous pipes, except that the processes rendezvous using a filename and don't have to be related. To create a named pipe, use the Unix command mknod(1) or on some @@ -158,22 +160,22 @@ systems, mkfifo(1). These may not be in your normal path. # system return val is backwards, so && not || # $ENV{PATH} .= ":/etc:/usr/etc"; - if ( system('mknod', $path, 'p') + if ( system('mknod', $path, 'p') && system('mkfifo', $path) ) { die "mk{nod,fifo} $path failed; - } + } A fifo is convenient when you want to connect a process to an unrelated one. When you open a fifo, the program will block until there's something -on the other end. +on the other end. For example, let's say you'd like to have your F<.signature> file be a named pipe that has a Perl program on the other end. Now every time any -program (like a mailer, newsreader, finger program, etc.) tries to read +program (like a mailer, news reader, finger program, etc.) tries to read from that file, the reading program will block and your program will -supply the the new signature. We'll use the pipe-checking file test B<-p> +supply the new signature. We'll use the pipe-checking file test B<-p> to find out whether anyone (or anything) has accidentally removed our fifo. chdir; # go home @@ -183,15 +185,15 @@ to find out whether anyone (or anything) has accidentally removed our fifo. 
while (1) { unless (-p $FIFO) { unlink $FIFO; - system('mknod', $FIFO, 'p') + system('mknod', $FIFO, 'p') && die "can't mknod $FIFO: $!"; - } + } # next line blocks until there's a reader open (FIFO, "> $FIFO") || die "can't write $FIFO: $!"; print FIFO "John Smith (smith\@host.org)\n", `fortune -s`; close FIFO; - sleep 2; # to avoid dup sigs + sleep 2; # to avoid dup signals } @@ -199,10 +201,10 @@ to find out whether anyone (or anything) has accidentally removed our fifo. Perl's basic open() statement can also be used for unidirectional interprocess communication by either appending or prepending a pipe symbol to the second -argument to open(). Here's how to start something up a child process you +argument to open(). Here's how to start something up in a child process you intend to write to: - open(SPOOLER, "| cat -v | lpr -h 2>/dev/null") + open(SPOOLER, "| cat -v | lpr -h 2>/dev/null") || die "can't fork: $!"; local $SIG{PIPE} = sub { die "spooler pipe broke" }; print SPOOLER "stuff\n"; @@ -215,8 +217,8 @@ And here's how to start up a child process you intend to read from: while (<STATUS>) { next if /^(tcp|udp)/; print; - } - close SPOOLER || die "bad netstat: $! $?"; + } + close STATUS || die "bad netstat: $! $?"; If one can be sure that a particular program is a Perl script that is expecting filenames in @ARGV, the clever programmer can write something @@ -248,7 +250,7 @@ exist: the open() will in all likelihood succeed (it only reflects the fork()'s success), but then your output will fail--spectacularly. Perl can't know whether the command worked because your command is actually running in a separate process whose exec() might have failed. Therefore, -while readers of bogus commands just return a quick end of file, writers +while readers of bogus commands return just a quick end of file, writers to bogus command will trigger a signal they'd better be prepared to handle. Consider: @@ -256,6 +258,57 @@ handle. 
Consider: print FH "bang\n"; close FH; +=head2 Filehandles + +Both the main process and the child process share the same STDIN, +STDOUT and STDERR filehandles. If both processes try to access them +at once, strange things can happen. You may want to close or reopen +the filehandles for the child. You can get around this by opening +your pipe with open(), but on some systems this means that the child +process cannot outlive the parent. + +=head2 Background Processes + +You can run a command in the background with: + + system("cmd &"); + +The command's STDOUT and STDERR (and possibly STDIN, depending on your +shell) will be the same as the parent's. You won't need to catch +SIGCHLD because of the double-fork taking place (see below for more +details). + +=head2 Complete Dissociation of Child from Parent + +In some cases (starting server processes, for instance) you'll want to +completely dissociate the child process from the parent. The following +process is reported to work on most Unixish systems. Non-Unix users +should check their Your_OS::Process module for other solutions. + +=over 4 + +=item * + +Open /dev/tty and use the TIOCNOTTY ioctl on it. See L<tty(4)> +for details. + +=item * + +Change directory to / + +=item * + +Reopen STDIN, STDOUT, and STDERR so they're not connected to the old +tty. + +=item * + +Background yourself like this: + + fork && exit; + +=back + =head2 Safe Pipe Opens Another interesting approach to IPC is making your single program go @@ -272,13 +325,13 @@ you opened whatever your kid writes to his STDOUT. use English; my $sleep_count = 0; - do { + do { $pid = open(KID_TO_WRITE, "|-"); unless (defined $pid) { warn "cannot fork: $!"; die "bailing out" if $sleep_count++ > 6; sleep 10; - } + } } until defined $pid; if ($pid) { # parent @@ -286,17 +339,17 @@ you opened whatever your kid writes to his STDOUT.
close(KID_TO_WRITE) || warn "kid exited $?"; } else { # child ($EUID, $EGID) = ($UID, $GID); # suid progs only - open (FILE, "> /safe/file") + open (FILE, "> /safe/file") || die "can't open /safe/file: $!"; while (<STDIN>) { print FILE; # child's STDIN is parent's KID - } + } exit; # don't forget this - } + } Another common use for this construct is when you need to execute something without the shell's interference. With system(), it's -straigh-forward, but you can't use a pipe open or backticks safely. +straightforward, but you can't use a pipe open or backticks safely. That's because there's no way to stop the shell from getting its hands on your arguments. Instead, use lower-level control to call exec() directly. @@ -308,7 +361,7 @@ Here's a safe backtick or pipe open for read: if ($pid) { # parent while (<KID_TO_READ>) { # do something interesting - } + } close(KID_TO_READ) || warn "kid exited $?"; } else { # child @@ -316,7 +369,7 @@ Here's a safe backtick or pipe open for read: exec($program, @options, @args) || die "can't exec program: $!"; # NOTREACHED - } + } And here's a safe pipe open for writing: @@ -328,7 +381,7 @@ And here's a safe pipe open for writing: if ($pid) { # parent for (@data) { print KID_TO_WRITE; - } + } close(KID_TO_WRITE) || warn "kid exited $?"; } else { # child @@ -336,14 +389,14 @@ And here's a safe pipe open for writing: exec($program, @options, @args) || die "can't exec program: $!"; # NOTREACHED - } + } Note that these operations are full Unix forks, which means they may not be correctly implemented on alien systems. Additionally, these are not true multithreading. If you'd like to learn more about threading, see the -F<modules> file mentioned below in the L<SEE ALSO> section. +F<modules> file mentioned below in the SEE ALSO section. -=head2 Bidirectional Communication +=head2 Bidirectional Communication with Another Process While this works reasonably well for unidirectional communication, what about bidirectional communication? 
The obvious thing you'd like to do @@ -351,7 +404,7 @@ doesn't actually work: open(PROG_FOR_READING_AND_WRITING, "| some program |") -and if you forget to use the B<-w> flag, then you'll miss out +and if you forget to use the B<-w> flag, then you'll miss out entirely on the diagnostic message: Can't do bidirectional pipe at -e line 1. @@ -378,17 +431,17 @@ Here's an example of using open2(): print Writer "stuff\n"; $got = <Reader>; -The problem with this is that Unix buffering is going to really -ruin your day. Even though your C<Writer> filehandle is autoflushed, +The problem with this is that Unix buffering is really going to +ruin your day. Even though your C<Writer> filehandle is auto-flushed, and the process on the other end will get your data in a timely manner, -you can't usually do anything to force it to actually give it back to you -in a similarly quick fashion. In this case, we could, because we +you can't usually do anything to force it to give it back to you +in a similarly quick fashion. In this case, we could, because we gave I<cat> a B<-u> flag to make it unbuffered. But very few Unix commands are designed to operate over pipes, so this seldom works -unless you yourself wrote the program on the other end of the +unless you yourself wrote the program on the other end of the double-ended pipe. -A solution to this is the non-standard F<Comm.pl> library. It uses +A solution to this is the nonstandard F<Comm.pl> library. It uses pseudo-ttys to make your program behave more reasonably: require 'Comm.pl'; @@ -399,18 +452,18 @@ pseudo-ttys to make your program behave more reasonably: } This way you don't have to have control over the source code of the -program you're using. The F<Comm> library also has expect() -and interact() functions. Find the library (and hopefully its +program you're using. The F<Comm> library also has expect() +and interact() functions. 
Find the library (and we hope its successor F<IPC::Chat>) at your nearest CPAN archive as detailed -in the L<SEE ALSO> section below. +in the SEE ALSO section below. =head1 Sockets: Client/Server Communication -While not limited to Unix-derived operating systems (e.g. WinSock on PCs +While not limited to Unix-derived operating systems (e.g., WinSock on PCs provides socket support, as do some VMS libraries), you may not have -sockets on your system, in which this section probably isn't going to do -you much good. With sockets, you can do both virtual circuits (i.e. TCP -streams) and datagrams (i.e. UDP packets). You may be able to do even more +sockets on your system, in which case this section probably isn't going to do +you much good. With sockets, you can do both virtual circuits (i.e., TCP +streams) and datagrams (i.e., UDP packets). You may be able to do even more depending on your system. The Perl function calls for dealing with sockets have the same names as @@ -426,6 +479,14 @@ setting C<$AF_INET = 2>, you know you're in for big trouble: An immeasurably superior approach is to use the C<Socket> module, which more reliably grants access to various constants and functions you'll need. +If you're not writing a server/client for an existing protocol like +NNTP or SMTP, you should give some thought to how your server will +know when the client has finished talking, and vice-versa. Most +protocols are based on one-line messages and responses (so one party +knows the other has finished when a "\n" is received) or multi-line +messages and responses that end with a period on an empty line +("\n.\n" terminates a message/response). 
+ =head2 Internet TCP Clients and Servers Use Internet-domain sockets when you want to do client-server @@ -449,9 +510,9 @@ Here's a sample TCP client using Internet-domain sockets: $proto = getprotobyname('tcp'); socket(SOCK, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; connect(SOCK, $paddr) || die "connect: $!"; - while ($line = <SOCK>) { + while (defined($line = <SOCK>)) { print $line; - } + } close (SOCK) || die "close: $!"; exit; @@ -470,12 +531,14 @@ instead. use Socket; use Carp; - sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" } + sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" } my $port = shift || 2345; my $proto = getprotobyname('tcp'); + $port = $1 if $port =~ /(\d+)/; # untaint port number + socket(Server, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; - setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, + setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, pack("l", 1)) || die "setsockopt: $!"; bind(Server, sockaddr_in($port, INADDR_ANY)) || die "bind: $!"; listen(Server,SOMAXCONN) || die "listen: $!"; @@ -490,16 +553,16 @@ instead. my($port,$iaddr) = sockaddr_in($paddr); my $name = gethostbyaddr($iaddr,AF_INET); - logmsg "connection from $name [", - inet_ntoa($iaddr), "] + logmsg "connection from $name [", + inet_ntoa($iaddr), "] at port $port"; - print CLIENT "Hello there, $name, it's now ", + print Client "Hello there, $name, it's now ", scalar localtime, "\n"; - } + } And here's a multithreaded version. It's multithreaded in that -like most typical servers, it spawns (forks) a slave server to +like most typical servers, it spawns (forks) a slave server to handle the client request so that the master server can quickly go back to service a new client. @@ -511,12 +574,14 @@ go back to service a new client. 
use Carp; sub spawn; # forward declaration - sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" } + sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" } my $port = shift || 2345; my $proto = getprotobyname('tcp'); + $port = $1 if $port =~ /(\d+)/; # untaint port number + socket(Server, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; - setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, + setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, pack("l", 1)) || die "setsockopt: $!"; bind(Server, sockaddr_in($port, INADDR_ANY)) || die "bind: $!"; listen(Server,SOMAXCONN) || die "listen: $!"; @@ -526,38 +591,38 @@ go back to service a new client. my $waitedpid = 0; my $paddr; - sub REAPER { - $SIG{CHLD} = \&REAPER; # loathe sysV + sub REAPER { $waitedpid = wait; + $SIG{CHLD} = \&REAPER; # loathe sysV logmsg "reaped $waitedpid" . ($? ? " with exit $?" : ''); } $SIG{CHLD} = \&REAPER; - for ( $waitedpid = 0; - ($paddr = accept(Client,Server)) || $waitedpid; - $waitedpid = 0, close Client) + for ( $waitedpid = 0; + ($paddr = accept(Client,Server)) || $waitedpid; + $waitedpid = 0, close Client) { - next if $waitedpid; + next if $waitedpid and not $paddr; my($port,$iaddr) = sockaddr_in($paddr); my $name = gethostbyaddr($iaddr,AF_INET); - logmsg "connection from $name [", - inet_ntoa($iaddr), "] + logmsg "connection from $name [", + inet_ntoa($iaddr), "] at port $port"; - spawn sub { + spawn sub { print "Hello there, $name, it's now ", scalar localtime, "\n"; - exec '/usr/games/fortune' + exec '/usr/games/fortune' or confess "can't exec fortune: $!"; }; - } + } sub spawn { my $coderef = shift; - unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') { + unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') { confess "usage: spawn CODEREF"; } @@ -567,15 +632,15 @@ go back to service a new client. 
return; } elsif ($pid) { logmsg "begat $pid"; - return; # i'm the parent + return; # I'm the parent } - # else i'm the child -- go spawn + # else I'm the child -- go spawn open(STDIN, "<&Client") || die "can't dup client to stdin"; open(STDOUT, ">&Client") || die "can't dup client to stdout"; ## open(STDERR, ">&STDOUT") || die "can't dup stdout to stderr"; exit &$coderef(); - } + } This server takes the trouble to clone off a child version via fork() for each incoming request. That way it can handle many requests at once, @@ -601,11 +666,11 @@ differ from the system on which it's being run: use Socket; my $SECS_of_70_YEARS = 2208988800; - sub ctime { scalar localtime(shift) } + sub ctime { scalar localtime(shift) } - my $iaddr = gethostbyname('localhost'); - my $proto = getprotobyname('tcp'); - my $port = getservbyname('time', 'tcp'); + my $iaddr = gethostbyname('localhost'); + my $proto = getprotobyname('tcp'); + my $port = getservbyname('time', 'tcp'); my $paddr = sockaddr_in(0, $iaddr); my($host); @@ -627,10 +692,10 @@ differ from the system on which it's being run: =head2 Unix-Domain TCP Clients and Servers -That's fine for Internet-domain clients and servers, but what local +That's fine for Internet-domain clients and servers, but what about local communications? While you can use the same setup, sometimes you don't want to. Unix-domain sockets are local to the current host, and are often -used internally to implement pipes. Unlike Internet domain sockets, UNIX +used internally to implement pipes. Unlike Internet domain sockets, Unix domain sockets can show up in the file system with an ls(1) listing. 
$ ls -l /dev/log @@ -640,7 +705,7 @@ You can test for these with Perl's B<-S> file test: unless ( -S '/dev/log' ) { die "something's wicked with the print system"; - } + } Here's a sample Unix-domain client: @@ -652,13 +717,13 @@ Here's a sample Unix-domain client: $rendezvous = shift || '/tmp/catsock'; socket(SOCK, PF_UNIX, SOCK_STREAM, 0) || die "socket: $!"; - connect(SOCK, sockaddr_un($remote)) || die "connect: $!"; - while ($line = <SOCK>) { + connect(SOCK, sockaddr_un($rendezvous)) || die "connect: $!"; + while (defined($line = <SOCK>)) { print $line; - } + } exit; -And here's a corresponding server. +And here's a corresponding server. #!/usr/bin/perl -Tw require 5.002; @@ -681,17 +746,17 @@ And here's a corresponding server. $SIG{CHLD} = \&REAPER; - for ( $waitedpid = 0; - accept(Client,Server) || $waitedpid; - $waitedpid = 0, close Client) + for ( $waitedpid = 0; + accept(Client,Server) || $waitedpid; + $waitedpid = 0, close Client) { next if $waitedpid; logmsg "connection on $NAME"; - spawn sub { + spawn sub { print "Hello there, it's now ", scalar localtime, "\n"; exec '/usr/games/fortune' or die "can't exec fortune: $!"; }; - } + } As you see, it's remarkably similar to the Internet domain TCP server, so much so, in fact, that we've omitted several duplicate functions--spawn(), @@ -710,7 +775,326 @@ if they go through a CGI interface. You'd have a small, simple CGI program that does whatever checks and logging you feel like, and then acts as a Unix-domain client and connects to your private server. -=head2 UDP: Message Passing +=head1 TCP Clients with IO::Socket + +For those preferring a higher-level interface to socket programming, the +IO::Socket module provides an object-oriented approach. IO::Socket is +included as part of the standard Perl distribution as of the 5.004 +release. 
If you're running an earlier version of Perl, just fetch +IO::Socket from CPAN, where you'll also find modules providing easy +interfaces to the following systems: DNS, FTP, Ident (RFC 931), NIS and +NISPlus, NNTP, Ping, POP3, SMTP, SNMP, SSLeay, Telnet, and Time--just +to name a few. + +=head2 A Simple Client + +Here's a client that creates a TCP connection to the "daytime" +service at port 13 of the host name "localhost" and prints out everything +that the server there cares to provide. + + #!/usr/bin/perl -w + use IO::Socket; + $remote = IO::Socket::INET->new( + Proto => "tcp", + PeerAddr => "localhost", + PeerPort => "daytime(13)", + ) + or die "cannot connect to daytime port at localhost"; + while ( <$remote> ) { print } + +When you run this program, you should get something back that +looks like this: + + Wed May 14 08:40:46 MDT 1997 + +Here's what those parameters to the C<new> constructor mean: + +=over + +=item C<Proto> + +This is which protocol to use. In this case, the socket handle returned +will be connected to a TCP socket, because we want a stream-oriented +connection, that is, one that acts pretty much like a plain old file. +Not all sockets are of this type. For example, the UDP protocol +can be used to make a datagram socket, used for message-passing. + +=item C<PeerAddr> + +This is the name or Internet address of the remote host the server is +running on. We could have specified a longer name like C<"www.perl.com">, +or an address like C<"204.148.40.9">. For demonstration purposes, we've +used the special hostname C<"localhost">, which should always mean the +current machine you're running on. The corresponding Internet address +for localhost is C<"127.1">, if you'd rather use that. + +=item C<PeerPort> + +This is the service name or port number we'd like to connect to. 
+We could have gotten away with using just C<"daytime"> on systems with a +well-configured system services file,[FOOTNOTE: The system services file +is in I</etc/services> under Unix] but just in case, we've specified the +port number (13) in parentheses. Using just the number would also have +worked, but constant numbers make careful programmers nervous. + +=back + +Notice how the return value from the C<new> constructor is used as +a filehandle in the C<while> loop? That's what's called an indirect +filehandle, a scalar variable containing a filehandle. You can use +it the same way you would a normal filehandle. For example, you +can read one line from it this way: + + $line = <$handle>; + +all remaining lines from it this way: + + @lines = <$handle>; + +and send a line of data to it this way: + + print $handle "some data\n"; + +=head2 A Webget Client + +Here's a simple client that takes a remote host to fetch a document +from, and then a list of documents to get from that host. This is a +more interesting client than the previous one because it first sends +something to the server before fetching the server's response. + + #!/usr/bin/perl -w + use IO::Socket; + unless (@ARGV > 1) { die "usage: $0 host document ..." } + $host = shift(@ARGV); + foreach $document ( @ARGV ) { + $remote = IO::Socket::INET->new( Proto => "tcp", + PeerAddr => $host, + PeerPort => "http(80)", + ); + unless ($remote) { die "cannot connect to http daemon on $host" } + $remote->autoflush(1); + print $remote "GET $document HTTP/1.0\n\n"; + while ( <$remote> ) { print } + close $remote; + } + +The web server handling the "http" service is assumed to be at +its standard port, number 80. If the web server you're trying to +connect to is at a different port (like 1080 or 8080), you should specify +it as the named-parameter pair, C<PeerPort =E<gt> 8080>. The C<autoflush> +method is used on the socket because otherwise the system would buffer +up the output we sent it. 
(If you're on a Mac, you'll also need to +change every C<"\n"> in your code that sends data over the network to +be a C<"\015\012"> instead.) + +Connecting to the server is only the first part of the process: once you +have the connection, you have to use the server's language. Each server +on the network has its own little command language that it expects as +input. The string that we send to the server starting with "GET" is in +HTTP syntax. In this case, we simply request each specified document. +Yes, we really are making a new connection for each document, even though +it's the same host. That's the way you always used to have to speak HTTP. +Recent versions of web browsers may request that the remote server leave +the connection open a little while, but the server doesn't have to honor +such a request. + +Here's an example of running that program, which we'll call I<webget>: + + shell_prompt$ webget www.perl.com /guanaco.html + HTTP/1.1 404 File Not Found + Date: Thu, 08 May 1997 18:02:32 GMT + Server: Apache/1.2b6 + Connection: close + Content-type: text/html + + <HEAD><TITLE>404 File Not Found</TITLE></HEAD> + <BODY><H1>File Not Found</H1> + The requested URL /guanaco.html was not found on this server.<P> + </BODY> + +Ok, so that's not very interesting, because it didn't find that +particular document. But a long response wouldn't have fit on this page. + +For a more fully-featured version of this program, you should look to +the I<lwp-request> program included with the LWP modules from CPAN. + +=head2 Interactive Client with IO::Socket + +Well, that's all fine if you want to send one command and get one answer, +but what about setting up something fully interactive, somewhat like +the way I<telnet> works? That way you can type a line, get the answer, +type a line, get the answer, etc. + +This client is more complicated than the two we've done so far, but if +you're on a system that supports the powerful C<fork> call, the solution +isn't that rough. 
Once you've made the connection to whatever service +you'd like to chat with, call C<fork> to clone your process. Each of +these two identical processes has a very simple job to do: the parent +copies everything from the socket to standard output, while the child +simultaneously copies everything from standard input to the socket. +To accomplish the same thing using just one process would be I<much> +harder, because it's easier to code two processes to do one thing than it +is to code one process to do two things. (This keep-it-simple principle +is one of the cornerstones of the Unix philosophy, and good software +engineering as well, which is probably why it's spread to other systems +as well.) + +Here's the code: + + #!/usr/bin/perl -w + use strict; + use IO::Socket; + my ($host, $port, $kidpid, $handle, $line); + + unless (@ARGV == 2) { die "usage: $0 host port" } + ($host, $port) = @ARGV; + + # create a tcp connection to the specified host and port + $handle = IO::Socket::INET->new(Proto => "tcp", + PeerAddr => $host, + PeerPort => $port) + or die "can't connect to port $port on $host: $!"; + + $handle->autoflush(1); # so output gets there right away + print STDERR "[Connected to $host:$port]\n"; + + # split the program into two processes, identical twins + die "can't fork: $!" unless defined($kidpid = fork()); + + # the if{} block runs only in the parent process + if ($kidpid) { + # copy the socket to standard output + while (defined ($line = <$handle>)) { + print STDOUT $line; + } + kill("TERM", $kidpid); # send SIGTERM to child + } + # the else{} block runs only in the child process + else { + # copy standard input to the socket + while (defined ($line = <STDIN>)) { + print $handle $line; + } + } + +The C<kill> function in the parent's C<if> block is there to send a +signal to our child process (currently running in the C<else> block) +as soon as the remote server has closed its end of the connection. 
+ +The C<kill> at the end of the parent's block is there to eliminate the +child process as soon as the server we connect to closes its end. + +If the remote server sends data a byte at a time, and you need that +data immediately without waiting for a newline (which might not happen), +you may wish to replace the C<while> loop in the parent with the +following: + + my $byte; + while (sysread($handle, $byte, 1) == 1) { + print STDOUT $byte; + } + +Making a system call for each byte you want to read is not very efficient +(to put it mildly) but is the simplest to explain and works reasonably +well. + +=head1 TCP Servers with IO::Socket + +Setting up a server is a little bit more involved than running a client. +The model is that the server creates a special kind of socket that +does nothing but listen on a particular port for incoming connections. +It does this by calling the C<IO::Socket::INET-E<gt>new()> method with +slightly different arguments than the client did. + +=over + +=item Proto + +This is which protocol to use. Like our clients, we'll +still specify C<"tcp"> here. + +=item LocalPort + +We specify a local +port in the C<LocalPort> argument, which we didn't do for the client. +This is the service name or port number for which you want to be the +server. (Under Unix, ports under 1024 are restricted to the +superuser.) In our sample, we'll use port 9000, but you can use +any port that's not currently in use on your system. If you try +to use one already in use, you'll get an "Address already in use" +message. Under Unix, the C<netstat -a> command will show +which services currently have servers. + +=item Listen + +The C<Listen> parameter is set to the maximum number of +pending connections we can accept until we turn away incoming clients. +Think of it as a call-waiting queue for your telephone. +The low-level Socket module has a special symbol for the system maximum, which +is SOMAXCONN. 
+ +=item Reuse + +The C<Reuse> parameter is needed so that we can restart our server +manually without waiting a few minutes to allow system buffers to +clear out. + +=back + +Once the generic server socket has been created using the parameters +listed above, the server then waits for a new client to connect +to it. The server blocks in the C<accept> method, which eventually returns a +bidirectional connection to the remote client. (Make sure to autoflush +this handle to circumvent buffering.) + +To add to user-friendliness, our server prompts the user for commands. +Most servers don't do this. Because of the prompt without a newline, +you'll have to use the C<sysread> variant of the interactive client above. + +This server accepts one of five different commands, sending output +back to the client. Note that unlike most network servers, this one +only handles one incoming client at a time. Multithreaded servers are +covered in Chapter 6 of the Camel or in the perlipc(1) manpage. + +Here's the code. + + #!/usr/bin/perl -w + use IO::Socket; + use Net::hostent; # for OO version of gethostbyaddr + + $PORT = 9000; # pick something not in use + + $server = IO::Socket::INET->new( Proto => 'tcp', + LocalPort => $PORT, + Listen => SOMAXCONN, + Reuse => 1); + + die "can't setup server" unless $server; + print "[Server $0 accepting clients]\n"; + + while ($client = $server->accept()) { + $client->autoflush(1); + print $client "Welcome to $0; type help for command list.\n"; + $hostinfo = gethostbyaddr($client->peeraddr); + printf "[Connect from %s]\n", $hostinfo->name || $client->peerhost; + print $client "Command? 
"; + while ( <$client>) { + next unless /\S/; # blank line + if (/quit|exit/i) { last; } + elsif (/date|time/i) { printf $client "%s\n", scalar localtime; } + elsif (/who/i ) { print $client `who 2>&1`; } + elsif (/cookie/i ) { print $client `/usr/games/fortune 2>&1`; } + elsif (/motd/i ) { print $client `cat /etc/motd 2>&1`; } + else { + print $client "Commands: quit date who cookie motd\n"; + } + } continue { + print $client "Command? "; + } + close $client; + } + +=head1 UDP: Message Passing Another kind of client-server setup is one that uses not connections, but messages. UDP communications involve much lower overhead but also provide @@ -719,11 +1103,11 @@ all, let alone in order and unmangled. Still, UDP offers some advantages over TCP, including being able to "broadcast" or "multicast" to a whole bunch of destination hosts at once (usually on your local subnet). If you find yourself overly concerned about reliability and start building checks -into your message system, then you probably should just use TCP to start +into your message system, then you probably should use just TCP to start with. Here's a UDP program similar to the sample Internet TCP client given -above. However, instead of checking one host at a time, the UDP version +earlier. However, instead of checking one host at a time, the UDP version will check many of them asynchronously by simulating a multicast and then using select() to do a timed-out wait for I/O. To do something similar with TCP, you'd have to use a different socket handle for each host. @@ -734,8 +1118,8 @@ with TCP, you'd have to use a different socket handle for each host. 
use Socket; use Sys::Hostname; - my ( $count, $hisiaddr, $hispaddr, $histime, - $host, $iaddr, $paddr, $port, $proto, + my ( $count, $hisiaddr, $hispaddr, $histime, + $host, $iaddr, $paddr, $port, $proto, $rin, $rout, $rtime, $SECS_of_70_YEARS); $SECS_of_70_YEARS = 2208988800; @@ -781,8 +1165,7 @@ Berkeley mmap() to have shared memory so as to share a variable amongst several processes. That's because Perl would reallocate your string when you weren't wanting it to. - -Here's a small example showing shared memory usage. +Here's a small example showing shared memory usage. $IPC_PRIVATE = 0; $IPC_RMID = 0; @@ -808,7 +1191,7 @@ Here's an example of a semaphore: die if !defined($key); print "$key\n"; -Put this code in a separate file to be run in more that one process +Put this code in a separate file to be run in more than one process. Call the file F<take>: # create a semaphore @@ -832,7 +1215,7 @@ Call the file F<take>: semop($key,$opstring) || die "$!"; -Put this code in a separate file to be run in more that one process +Put this code in a separate file to be run in more than one process. Call this file F<give>: # 'give' the semaphore @@ -852,32 +1235,24 @@ Call this file F<give>: semop($key,$opstring) || die "$!"; -=head1 WARNING - -The SysV IPC code above was written long ago, and it's definitely clunky -looking. It should at the very least be made to C<use strict> and -C<require "sys/ipc.ph">. Better yet, perhaps someone should create an -C<IPC::SysV> module the way we have the C<Socket> module for normal -client-server communications. - -(... time passes) - -Voila! Check out the IPC::SysV modules written by Jack Shirazi. You can -find them at a CPAN store near you. +The SysV IPC code above was written long ago, and it's definitely +clunky looking. It should at the very least be made to C<use strict> +and C<require "sys/ipc.ph">. Better yet, check out the IPC::SysV modules +on CPAN. 
=head1 NOTES If you are running under version 5.000 (dubious) or 5.001, you can still use most of the examples in this document. You may have to remove the C<use strict> and some of the my() statements for 5.000, and for both -you'll have to load in version 1.2 of the F<Socket.pm> module, which -was/is/shall-be included in I<perl5.001o>. +you'll have to load in version 1.2 or older of the F<Socket.pm> module, which +is included in I<perl5.002>. Most of these routines quietly but politely return C<undef> when they fail instead of causing your program to die right then and there due to an uncaught exception. (Actually, some of the new I<Socket> conversion functions croak() on bad arguments.) It is therefore essential -that you should check the return values fo these functions. Always begin +that you should check the return values of these functions. Always begin your socket programs this way for optimal success, and don't forget to add B<-T> taint checking flag to the pound-bang line for servers: @@ -892,26 +1267,42 @@ B<-T> taint checking flag to the pound-bang line for servers: All these routines create system-specific portability problems. As noted elsewhere, Perl is at the mercy of your C libraries for much of its system behaviour. It's probably safest to assume broken SysV semantics for -signals and to stick with simple TCP and UDP socket operations; e.g. don't -try to pass open filedescriptors over a local UDP datagram socket if you +signals and to stick with simple TCP and UDP socket operations; e.g., don't +try to pass open file descriptors over a local UDP datagram socket if you want your code to stand a chance of being portable. -Because few vendors provide C libraries that are safely -re-entrant, the prudent programmer will do little else within -a handler beyond die() to raise an exception and longjmp(3) out. 
+Because few vendors provide C libraries that are safely re-entrant, +the prudent programmer will do little else within a handler beyond +setting a numeric variable that already exists; or, if locked into +a slow (restarting) system call, using die() to raise an exception +and longjmp(3) out. In fact, even these may in some cases cause a +core dump. It's probably best to avoid signals except where they are +absolutely inevitable. These perilous problems will be addressed in a +future release of Perl. =head1 AUTHOR Tom Christiansen, with occasional vestiges of Larry Wall's original -version. +version and suggestions from the Perl Porters. =head1 SEE ALSO -Besides the obvious functions in L<perlfunc>, you should also check out -the F<modules> file at your nearest CPAN site. (See L<perlmod> or best -yet, the F<Perl FAQ> for a description of what CPAN is and where to get it.) +There's a lot more to networking than this, but this should get you +started. + +For intrepid programmers, there's the classic textbook I<Unix Network Programming> +by Richard Stevens (published by Addison-Wesley). Note that most books +on networking address networking from the perspective of a C programmer; +translation to Perl is left as an exercise for the reader. + +The IO::Socket(3) manpage describes the object library, and the Socket(3) +manpage describes the low-level interface to sockets. Besides the obvious +functions in L<perlfunc>, you should also check out the F<modules> file +at your nearest CPAN site. (See L<perlmodlib> or best yet, the F<Perl +FAQ> for a description of what CPAN is and where to get it.) 
+ Section 5 of the F<modules> file is devoted to "Networking, Device Control -(modems) and Interprocess Communication", and contains numerous unbundled +(modems), and Interprocess Communication", and contains numerous unbundled modules numerous networking modules, Chat and Expect operations, CGI programming, DCE, FTP, IPC, NNTP, Proxy, Ptty, RPC, SNMP, SMTP, Telnet, Threads, and ToolTalk--just to name a few. diff --git a/pod/perllocale.pod b/pod/perllocale.pod new file mode 100644 index 0000000000..e1bf5f070d --- /dev/null +++ b/pod/perllocale.pod @@ -0,0 +1,800 @@ +=head1 NAME + +perllocale - Perl locale handling (internationalization and localization) + +=head1 DESCRIPTION + +Perl supports language-specific notions of data such as "is this a +letter", "what is the uppercase equivalent of this letter", and "which +of these letters comes first". These are important issues, especially +for languages other than English - but also for English: it would be +very naE<iuml>ve to think that C<A-Za-z> defines all the "letters". Perl +is also aware that some character other than '.' may be preferred as a +decimal point, and that output date representations may be +language-specific. The process of making an application take account of +its users' preferences in such matters is called B<internationalization> +(often abbreviated as B<i18n>); telling such an application about a +particular set of preferences is known as B<localization> (B<l10n>). + +Perl can understand language-specific data via the standardized (ISO C, +XPG4, POSIX 1.c) method called "the locale system". The locale system is +controlled per application using one pragma, one function call, and +several environment variables. + +B<NOTE>: This feature is new in Perl 5.004, and does not apply unless an +application specifically requests it - see L<Backward compatibility>. +The one exception is that write() now B<always> uses the current locale +- see L<"NOTES">. 
+ +=head1 PREPARING TO USE LOCALES + +If Perl applications are to be able to understand and present your data +correctly according to a locale of your choice, B<all> of the following +must be true: + +=over 4 + +=item * + +B<Your operating system must support the locale system>. If it does, +you should find that the setlocale() function is a documented part of +its C library. + +=item * + +B<Definitions for the locales which you use must be installed>. You, or +your system administrator, must make sure that this is the case. The +available locales, the location in which they are kept, and the manner +in which they are installed, vary from system to system. Some systems +provide only a few, hard-wired, locales, and do not allow more to be +added; others allow you to add "canned" locales provided by the system +supplier; still others allow you or the system administrator to define +and add arbitrary locales. (You may have to ask your supplier to +provide canned locales which are not delivered with your operating +system.) Read your system documentation for further illumination. + +=item * + +B<Perl must believe that the locale system is supported>. If it does, +C<perl -V:d_setlocale> will say that the value for C<d_setlocale> is +C<define>. + +=back + +If you want a Perl application to process and present your data +according to a particular locale, the application code should include +the S<C<use locale>> pragma (see L<The use locale pragma>) where +appropriate, and B<at least one> of the following must be true: + +=over 4 + +=item * + +B<The locale-determining environment variables (see L<"ENVIRONMENT">) +must be correctly set up>, either by yourself, or by the person who set +up your system account, at the time the application is started. + +=item * + +B<The application must set its own locale> using the method described in +L<The setlocale function>. + +=back + +=head1 USING LOCALES + +=head2 The use locale pragma + +By default, Perl ignores the current locale. 
The S<C<use locale>> +pragma tells Perl to use the current locale for some operations: + +=over 4 + +=item * + +B<The comparison operators> (C<lt>, C<le>, C<cmp>, C<ge>, and C<gt>) and +the POSIX string collation functions strcoll() and strxfrm() use +C<LC_COLLATE>. sort() is also affected if it is used without an +explicit comparison function because it uses C<cmp> by default. + +B<Note:> C<eq> and C<ne> are unaffected by the locale: they always +perform a byte-by-byte comparison of their scalar operands. What's +more, if C<cmp> finds that its operands are equal according to the +collation sequence specified by the current locale, it goes on to +perform a byte-by-byte comparison, and only returns I<0> (equal) if the +operands are bit-for-bit identical. If you really want to know whether +two strings - which C<eq> and C<cmp> may consider different - are equal +as far as collation in the locale is concerned, see the discussion in +L<Category LC_COLLATE: Collation>. + +=item * + +B<Regular expressions and case-modification functions> (uc(), lc(), +ucfirst(), and lcfirst()) use C<LC_CTYPE> + +=item * + +B<The formatting functions> (printf(), sprintf() and write()) use +C<LC_NUMERIC> + +=item * + +B<The POSIX date formatting function> (strftime()) uses C<LC_TIME>. + +=back + +C<LC_COLLATE>, C<LC_CTYPE>, and so on, are discussed further in L<LOCALE +CATEGORIES>. + +The default behavior returns with S<C<no locale>> or on reaching the +end of the enclosing block. + +Note that the string result of any operation that uses locale +information is tainted, as it is possible for a locale to be +untrustworthy. See L<"SECURITY">. + +=head2 The setlocale function + +You can switch locales as often as you wish at run time with the +POSIX::setlocale() function: + + # This functionality not usable prior to Perl 5.004 + require 5.004; + + # Import locale-handling tool set from POSIX module. 
+ # This example uses: setlocale -- the function call + # LC_CTYPE -- explained below + use POSIX qw(locale_h); + + # query and save the old locale + $old_locale = setlocale(LC_CTYPE); + + setlocale(LC_CTYPE, "fr_CA.ISO8859-1"); + # LC_CTYPE now in locale "French, Canada, codeset ISO 8859-1" + + setlocale(LC_CTYPE, ""); + # LC_CTYPE now reset to default defined by LC_ALL/LC_CTYPE/LANG + # environment variables. See below for documentation. + + # restore the old locale + setlocale(LC_CTYPE, $old_locale); + +The first argument of setlocale() gives the B<category>, the second the +B<locale>. The category tells in what aspect of data processing you +want to apply locale-specific rules. Category names are discussed in +L<LOCALE CATEGORIES> and L<"ENVIRONMENT">. The locale is the name of a +collection of customization information corresponding to a particular +combination of language, country or territory, and codeset. Read on for +hints on the naming of locales: not all systems name locales as in the +example. + +If no second argument is provided, the function returns a string naming +the current locale for the category. You can use this value as the +second argument in a subsequent call to setlocale(). If a second +argument is given and it corresponds to a valid locale, the locale for +the category is set to that value, and the function returns the +now-current locale value. You can use this in a subsequent call to +setlocale(). (In some implementations, the return value may sometimes +differ from the value you gave as the second argument - think of it as +an alias for the value that you gave.) + +As the example shows, if the second argument is an empty string, the +category's locale is returned to the default specified by the +corresponding environment variables. 
Generally, this results in a +return to the default which was in force when Perl started up: changes +to the environment made by the application after startup may or may not +be noticed, depending on the implementation of your system's C library. + +If the second argument does not correspond to a valid locale, the locale +for the category is not changed, and the function returns I<undef>. + +For further information about the categories, consult L<setlocale(3)>. +For the locales available in your system, also consult L<setlocale(3)> +and see whether it leads you to the list of the available locales +(search for the I<SEE ALSO> section). If that fails, try the following +command lines: + + locale -a + + nlsinfo + + ls /usr/lib/nls/loc + + ls /usr/lib/locale + + ls /usr/lib/nls + +and see whether they list something resembling these + + en_US.ISO8859-1 de_DE.ISO8859-1 ru_RU.ISO8859-5 + en_US de_DE ru_RU + en de ru + english german russian + english.iso88591 german.iso88591 russian.iso88595 + +Sadly, even though the calling interface for setlocale() has been +standardized, the names of the locales and the directories where +the configuration is, have not. The basic form of the name is +I<language_country/territory>B<.>I<codeset>, but the +latter parts are not always present. + +Two special locales are worth particular mention: "C" and "POSIX". +Currently these are effectively the same locale: the difference is +mainly that the first one is defined by the C standard and the second by +the POSIX standard. What they define is the B<default locale> in which +every program starts in the absence of locale information in its +environment. (The default default locale, if you will.) Its language +is (American) English and its character codeset ASCII. + +B<NOTE>: Not all systems have the "POSIX" locale (not all systems are +POSIX-conformant), so use "C" when you need explicitly to specify this +default locale. 
+ +=head2 The localeconv function + +The POSIX::localeconv() function allows you to get particulars of the +locale-dependent numeric formatting information specified by the current +C<LC_NUMERIC> and C<LC_MONETARY> locales. (If you just want the name of +the current locale for a particular category, use POSIX::setlocale() +with a single parameter - see L<The setlocale function>.) + + use POSIX qw(locale_h); + + # Get a reference to a hash of locale-dependent info + $locale_values = localeconv(); + + # Output sorted list of the values + for (sort keys %$locale_values) { + printf "%-20s = %s\n", $_, $locale_values->{$_} + } + +localeconv() takes no arguments, and returns B<a reference to> a hash. +The keys of this hash are formatting variable names such as +C<decimal_point> and C<thousands_sep>; the values are the corresponding +values. See L<POSIX (3)/localeconv> for a longer example, which lists +all the categories an implementation might be expected to provide; some +provide more and others fewer, however. Note that you don't need C<use +locale>: as a function with the job of querying the locale, localeconv() +always observes the current locale. + +Here's a simple-minded example program which rewrites its command line +parameters as integers formatted correctly in the current locale: + + # See comments in previous example + require 5.004; + use POSIX qw(locale_h); + + # Get some of locale's numeric formatting parameters + my ($thousands_sep, $grouping) = + @{localeconv()}{'thousands_sep', 'grouping'}; + + # Apply defaults if values are missing + $thousands_sep = ',' unless $thousands_sep; + $grouping = 3 unless $grouping; + + # Format command line params for current locale + for (@ARGV) { + $_ = int; # Chop non-integer part + 1 while + s/(\d)(\d{$grouping}($|$thousands_sep))/$1$thousands_sep$2/; + print "$_"; + } + print "\n"; + +=head1 LOCALE CATEGORIES + +The subsections which follow describe basic locale categories. 
As well +as these, there are some combination categories which allow the +manipulation of more than one basic category at a time. See +L<"ENVIRONMENT"> for a discussion of these. + +=head2 Category LC_COLLATE: Collation + +When in the scope of S<C<use locale>>, Perl looks to the C<LC_COLLATE> +environment variable to determine the application's notions on the +collation (ordering) of characters. ('b' follows 'a' in Latin +alphabets, but where do 'E<aacute>' and 'E<aring>' belong?) + +Here is a code snippet that will tell you what are the alphanumeric +characters in the current locale, in the locale order: + + use locale; + print +(sort grep /\w/, map { chr() } 0..255), "\n"; + +Compare this with the characters that you see and their order if you +state explicitly that the locale should be ignored: + + no locale; + print +(sort grep /\w/, map { chr() } 0..255), "\n"; + +This machine-native collation (which is what you get unless S<C<use +locale>> has appeared earlier in the same block) must be used for +sorting raw binary data, whereas the locale-dependent collation of the +first example is useful for natural text. + +As noted in L<USING LOCALES>, C<cmp> compares according to the current +collation locale when C<use locale> is in effect, but falls back to a +byte-by-byte comparison for strings which the locale says are equal. You +can use POSIX::strcoll() if you don't want this fall-back: + + use POSIX qw(strcoll); + $equal_in_locale = + !strcoll("space and case ignored", "SpaceAndCaseIgnored"); + +$equal_in_locale will be true if the collation locale specifies a +dictionary-like ordering which ignores space characters completely, and +which folds case. 
+ +If you have a single string which you want to check for "equality in +locale" against several others, you might think you could gain a little +efficiency by using POSIX::strxfrm() in conjunction with C<eq>: + + use POSIX qw(strxfrm); + $xfrm_string = strxfrm("Mixed-case string"); + print "locale collation ignores spaces\n" + if $xfrm_string eq strxfrm("Mixed-casestring"); + print "locale collation ignores hyphens\n" + if $xfrm_string eq strxfrm("Mixedcase string"); + print "locale collation ignores case\n" + if $xfrm_string eq strxfrm("mixed-case string"); + +strxfrm() takes a string and maps it into a transformed string for use +in byte-by-byte comparisons against other transformed strings during +collation. "Under the hood", locale-affected Perl comparison operators +call strxfrm() for both their operands, then do a byte-by-byte +comparison of the transformed strings. By calling strxfrm() explicitly, +and using a non locale-affected comparison, the example attempts to save +a couple of transformations. In fact, it doesn't save anything: Perl +magic (see L<perlguts/Magic Variables>) creates the transformed version of a +string the first time it's needed in a comparison, then keeps it around +in case it's needed again. An example rewritten the easy way with +C<cmp> runs just about as fast. It also copes with null characters +embedded in strings; if you call strxfrm() directly, it treats the first +null it finds as a terminator. And don't expect the transformed strings +it produces to be portable across systems - or even from one revision +of your operating system to the next. In short, don't call strxfrm() +directly: let Perl do it for you. + +Note: C<use locale> isn't shown in some of these examples, as it isn't +needed: strcoll() and strxfrm() exist only to generate locale-dependent +results, and so always obey the current C<LC_COLLATE> locale. 
+ +=head2 Category LC_CTYPE: Character Types + +When in the scope of S<C<use locale>>, Perl obeys the C<LC_CTYPE> locale +setting. This controls the application's notion of which characters are +alphabetic. This affects Perl's C<\w> regular expression metanotation, +which stands for alphanumeric characters - that is, alphabetic and +numeric characters. (Consult L<perlre> for more information about +regular expressions.) Thanks to C<LC_CTYPE>, depending on your locale +setting, characters like 'E<aelig>', 'E<eth>', 'E<szlig>', and +'E<oslash>' may be understood as C<\w> characters. + +The C<LC_CTYPE> locale also provides the map used in translating +characters between lower and uppercase. This affects the case-mapping +functions - lc(), lcfirst(), uc() and ucfirst(); case-mapping +interpolation with C<\l>, C<\L>, C<\u> or C<\U> in double-quoted strings +and in C<s///> substitutions; and case-independent regular expression +pattern matching using the C<i> modifier. + +Finally, C<LC_CTYPE> affects the POSIX character-class test functions - +isalpha(), islower() and so on. For example, if you move from the "C" +locale to a 7-bit Scandinavian one, you may find - possibly to your +surprise - that "|" moves from the ispunct() class to isalpha(). + +B<Note:> A broken or malicious C<LC_CTYPE> locale definition may result +in clearly ineligible characters being considered to be alphanumeric by +your application. For strict matching of (unaccented) letters and +digits - for example, in command strings - locale-aware applications +should use C<\w> inside a C<no locale> block. See L<"SECURITY">. + +=head2 Category LC_NUMERIC: Numeric Formatting + +When in the scope of S<C<use locale>>, Perl obeys the C<LC_NUMERIC> +locale information, which controls an application's idea of how numbers +should be formatted for human readability by the printf(), sprintf(), +and write() functions. String to numeric conversion by the +POSIX::strtod() function is also affected.
In most implementations the +only effect is to change the character used for the decimal point - +perhaps from '.' to ',': these functions aren't aware of such niceties +as thousands separation and so on. (See L<The localeconv function> if +you care about these things.) + +Note that output produced by print() is B<never> affected by the +current locale: it is independent of whether C<use locale> or C<no +locale> is in effect, and corresponds to what you'd get from printf() +in the "C" locale. The same is true for Perl's internal conversions +between numeric and string formats: + + use POSIX qw(strtod); + use locale; + + $n = 5/2; # Assign numeric 2.5 to $n + + $a = " $n"; # Locale-independent conversion to string + + print "half five is $n\n"; # Locale-independent output + + printf "half five is %g\n", $n; # Locale-dependent output + + print "DECIMAL POINT IS COMMA\n" + if $n == (strtod("2,5"))[0]; # Locale-dependent conversion + +=head2 Category LC_MONETARY: Formatting of monetary amounts + +The C standard defines the C<LC_MONETARY> category, but no function that +is affected by its contents. (Those with experience of standards +committees will recognize that the working group decided to punt on the +issue.) Consequently, Perl takes no notice of it. If you really want +to use C<LC_MONETARY>, you can query its contents - see L<The localeconv +function> - and use the information that it returns in your +application's own formatting of currency amounts. However, you may well +find that the information, though voluminous and complex, does not quite +meet your requirements: currency formatting is a hard nut to crack. + +=head2 LC_TIME + +The output produced by POSIX::strftime(), which builds a formatted +human-readable date/time string, is affected by the current C<LC_TIME> +locale. Thus, in a French locale, the output produced by the C<%B> +format element (full month name) for the first month of the year would +be "janvier". 
Here's how to get a list of the long month names in the +current locale: + + use POSIX qw(strftime); + for (0..11) { + $long_month_name[$_] = + strftime("%B", 0, 0, 0, 1, $_, 96); + } + +Note: C<use locale> isn't needed in this example: as a function which +exists only to generate locale-dependent results, strftime() always +obeys the current C<LC_TIME> locale. + +=head2 Other categories + +The remaining locale category, C<LC_MESSAGES> (possibly supplemented by +others in particular implementations) is not currently used by Perl - +except possibly to affect the behavior of library functions called by +extensions which are not part of the standard Perl distribution. + +=head1 SECURITY + +While the main discussion of Perl security issues can be found in +L<perlsec>, a discussion of Perl's locale handling would be incomplete +if it did not draw your attention to locale-dependent security issues. +Locales - particularly on systems which allow unprivileged users to +build their own locales - are untrustworthy. A malicious (or just plain +broken) locale can make a locale-aware application give unexpected +results. Here are a few possibilities: + +=over 4 + +=item * + +Regular expression checks for safe file names or mail addresses using +C<\w> may be spoofed by an C<LC_CTYPE> locale which claims that +characters such as "E<gt>" and "|" are alphanumeric. + +=item * + +String interpolation with case-mapping, as in, say, C<$dest = +"C:\U$name.$ext">, may produce dangerous results if a bogus LC_CTYPE +case-mapping table is in effect. + +=item * + +If the decimal point character in the C<LC_NUMERIC> locale is +surreptitiously changed from a dot to a comma, C<sprintf("%g", +0.123456e3)> produces a string result of "123,456". Many people would +interpret this as one hundred and twenty-three thousand, four hundred +and fifty-six. + +=item * + +A sneaky C<LC_COLLATE> locale could result in the names of students with +"D" grades appearing ahead of those with "A"s. 
+ +=item * + +An application which takes the trouble to use the information in +C<LC_MONETARY> may format debits as if they were credits and vice versa +if that locale has been subverted. Or it may make payments in US +dollars instead of Hong Kong dollars. + +=item * + +The date and day names in dates formatted by strftime() could be +manipulated to advantage by a malicious user able to subvert the +C<LC_TIME> locale. ("Look - it says I wasn't in the building on +Sunday.") + +=back + +Such dangers are not peculiar to the locale system: any aspect of an +application's environment which may maliciously be modified presents +similar challenges. Similarly, they are not specific to Perl: any +programming language which allows you to write programs which take +account of their environment exposes you to these issues. + +Perl cannot protect you from all of the possibilities shown in the +examples - there is no substitute for your own vigilance - but, when +C<use locale> is in effect, Perl uses the tainting mechanism (see +L<perlsec>) to mark string results which become locale-dependent, and +which may be untrustworthy in consequence. Here is a summary of the +tainting behavior of operators and functions which may be affected by +the locale: + +=over 4 + +=item B<Comparison operators> (C<lt>, C<le>, C<ge>, C<gt> and C<cmp>): + +Scalar true/false (or less/equal/greater) result is never tainted. + +=item B<Case-mapping interpolation> (with C<\l>, C<\L>, C<\u> or C<\U>) + +Result string containing interpolated material is tainted if +C<use locale> is in effect. + +=item B<Matching operator> (C<m//>): + +Scalar true/false result is never tainted. + +Subpatterns, either delivered as an array-context result, or as $1 etc. +are tainted if C<use locale> is in effect, and the subpattern regular +expression contains C<\w> (to match an alphanumeric character), C<\W> +(non-alphanumeric character), C<\s> (white-space character), or C<\S> +(non white-space character).
The matched pattern variable, $&, $` +(pre-match), $' (post-match), and $+ (last match) are also tainted if +C<use locale> is in effect and the regular expression contains C<\w>, +C<\W>, C<\s>, or C<\S>. + +=item B<Substitution operator> (C<s///>): + +Has the same behavior as the match operator. Also, the left +operand of C<=~> becomes tainted when C<use locale> is in effect, +if it is modified as a result of a substitution based on a regular +expression match involving C<\w>, C<\W>, C<\s>, or C<\S>; or of +case-mapping with C<\l>, C<\L>, C<\u> or C<\U>. + +=item B<In-memory formatting function> (sprintf()): + +Result is tainted if C<use locale> is in effect. + +=item B<Output formatting functions> (printf() and write()): + +Success/failure result is never tainted. + +=item B<Case-mapping functions> (lc(), lcfirst(), uc(), ucfirst()): + +Results are tainted if C<use locale> is in effect. + +=item B<POSIX locale-dependent functions> (localeconv(), strcoll(), +strftime(), strxfrm()): + +Results are never tainted. + +=item B<POSIX character class tests> (isalnum(), isalpha(), isdigit(), +isgraph(), islower(), isprint(), ispunct(), isspace(), isupper(), +isxdigit()): + +True/false results are never tainted. + +=back + +Three examples illustrate locale-dependent tainting. +The first program, which ignores its locale, won't run: a value taken +directly from the command line may not be used to name an output file +when taint checks are enabled. + + #!/usr/local/bin/perl -T + # Run with taint checking + + # Command line sanity check omitted... + $tainted_output_file = shift; + + open(F, ">$tainted_output_file") + or warn "Open of $tainted_output_file failed: $!\n"; + +The program can be made to run by "laundering" the tainted value through +a regular expression: the second example - which still ignores locale +information - runs, creating the file named on its command line +if it can.
+ + #!/usr/local/bin/perl -T + + $tainted_output_file = shift; + $tainted_output_file =~ m%[\w/]+%; + $untainted_output_file = $&; + + open(F, ">$untainted_output_file") + or warn "Open of $untainted_output_file failed: $!\n"; + +Compare this with a very similar program which is locale-aware: + + #!/usr/local/bin/perl -T + + $tainted_output_file = shift; + use locale; + $tainted_output_file =~ m%[\w/]+%; + $localized_output_file = $&; + + open(F, ">$localized_output_file") + or warn "Open of $localized_output_file failed: $!\n"; + +This third program fails to run because $& is tainted: it is the result +of a match involving C<\w> when C<use locale> is in effect. + +=head1 ENVIRONMENT + +=over 12 + +=item PERL_BADLANG + +A string that can suppress Perl's warning about failed locale settings +at startup. Failure can occur if the locale support in the operating +system is lacking (broken) in some way - or if you mistyped the name of +a locale when you set up your environment. If this environment variable +is absent, or has a value which does not evaluate to integer zero - that +is "0" or "" - Perl will complain about locale setting failures. + +B<NOTE>: PERL_BADLANG only gives you a way to hide the warning message. +The message tells about some problem in your system's locale support, +and you should investigate what the problem is. + +=back + +The following environment variables are not specific to Perl: They are +part of the standardized (ISO C, XPG4, POSIX 1.c) setlocale() method +for controlling an application's opinion on data. + +=over 12 + +=item LC_ALL + +C<LC_ALL> is the "override-all" locale environment variable. If it is +set, it overrides all the rest of the locale environment variables. + +=item LC_CTYPE + +In the absence of C<LC_ALL>, C<LC_CTYPE> chooses the character type +locale. In the absence of both C<LC_ALL> and C<LC_CTYPE>, C<LANG> +chooses the character type locale.
+ +=item LC_COLLATE + +In the absence of C<LC_ALL>, C<LC_COLLATE> chooses the collation +(sorting) locale. In the absence of both C<LC_ALL> and C<LC_COLLATE>, +C<LANG> chooses the collation locale. + +=item LC_MONETARY + +In the absence of C<LC_ALL>, C<LC_MONETARY> chooses the monetary +formatting locale. In the absence of both C<LC_ALL> and C<LC_MONETARY>, +C<LANG> chooses the monetary formatting locale. + +=item LC_NUMERIC + +In the absence of C<LC_ALL>, C<LC_NUMERIC> chooses the numeric format +locale. In the absence of both C<LC_ALL> and C<LC_NUMERIC>, C<LANG> +chooses the numeric format. + +=item LC_TIME + +In the absence of C<LC_ALL>, C<LC_TIME> chooses the date and time +formatting locale. In the absence of both C<LC_ALL> and C<LC_TIME>, +C<LANG> chooses the date and time formatting locale. + +=item LANG + +C<LANG> is the "catch-all" locale environment variable. If it is set, it +is used as the last resort after the overall C<LC_ALL> and the +category-specific C<LC_...>. + +=back + +=head1 NOTES + +=head2 Backward compatibility + +Versions of Perl prior to 5.004 B<mostly> ignored locale information, +generally behaving as if something similar to the C<"C"> locale (see +L<The setlocale function>) was always in force, even if the program +environment suggested otherwise. By default, Perl still behaves this +way so as to maintain backward compatibility. If you want a Perl +application to pay attention to locale information, you B<must> use +the S<C<use locale>> pragma (see L<The use locale Pragma>) to +instruct it to do so. + +Versions of Perl from 5.002 to 5.003 did use the C<LC_CTYPE> +information if that was available, that is, C<\w> did understand what +are the letters according to the locale environment variables. +The problem was that the user had no control over the feature: +if the C library supported locales, Perl used them. 
+ +=head2 I18N::Collate obsolete + +In versions of Perl prior to 5.004 per-locale collation was possible +using the C<I18N::Collate> library module. This module is now mildly +obsolete and should be avoided in new applications. The C<LC_COLLATE> +functionality is now integrated into the Perl core language: One can +use locale-specific scalar data completely normally with C<use locale>, +so there is no longer any need to juggle with the scalar references of +C<I18N::Collate>. + +=head2 Sort speed and memory use impacts + +Comparing and sorting by locale is usually slower than the default +sorting; slow-downs of two to four times have been observed. It will +also consume more memory: once a Perl scalar variable has participated +in any string comparison or sorting operation obeying the locale +collation rules, it will take 3-15 times more memory than before. (The +exact multiplier depends on the string's contents, the operating system +and the locale.) These downsides are dictated more by the operating +system's implementation of the locale system than by Perl. + +=head2 write() and LC_NUMERIC + +Formats are the only part of Perl which unconditionally uses information +from a program's locale; if a program's environment specifies an +LC_NUMERIC locale, it is always used to specify the decimal point +character in formatted output. Formatted output cannot be controlled by +C<use locale> because the pragma is tied to the block structure of the +program, and, for historical reasons, formats exist outside that block +structure. + +=head2 Freely available locale definitions + +There is a large collection of locale definitions at +C<ftp://dkuug.dk/i18n/WG15-collection>. You should be aware that it is +unsupported, and is not claimed to be fit for any purpose. If your +system allows the installation of arbitrary locales, you may find the +definitions useful as they are, or as a basis for the development of +your own locales.
+ +=head2 I18n and l10n + +"Internationalization" is often abbreviated as B<i18n> because its first +and last letters are separated by eighteen others. (You may guess why +the internalin ... internaliti ... i18n tends to get abbreviated.) In +the same way, "localization" is often abbreviated to B<l10n>. + +=head2 An imperfect standard + +Internationalization, as defined in the C and POSIX standards, can be +criticized as incomplete, ungainly, and having too large a granularity. +(Locales apply to a whole process, when it would arguably be more useful +to have them apply to a single thread, window group, or whatever.) They +also have a tendency, like standards groups, to divide the world into +nations, when we all know that the world can equally well be divided +into bankers, bikers, gamers, and so on. But, for now, it's the only +standard we've got. This may be construed as a bug. + +=head1 BUGS + +=head2 Broken systems + +In certain system environments the operating system's locale support +is broken and cannot be fixed or used by Perl. Such deficiencies can +and will result in mysterious hangs and/or Perl core dumps when the +C<use locale> is in effect. When confronted with such a system, +please report in excruciating detail to <F<perlbug@perl.com>>, and +complain to your vendor: maybe some bug fixes exist for these problems +in your operating system. Sometimes such bug fixes are called an +operating system upgrade. + +=head1 SEE ALSO + +L<POSIX (3)/isalnum>, L<POSIX (3)/isalpha>, L<POSIX (3)/isdigit>, +L<POSIX (3)/isgraph>, L<POSIX (3)/islower>, L<POSIX (3)/isprint>, +L<POSIX (3)/ispunct>, L<POSIX (3)/isspace>, L<POSIX (3)/isupper>, +L<POSIX (3)/isxdigit>, L<POSIX (3)/localeconv>, L<POSIX (3)/setlocale>, +L<POSIX (3)/strcoll>, L<POSIX (3)/strftime>, L<POSIX (3)/strtod>, +L<POSIX (3)/strxfrm> + +=head1 HISTORY + +Jarkko Hietaniemi's original F<perli18n.pod> heavily hacked by Dominic +Dunlop, assisted by the perl5-porters. 
+ +Last update: Wed Jan 22 11:04:58 EST 1997 diff --git a/pod/perllol.pod b/pod/perllol.pod index 11632e0c97..ac36364ae0 100644 --- a/pod/perllol.pod +++ b/pod/perllol.pod @@ -12,11 +12,11 @@ that applies here will also be applicable later on with the fancier data structures. A list of lists, or an array of an array if you would, is just a regular -old array @LoL that you can get at with two subscripts, like $LoL[3][2]. Here's +old array @LoL that you can get at with two subscripts, like C<$LoL[3][2]>. Here's a declaration of the array: # assign to our array a list of list references - @LoL = ( + @LoL = ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], [ "homer", "marge", "bart" ], @@ -27,7 +27,7 @@ a declaration of the array: Now you should be very careful that the outer bracket type is a round one, that is, parentheses. That's because you're assigning to -an @list, so you need parens. If you wanted there I<not> to be an @LoL, +an @list, so you need parentheses. If you wanted there I<not> to be an @LoL, but rather just a reference to it, you could do something more like this: # assign a reference to list of list references @@ -39,10 +39,10 @@ but rather just a reference to it, you could do something more like this: print $ref_to_LoL->[2][2]; -Notice that the outer bracket type has changed, and so our access syntax +Notice that the outer bracket type has changed, and so our access syntax has also changed. That's because unlike C, in perl you can't freely -interchange arrays and references thereto. $ref_to_LoL is a reference to an -array, whereas @LoL is an array proper. Likewise, $LoL[2] is not an +interchange arrays and references thereto. $ref_to_LoL is a reference to an +array, whereas @LoL is an array proper. Likewise, C<$LoL[2]> is not an array, but an array ref. 
So how come you can write these: $LoL[2][2] @@ -54,8 +54,8 @@ instead of having to write these: $ref_to_LoL->[2]->[2] Well, that's because the rule is that on adjacent brackets only (whether -square or curly), you are free to omit the pointer dereferencing array. -But you need not do so for the very first one if it's a scalar containing +square or curly), you are free to omit the pointer dereferencing arrow. +But you cannot do so for the very first one if it's a scalar containing a reference, which means that $ref_to_LoL always needs it. =head1 Growing Your Own @@ -72,7 +72,7 @@ each line is a row and each word an element. If you're trying to develop an while (<>) { @tmp = split; push @LoL, [ @tmp ]; - } + } You might also have loaded that from a function: @@ -81,7 +81,7 @@ You might also have loaded that from a function: } Or you might have had a temporary variable sitting around with the -list in it. +list in it. for $i ( 1 .. 10 ) { @tmp = somefunc($i); @@ -93,8 +93,8 @@ constructor. That's because this will be very wrong: $LoL[$i] = @tmp; -You see, assigning a named list like that to a scalar just counts the -number of elements in @tmp, which probably isn't what you want. +You see, assigning a named list like that to a scalar just counts the +number of elements in @tmp, which probably isn't what you want. If you are running under C<use strict>, you'll have to add some declarations to make it happy: @@ -104,58 +104,58 @@ declarations to make it happy: while (<>) { @tmp = split; push @LoL, [ @tmp ]; - } + } Of course, you don't need the temporary array to have a name at all: while (<>) { push @LoL, [ split ]; - } + } You also don't have to use push(). You could just make a direct assignment if you knew where you wanted to put it: my (@LoL, $i, $line); - for $i ( 0 .. 10 ) + for $i ( 0 .. 10 ) { $line = <>; $LoL[$i] = [ split ' ', $line ]; - } + } or even just my (@LoL, $i); - for $i ( 0 .. 10 ) + for $i ( 0 .. 
10 ) { $LoL[$i] = [ split ' ', <> ]; - } + } -You should in general be leary of using potential list functions -in a scalar context without explicitly stating such. +You should in general be leery of using potential list functions +in a scalar context without explicitly stating such. This would be clearer to the casual reader: my (@LoL, $i); - for $i ( 0 .. 10 ) + for $i ( 0 .. 10 ) { $LoL[$i] = [ split ' ', scalar(<>) ]; - } + } If you wanted to have a $ref_to_LoL variable as a reference to an array, you'd have to do something like this: while (<>) { push @$ref_to_LoL, [ split ]; - } + } -Actually, if you were using strict, you'd not only have to declare $ref_to_LoL as -you had to declare @LoL, but you'd I<also> having to initialize it to a -reference to an empty list. (This was a bug in 5.001m that's been fixed -for the 5.002 release.) +Actually, if you were using strict, you'd have to declare not only +$ref_to_LoL as you had to declare @LoL, but you'd I<also> have to +initialize it to a reference to an empty list. (This was a bug in +perl version 5.001m that's been fixed for the 5.002 release.) my $ref_to_LoL = []; while (<>) { push @$ref_to_LoL, [ split ]; - } + } Ok, now you can add new rows. What about adding new columns? If you're -just dealing with matrices, it's often easiest to use simple assignment: +dealing with just matrices, it's often easiest to use simple assignment: for $x (1 .. 10) { for $y (1 .. 10) { @@ -165,19 +165,19 @@ just dealing with matrices, it's often easiest to use simple assignment: for $x ( 3, 7, 9 ) { $LoL[$x][20] += func2($x); - } + } -It doesn't matter whether those elements are already +It doesn't matter whether those elements are already there or not: it'll gladly create them for you, setting intervening elements to C<undef> as need be.
-If you just wanted to append to a row, you'd have +If you wanted just to append to a row, you'd have to do something a bit funnier looking: # add new columns to an existing row push @{ $LoL[0] }, "wilma", "betty"; -Notice that I I<couldn't> just say: +Notice that I I<couldn't> say just: push $LoL[0], "wilma", "betty"; # WRONG! @@ -186,22 +186,22 @@ to push() must be a real array, not just a reference to such. =head1 Access and Printing -Now it's time to print your data structure out. How -are you going to do that? Well, if you only want one +Now it's time to print your data structure out. How +are you going to do that? Well, if you want only one of the elements, it's trivial: print $LoL[0][0]; If you want to print the whole thing, though, you can't -just say +say print @LoL; # WRONG -because you'll just get references listed, and perl will never -automatically dereference things for you. Instead, you have to +because you'll get just references listed, and perl will never +automatically dereference things for you. Instead, you have to roll yourself a loop or two. This prints the whole structure, using the shell-style for() construct to loop across the outer -set of subscripts. +set of subscripts. for $aref ( @LoL ) { print "\t [ @$aref ],\n"; @@ -221,7 +221,7 @@ or maybe even this. Notice the inner loop. } } -As you can see, it's getting a bit complicated. That's why +As you can see, it's getting a bit complicated. That's why sometimes is easier to take a temporary on your way through: for $i ( 0 .. $#LoL ) { @@ -231,7 +231,7 @@ sometimes is easier to take a temporary on your way through: } } -Hm... that's still a bit ugly. How about this: +Hmm... that's still a bit ugly. How about this: for $i ( 0 .. $#LoL ) { $aref = $LoL[$i]; @@ -243,7 +243,7 @@ Hm... that's still a bit ugly. 
How about this: =head1 Slices -If you want to get at a slide (part of a row) in a multidimensional +If you want to get at a slice (part of a row) in a multidimensional array, you're going to have to do some fancy subscripting. That's because while we have a nice synonym for single elements via the pointer arrow for dereferencing, no such convenience exists for slices. @@ -254,10 +254,10 @@ Here's how to do one operation using a loop. We'll assume an @LoL variable as before. @part = (); - $x = 4; + $x = 4; for ($y = 7; $y < 13; $y++) { push @part, $LoL[$x][$y]; - } + } That same loop could be replaced with a slice operation: @@ -266,16 +266,16 @@ That same loop could be replaced with a slice operation: but as you might well imagine, this is pretty rough on the reader. Ah, but what if you wanted a I<two-dimensional slice>, such as having -$x run from 4..8 and $y run from 7 to 12? Hm... here's the simple way: +$x run from 4..8 and $y run from 7 to 12? Hmm... here's the simple way: @newLoL = (); for ($startx = $x = 4; $x <= 8; $x++) { for ($starty = $y = 7; $x <= 12; $y++) { $newLoL[$x - $startx][$y - $starty] = $LoL[$x][$y]; } - } + } -We can reduce some of the looping through slices +We can reduce some of the looping through slices for ($x = 4; $x <= 8; $x++) { push @newLoL, [ @{ $LoL[$x] } [ 7..12 ] ]; @@ -293,13 +293,13 @@ If I were you, I'd put that in a function: @newLoL = splice_2D( \@LoL, 4 => 8, 7 => 12 ); sub splice_2D { my $lrr = shift; # ref to list of list refs! - my ($x_lo, $x_hi, + my ($x_lo, $x_hi, $y_lo, $y_hi) = @_; - return map { - [ @{ $lrr->[$_] } [ $y_lo .. $y_hi ] ] + return map { + [ @{ $lrr->[$_] } [ $y_lo .. $y_hi ] ] } $x_lo .. 
$x_hi; - } + } =head1 SEE ALSO @@ -308,6 +308,6 @@ perldata(1), perlref(1), perldsc(1) =head1 AUTHOR -Tom Christiansen <tchrist@perl.com> +Tom Christiansen <F<tchrist@perl.com>> Last udpate: Sat Oct 7 19:35:26 MDT 1995 diff --git a/pod/perlmod.pod b/pod/perlmod.pod index 80a4036246..4d0ad2d449 100644 --- a/pod/perlmod.pod +++ b/pod/perlmod.pod @@ -1,28 +1,29 @@ =head1 NAME -perlmod - Perl modules (packages) +perlmod - Perl modules (packages and symbol tables) =head1 DESCRIPTION =head2 Packages Perl provides a mechanism for alternative namespaces to protect packages -from stomping on each others variables. In fact, apart from certain -magical variables, there's really no such thing as a global variable in -Perl. The package statement declares the compilation unit as being in the -given namespace. The scope of the package declaration is from the -declaration itself through the end of the enclosing block (the same scope -as the local() operator). All further unqualified dynamic identifiers -will be in this namespace. A package statement only affects dynamic -variables--including those you've used local() on--but I<not> lexical -variables created with my(). Typically it would be the first declaration -in a file to be included by the C<require> or C<use> operator. You can -switch into a package in more than one place; it merely influences which -symbol table is used by the compiler for the rest of that block. You can -refer to variables and filehandles in other packages by prefixing the -identifier with the package name and a double colon: -C<$Package::Variable>. If the package name is null, the C<main> package -as assumed. That is, C<$::sail> is equivalent to C<$main::sail>. +from stomping on each other's variables. In fact, apart from certain +magical variables, there's really no such thing as a global variable +in Perl. The package statement declares the compilation unit as +being in the given namespace. 
The scope of the package declaration +is from the declaration itself through the end of the enclosing block, +C<eval>, C<sub>, or end of file, whichever comes first (the same scope +as the my() and local() operators). All further unqualified dynamic +identifiers will be in this namespace. A package statement affects +only dynamic variables--including those you've used local() on--but +I<not> lexical variables created with my(). Typically it would be +the first declaration in a file to be included by the C<require> or +C<use> operator. You can switch into a package in more than one place; +it influences merely which symbol table is used by the compiler for the +rest of that block. You can refer to variables and filehandles in other +packages by prefixing the identifier with the package name and a double +colon: C<$Package::Variable>. If the package name is null, the C<main> +package is assumed. That is, C<$::sail> is equivalent to C<$main::sail>. (The old package delimiter was a single quote, but double colon is now the preferred delimiter, in part because it's more readable @@ -39,10 +40,10 @@ It would treat package C<INNER> as a totally separate global package. Only identifiers starting with letters (or underscore) are stored in a package's symbol table. All other symbols are kept in package C<main>, including all of the punctuation variables like $_. In addition, the -identifiers STDIN, STDOUT, STDERR, ARGV, ARGVOUT, ENV, INC and SIG are +identifiers STDIN, STDOUT, STDERR, ARGV, ARGVOUT, ENV, INC, and SIG are forced to be in package C<main>, even when used for other purposes than -their built-in one. Note also that, if you have a package called C<m>, -C<s> or C<y>, then you can't use the qualified form of an identifier +their builtin one. Note also that, if you have a package called C<m>, +C<s>, or C<y>, then you can't use the qualified form of an identifier because it will be interpreted instead as a pattern match, a substitution, or a translation. 
@@ -62,23 +63,26 @@ temporarily switches back to the C<main> package to evaluate various expressions in the context of the C<main> package (or wherever you came from). See L<perldebug>. -See L<perlsub> for other scoping issues related to my() and local(), -or L<perlref> regarding closures. +The special symbol C<__PACKAGE__> contains the current package, but cannot +(easily) be used to construct variables. + +See L<perlsub> for other scoping issues related to my() and local(), +and L<perlref> regarding closures. =head2 Symbol Tables -The symbol table for a package happens to be stored in the associative -array of that name appended with two colons. The main symbol table's -name is thus C<%main::>, or C<%::> for short. Likewise the nested package -mentioned earlier is named C<%OUTER::INNER::>. +The symbol table for a package happens to be stored in the hash of that +name with two colons appended. The main symbol table's name is thus +C<%main::>, or C<%::> for short. Likewise the symbol table for the nested +package mentioned earlier is named C<%OUTER::INNER::>. -The value in each entry of the associative array is what you are referring -to when you use the C<*name> typeglob notation. In fact, the following -have the same effect, though the first is more efficient because it does -the symbol table lookups at compile time: +The value in each entry of the hash is what you are referring to when you +use the C<*name> typeglob notation. In fact, the following have the same +effect, though the first is more efficient because it does the symbol +table lookups at compile time: - local(*main::foo) = *main::bar; local($main::{'foo'}) = - $main::{'bar'}; + local *main::foo = *main::bar; + local $main::{foo} = $main::{bar}; You can use this to print out all the variables in a package, for instance. Here is F<dumpvar.pl> from the Perl library: @@ -112,16 +116,19 @@ instance.
Here is F<dumpvar.pl> from the Perl library: } Note that even though the subroutine is compiled in package C<dumpvar>, -the name of the subroutine is qualified so that its name is inserted -into package C<main>. +the name of the subroutine is qualified so that its name is inserted into +package C<main>. While popular many years ago, this is now considered +very poor style; in general, you should be writing modules and using the +normal export mechanism instead of hammering someone else's namespace, +even main's. Assignment to a typeglob performs an aliasing operation, i.e., *dick = *richard; -causes variables, subroutines and file handles accessible via the -identifier C<richard> to also be accessible via the symbol C<dick>. If -you only want to alias a particular variable or subroutine, you can +causes variables, subroutines, and file handles accessible via the +identifier C<richard> to also be accessible via the identifier C<dick>. If +you want to alias only a particular variable or subroutine, you can assign a reference instead: *dick = \$richard; @@ -140,12 +147,12 @@ thing. # now use %hashsym normally, and you # will affect the caller's %another_hash my %nhash = (); # do what you want - return \%nhash; + return \%nhash; } -On return, the reference wil overwrite the hash slot in the +On return, the reference will overwrite the hash slot in the symbol table specified by the *some_hash typeglob. This -is a somewhat tricky way of passing around refernces cheaply +is a somewhat tricky way of passing around references cheaply when you won't want to have to remember to dereference variables explicitly. @@ -154,6 +161,29 @@ Another use of symbol tables is for making "constant" scalars. *PI = \3.14159265358979; Now you cannot alter $PI, which is probably a good thing all in all. +This isn't the same as a constant subroutine (one prototyped to +take no arguments and to return a constant expression), which is +subject to optimization at compile-time. This isn't. 
See L<perlsub> +for details on these. + +You can say C<*foo{PACKAGE}> and C<*foo{NAME}> to find out what name and +package the *foo symbol table entry comes from. This may be useful +in a subroutine which is passed typeglobs as arguments + + sub identify_typeglob { + my $glob = shift; + print 'You gave me ', *{$glob}{PACKAGE}, '::', *{$glob}{NAME}, "\n"; + } + identify_typeglob *foo; + identify_typeglob *bar::baz; + +This prints + + You gave me main::foo + You gave me bar::baz + +The *foo{THING} notation can also be used to obtain references to the +individual elements of *foo, see L<perlref>. =head2 Package Constructors and Destructors @@ -161,13 +191,14 @@ There are two special subroutine definitions that function as package constructors and destructors. These are the C<BEGIN> and C<END> routines. The C<sub> is optional for these routines. -A C<BEGIN> subroutine is executed as soon as possible, that is, the -moment it is completely defined, even before the rest of the containing -file is parsed. You may have multiple C<BEGIN> blocks within a -file--they will execute in order of definition. Because a C<BEGIN> -block executes immediately, it can pull in definitions of subroutines -and such from other files in time to be visible to the rest of the -file. +A C<BEGIN> subroutine is executed as soon as possible, that is, the moment +it is completely defined, even before the rest of the containing file +is parsed. You may have multiple C<BEGIN> blocks within a file--they +will execute in order of definition. Because a C<BEGIN> block executes +immediately, it can pull in definitions of subroutines and such from other +files in time to be visible to the rest of the file. Once a C<BEGIN> +has run, it is immediately undefined and any code it used is returned to +Perl's memory pool. This means you can't ever explicitly call a C<BEGIN>. 
An C<END> subroutine is executed as late as possible, that is, when the interpreter is being exited, even if it is exiting as a result of a @@ -176,6 +207,11 @@ signal--you have to trap that yourself (if you can).) You may have multiple C<END> blocks within a file--they will execute in reverse order of definition; that is: last in, first out (LIFO). +Inside an C<END> subroutine C<$?> contains the value that the script is +going to pass to C<exit()>. You can modify C<$?> to change the exit +value of the script. Beware of changing C<$?> by accident (e.g. by +running something via C<system>). + Note that when you use the B<-n> and B<-p> switches to Perl, C<BEGIN> and C<END> work just as they do in B<awk>, as a degenerate case. @@ -184,9 +220,9 @@ and C<END> work just as they do in B<awk>, as a degenerate case. There is no special class syntax in Perl, but a package may function as a class if it provides subroutines that function as methods. Such a package may also derive some of its methods from another class package -by listing the other package name in its @ISA array. +by listing the other package name in its @ISA array. -For more on this, see L<perlobj>. +For more on this, see L<perltoot> and L<perlobj>. =head2 Perl Modules @@ -198,18 +234,70 @@ definition and make its semantics available implicitly through method calls on the class and its objects, without explicit exportation of any symbols. Or it can do a little of both. 
-For example, to start a normal module called Fred, create -a file called Fred.pm and put this at the start of it: +For example, to start a normal module called Some::Module, create +a file called Some/Module.pm and start with this template: + + package Some::Module; # assumes Some/Module.pm + + use strict; + + BEGIN { + use Exporter (); + use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + + # set the version for version checking + $VERSION = 1.00; + # if using RCS/CVS, this may be preferred + $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker + + @ISA = qw(Exporter); + @EXPORT = qw(&func1 &func2 &func4); + %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ], + + # your exported package globals go here, + # as well as any optionally exported functions + @EXPORT_OK = qw($Var1 %Hashit &func3); + } + use vars @EXPORT_OK; + + # non-exported package globals go here + use vars qw(@more $stuff); + + # initialize package globals, first exported ones + $Var1 = ''; + %Hashit = (); + + # then the others (which are still accessible as $Some::Module::stuff) + $stuff = ''; + @more = (); + + # all file-scoped lexicals must be created before + # the functions below that use them. + + # file-private lexicals go here + my $priv_var = ''; + my %secret_hash = (); + + # here's a file-private function as a closure, + # callable as &$priv_func; it cannot be prototyped. + my $priv_func = sub { + # stuff goes here. + }; - package Fred; - require Exporter; - @ISA = qw(Exporter); - @EXPORT = qw(func1 func2); - @EXPORT_OK = qw($sally @listabob %harry func3); + # make all your functions, whether exported or not; + # remember to put something interesting in the {} stubs + sub func1 {} # no prototype + sub func2() {} # proto'd void + sub func3($$) {} # proto'd to 2 scalars + + # this one isn't exported, but could be called!
+ sub func4(\%) {} # proto'd to 1 hash ref + + END { } # module clean-up code here (global destructor) Then go on to declare and use your variables in functions without any qualifications. -See L<Exporter> and the I<Perl Modules File> for details on +See L<Exporter> and the L<perlmodlib> for details on mechanics and style issues in module creation. Perl modules are included into your program by saying @@ -252,9 +340,9 @@ the rest of the current file. This will not work if you use C<require> instead of C<use>. With require you can get into this problem: require Cwd; # make Cwd:: accessible - $here = Cwd::getcwd(); + $here = Cwd::getcwd(); - use Cwd; # import names from Cwd:: + use Cwd; # import names from Cwd:: $here = getcwd(); require Cwd; # make Cwd:: accessible @@ -275,795 +363,16 @@ the module. If so, these will be entirely transparent to the user of the module. It is the responsibility of the F<.pm> file to load (or arrange to autoload) any additional functionality. The POSIX module happens to do both dynamic loading and autoloading, but the user can -just say C<use POSIX> to get it all. +say just C<use POSIX> to get it all. -For more information on writing extension modules, see L<perlxs> +For more information on writing extension modules, see L<perlxstut> and L<perlguts>. -=head1 NOTE - -Perl does not enforce private and public parts of its modules as you may -have been used to in other languages like C++, Ada, or Modula-17. Perl -doesn't have an infatuation with enforced privacy. It would prefer -that you stayed out of its living room because you weren't invited, not -because it has a shotgun. - -The module and its user have a contract, part of which is common law, -and part of which is "written". Part of the common law contract is -that a module doesn't pollute any namespace it wasn't asked to. The -written contract for the module (AKA documentation) may make other -provisions. 
But then you know when you C<use RedefineTheWorld> that -you're redefining the world and willing to take the consequences. - -=head1 THE PERL MODULE LIBRARY - -A number of modules are included the the Perl distribution. These are -described below, and all end in F<.pm>. You may also discover files in -the library directory that end in either F<.pl> or F<.ph>. These are old -libraries supplied so that old programs that use them still run. The -F<.pl> files will all eventually be converted into standard modules, and -the F<.ph> files made by B<h2ph> will probably end up as extension modules -made by B<h2xs>. (Some F<.ph> values may already be available through the -POSIX module.) The B<pl2pm> file in the distribution may help in your -conversion, but it's just a mechanical process, so is far from bullet proof. - -=head2 Pragmatic Modules - -They work somewhat like pragmas in that they tend to affect the compilation of -your program, and thus will usually only work well when used within a -C<use>, or C<no>. These are locally scoped, so an inner BLOCK -may countermand any of these by saying - - no integer; - no strict 'refs'; - -which lasts until the end of that BLOCK. - -The following programs are defined (and have their own documentation). - -=over 12 - -=item diagnostics - -Pragma to produce enhanced diagnostics - -=item integer - -Pragma to compute arithmetic in integer instead of double - -=item less - -Pragma to request less of something from the compiler - -=item overload - -Pragma for overloading operators - -=item sigtrap - -Pragma to enable stack backtrace on unexpected signals - -=item strict - -Pragma to restrict unsafe constructs - -=item subs - -Pragma to predeclare sub names - -=back - -=head2 Standard Modules - -Standard, bundled modules are all expected to behave in a well-defined -manner with respect to namespace pollution because they use the -Exporter module. See their own documentation for details. 
- -=over 12 - -=item AnyDBM_File - -provide framework for multiple DBMs - -=item AutoLoader - -load functions only on demand - -=item AutoSplit - -split a package for autoloading - -=item Benchmark - -benchmark running times of code - -=item Carp - -warn of errors (from perspective of caller) - -=item Config - -access Perl configuration option - -=item Cwd - -get pathname of current working directory - -=item DB_File - -Perl access to Berkeley DB - -=item Devel::SelfStubber - -generate stubs for a SelfLoading module - -=item DynaLoader - -Dynamically load C libraries into Perl code - -=item English - -use nice English (or awk) names for ugly punctuation variables - -=item Env - -perl module that imports environment variables - -=item Exporter - -provide inport/export controls for Perl modules - -=item ExtUtils::Liblist - -determine libraries to use and how to use them - -=item ExtUtils::MakeMaker - -create an extension Makefile - -=item ExtUtils::Manifest - -utilities to write and check a MANIFEST file - -=item ExtUtils::Mkbootstrap - -make a bootstrap file for use by DynaLoader - -=item ExtUtils::Miniperl - -!!!GOOD QUESTION!!! 
- -=item Fcntl - -load the C Fcntl.h defines - -=item File::Basename - -parse file specifications - -=item File::CheckTree - -run many filetest checks on a tree - -=item File::Find - -traverse a file tree - -=item FileHandle - -supply object methods for filehandles - -=item File::Path - -create or remove a series of directories - -=item Getopt::Long - -extended getopt processing - -=item Getopt::Std - -Process single-character switches with switch clustering - -=item I18N::Collate - -compare 8-bit scalar data according to the current locale - -=item IPC::Open2 - -a process for both reading and writing - -=item IPC::Open3 - -open a process for reading, writing, and error handling - -=item Net::Ping - -check a host for upness - -=item POSIX - -Perl interface to IEEE Std 1003.1 - -=item SelfLoader - -load functions only on demand - -=item Safe - -Creation controlled compartments in which perl code can be evaluated. - -=item Socket - -load the C socket.h defines and structure manipulators - -=item Test::Harness - -run perl standard test scripts with statistics - -=item Text::Abbrev - -rceate an abbreviation table from a list - -=back - -To find out I<all> the modules installed on your system, including -those without documentation or outside the standard release, do this: - - find `perl -e 'print "@INC"'` -name '*.pm' -print - -They should all have their own documentation installed and accessible via -your system man(1) command. If that fails, try the I<perldoc> program. - -=head2 Extension Modules - -Extension modules are written in C (or a mix of Perl and C) and get -dynamically loaded into Perl if and when you need them. Supported -extension modules include the Socket, Fcntl, and POSIX modules. - -Many popular C extension modules do not come bundled (at least, not -completely) due to their size, volatility, or simply lack of time for -adequate testing and configuration across the multitude of platforms on -which Perl was beta-tested. 
You are encouraged to look for them in -archie(1L), the Perl FAQ or Meta-FAQ, the WWW page, and even with their -authors before randomly posting asking for their present condition and -disposition. - -=head1 CPAN - -CPAN stands for the Comprehensive Perl Archive Network. This is a globally -replicated collection of all known Perl materials, including hundreds -of unbunded modules. Here are the major categories of modules: - -=over - -=item * -Language Extensions and Documentation Tools - -=item * -Development Support - -=item * -Operating System Interfaces - -=item * -Networking, Device Control (modems) and InterProcess Communication - -=item * -Data Types and Data Type Utilities - -=item * -Database Interfaces - -=item * -User Interfaces - -=item * -Interfaces to / Emulations of Other Programming Languages - -=item * -File Names, File Systems and File Locking (see also File Handles) - -=item * -String Processing, Language Text Processing, Parsing and Searching - -=item * -Option, Argument, Parameter and Configuration File Processing - -=item * -Internationalization and Locale - -=item * -Authentication, Security and Encryption - -=item * -World Wide Web, HTML, HTTP, CGI, MIME - -=item * -Server and Daemon Utilities - -=item * -Archiving and Compression - -=item * -Images, Pixmap and Bitmap Manipulation, Drawing and Graphing - -=item * -Mail and Usenet News - -=item * -Control Flow Utilities (callbacks and exceptions etc) - -=item * -File Handle and Input/Output Stream Utilities - -=item * -Miscellaneous Modules - -=back - -Some of the reguster CPAN sites as of this writing include the following. 
-You should try to choose one close to you: - -=over - -=item * -ftp://ftp.sterling.com/programming/languages/perl/ - -=item * -ftp://ftp.sedl.org/pub/mirrors/CPAN/ - -=item * -ftp://ftp.uoknor.edu/mirrors/CPAN/ - -=item * -ftp://ftp.delphi.com/pub/mirrors/packages/perl/CPAN/ - -=item * -ftp://uiarchive.cso.uiuc.edu/pub/lang/perl/CPAN/ - -=item * -ftp://ftp.cis.ufl.edu/pub/perl/CPAN/ - -=item * -ftp://ftp.switch.ch/mirror/CPAN/ - -=item * -ftp://ftp.sunet.se/pub/lang/perl/CPAN/ - -=item * -ftp://ftp.ci.uminho.pt/pub/lang/perl/ - -=item * -ftp://ftp.cs.ruu.nl/pub/PERL/CPAN/ - -=item * -ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/ - -=item * -ftp://ftp.rz.ruhr-uni-bochum.de/pub/programming/languages/perl/CPAN/ - -=item * -ftp://ftp.leo.org/pub/comp/programming/languages/perl/CPAN/ - -=item * -ftp://ftp.pasteur.fr/pub/computing/unix/perl/CPAN/ - -=item * -ftp://ftp.ibp.fr/pub/perl/CPAN/ - -=item * -ftp://ftp.funet.fi/pub/languages/perl/CPAN/ - -=item * -ftp://ftp.tekotago.ac.nz/pub/perl/CPAN/ - -=item * -ftp://ftp.mame.mu.oz.au/pub/perl/CPAN/ - -=item * -ftp://coombs.anu.edu.au/pub/perl/ - -=item * -ftp://dongpo.math.ncu.edu.tw/perl/CPAN/ - -=item * -ftp://ftp.lab.kdd.co.jp/lang/perl/CPAN/ - -=item * -ftp://ftp.is.co.za/programming/perl/CPAN/ - -=back - -For an up-to-date listing of CPAN sites, -see http://www.perl.com/perl/ or ftp://ftp.perl.com/perl/ . - -=head1 Modules: Creation, Use and Abuse - -(The following section is borrowed directly from Tim Bunce's modules -file, available at your nearest CPAN site.) - -Perl 5 implements a class using a package, but the presence of a -package doesn't imply the presence of a class. A package is just a -namespace. A class is a package that provides subroutines that can be -used as methods. A method is just a subroutine that expects, as its -first argument, either the name of a package (for "static" methods), -or a reference to something (for "virtual" methods). 
- -A module is a file that (by convention) provides a class of the same -name (sans the .pm), plus an import method in that class that can be -called to fetch exported symbols. This module may implement some of -its methods by loading dynamic C or C++ objects, but that should be -totally transparent to the user of the module. Likewise, the module -might set up an AUTOLOAD function to slurp in subroutine definitions on -demand, but this is also transparent. Only the .pm file is required to -exist. - -=head2 Guidelines for Module Creation - -=over 4 - -=item Do similar modules already exist in some form? - -If so, please try to reuse the existing modules either in whole or -by inheriting useful features into a new class. If this is not -practical try to get together with the module authors to work on -extending or enhancing the functionality of the existing modules. -A perfect example is the plethora of packages in perl4 for dealing -with command line options. - -If you are writing a module to expand an already existing set of -modules, please coordinate with the author of the package. It -helps if you follow the same naming scheme and module interaction -scheme as the original author. - -=item Try to design the new module to be easy to extend and reuse. - -Use blessed references. Use the two argument form of bless to bless -into the class name given as the first parameter of the constructor, -e.g.: - - sub new { - my $class = shift; - return bless {}, $class; - } - -or even this if you'd like it to be used as either a static -or a virtual method. - - sub new { - my $self = shift; - my $class = ref($self) || $self; - return bless {}, $class; - } - -Pass arrays as references so more parameters can be added later -(it's also faster). Convert functions into methods where -appropriate. Split large methods into smaller more flexible ones. -Inherit methods from other modules if appropriate. - -Avoid class name tests like: die "Invalid" unless ref $ref eq 'FOO'. 
-Generally you can delete the "eq 'FOO'" part with no harm at all. -Let the objects look after themselves! Generally, avoid hardwired -class names as far as possible. - -Avoid $r-E<gt>Class::func() where using @ISA=qw(... Class ...) and -$r-E<gt>func() would work (see perlbot man page for more details). - -Use autosplit so little used or newly added functions won't be a -burden to programs which don't use them. Add test functions to -the module after __END__ either using AutoSplit or by saying: - - eval join('',<main::DATA>) || die $@ unless caller(); - -Does your module pass the 'empty sub-class' test? If you say -"@SUBCLASS::ISA = qw(YOURCLASS);" your applications should be able -to use SUBCLASS in exactly the same way as YOURCLASS. For example, -does your application still work if you change: $obj = new YOURCLASS; -into: $obj = new SUBCLASS; ? - -Avoid keeping any state information in your packages. It makes it -difficult for multiple other packages to use yours. Keep state -information in objects. - -Always use C<-w>. Try to C<use strict;> (or C<use strict qw(...);>). -Remember that you can add C<no strict qw(...);> to individual blocks -of code which need less strictness. Always use C<-w>. Always use C<-w>! -Follow the guidelines in the perlstyle(1) manual. - -=item Some simple style guidelines - -The perlstyle manual supplied with perl has many helpful points. - -Coding style is a matter of personal taste. Many people evolve their -style over several years as they learn what helps them write and -maintain good code. Here's one set of assorted suggestions that -seem to be widely used by experienced developers: - -Use underscores to separate words. It is generally easier to read -$var_names_like_this than $VarNamesLikeThis, especially for -non-native speakers of English. It's also a simple rule that works -consistently with VAR_NAMES_LIKE_THIS. - -Package/Module names are an exception to this rule. 
Perl informally -reserves lowercase module names for 'pragma' modules like integer -and strict. Other modules normally begin with a capital letter and -use mixed case with no underscores (need to be short and portable). - -You may find it helpful to use letter case to indicate the scope -or nature of a variable. For example: - - $ALL_CAPS_HERE constants only (beware clashes with perl vars) - $Some_Caps_Here package-wide global/static - $no_caps_here function scope my() or local() variables - -Function and method names seem to work best as all lowercase. -E.g., $obj-E<gt>as_string(). - -You can use a leading underscore to indicate that a variable or -function should not be used outside the package that defined it. - -=item Select what to export. - -Do NOT export method names! - -Do NOT export anything else by default without a good reason! - -Exports pollute the namespace of the module user. If you must -export try to use @EXPORT_OK in preference to @EXPORT and avoid -short or common names to reduce the risk of name clashes. - -Generally anything not exported is still accessible from outside the -module using the ModuleName::item_name (or $blessed_ref-E<gt>method) -syntax. By convention you can use a leading underscore on names to -informally indicate that they are 'internal' and not for public use. - -(It is actually possible to get private functions by saying: -my $subref = sub { ... }; &$subref; But there's no way to call that -directly as a method, since a method must have a name in the symbol -table.) - -As a general rule, if the module is trying to be object oriented -then export nothing. If it's just a collection of functions then -@EXPORT_OK anything but use @EXPORT with caution. - -=item Select a name for the module. - -This name should be as descriptive, accurate and complete as -possible. Avoid any risk of ambiguity. Always try to use two or -more whole words. 
Generally the name should reflect what is special -about what the module does rather than how it does it. Please use -nested module names to informally group or categorise a module. -A module should have a very good reason not to have a nested name. -Module names should begin with a capital letter. - -Having 57 modules all called Sort will not make life easy for anyone -(though having 23 called Sort::Quick is only marginally better :-). -Imagine someone trying to install your module alongside many others. -If in any doubt ask for suggestions in comp.lang.perl.misc. - -If you are developing a suite of related modules/classes it's good -practice to use nested classes with a common prefix as this will -avoid namespace clashes. For example: Xyz::Control, Xyz::View, -Xyz::Model etc. Use the modules in this list as a naming guide. - -If adding a new module to a set, follow the original author's -standards for naming modules and the interface to methods in -those modules. - -To be portable each component of a module name should be limited to -11 characters. If it might be used on DOS then try to ensure each is -unique in the first 8 characters. Nested modules make this easier. - -=item Have you got it right? - -How do you know that you've made the right decisions? Have you -picked an interface design that will cause problems later? Have -you picked the most appropriate name? Do you have any questions? - -The best way to know for sure, and pick up many helpful suggestions, -is to ask someone who knows. Comp.lang.perl.misc is read by just about -all the people who develop modules and it's the best place to ask. - -All you need to do is post a short summary of the module, its -purpose and interfaces. A few lines on each of the main methods is -probably enough. (If you post the whole module it might be ignored -by busy people - generally the very people you want to read it!) 
- -Don't worry about posting if you can't say when the module will be -ready - just say so in the message. It might be worth inviting -others to help you, they may be able to complete it for you! - -=item README and other Additional Files. - -It's well known that software developers usually fully document the -software they write. If, however, the world is in urgent need of -your software and there is not enough time to write the full -documentation please at least provide a README file containing: - -=over 10 - -=item * -A description of the module/package/extension etc. - -=item * -A copyright notice - see below. - -=item * -Prerequisites - what else you may need to have. - -=item * -How to build it - possible changes to Makefile.PL etc. - -=item * -How to install it. - -=item * -Recent changes in this release, especially incompatibilities - -=item * -Changes / enhancements you plan to make in the future. - -=back - -If the README file seems to be getting too large you may wish to -split out some of the sections into separate files: INSTALL, -Copying, ToDo etc. - -=item Adding a Copyright Notice. - -How you choose to licence your work is a personal decision. -The general mechanism is to assert your Copyright and then make -a declaration of how others may copy/use/modify your work. - -Perl, for example, is supplied with two types of licence: The GNU -GPL and The Artistic License (see the files README, Copying and -Artistic). Larry has good reasons for NOT just using the GNU GPL. - -My personal recommendation, out of respect for Larry, Perl and the -perl community at large is to simply state something like: - - Copyright (c) 1995 Your Name. All rights reserved. - This program is free software; you can redistribute it and/or - modify it under the same terms as Perl itself. - -This statement should at least appear in the README file. You may -also wish to include it in a Copying file and your source files. 
-Remember to include the other words in addition to the Copyright. - -=item Give the module a version/issue/release number. - -To be fully compatible with the Exporter and MakeMaker modules you -should store your module's version number in a non-my package -variable called $VERSION. This should be a valid floating point -number with at least two digits after the decimal (ie hundredths, -e.g, $VERSION = "0.01"). Don't use a "1.3.2" style version. -See Exporter.pm in Perl5.001m or later for details. - -It may be handy to add a function or method to retrieve the number. -Use the number in announcements and archive file names when -releasing the module (ModuleName-1.02.tar.Z). -See perldoc ExtUtils::MakeMaker.pm for details. - -=item How to release and distribute a module. - -It's good idea to post an announcement of the availability of your -module (or the module itself if small) to the comp.lang.perl.announce -Usenet newsgroup. This will at least ensure very wide once-off -distribution. - -If possible you should place the module into a major ftp archive and -include details of it's location in your announcement. - -Some notes about ftp archives: Please use a long descriptive file -name which includes the version number. Most incoming directories -will not be readable/listable, i.e., you won't be able to see your -file after uploading it. Remember to send your email notification -message as soon as possible after uploading else your file may get -deleted automatically. Allow time for the file to be processed -and/or check the file has been processed before announcing its -location. - -FTP Archives for Perl Modules: - -Follow the instructions and links on - - http://franz.ww.tu-berlin.de/modulelist - -or upload to one of these sites: - - ftp://franz.ww.tu-berlin.de/incoming - ftp://ftp.cis.ufl.edu/incoming - -and notify upload@franz.ww.tu-berlin.de. 
- -By using the WWW interface you can ask the Upload Server to mirror -your modules from your ftp or WWW site into your own directory on -CPAN! - -Please remember to send me an updated entry for the Module list! - -=item Take care when changing a released module. - -Always strive to remain compatible with previous released versions -(see 2.2 above) Otherwise try to add a mechanism to revert to the -old behaviour if people rely on it. Document incompatible changes. - -=back - -=head2 Guidelines for Converting Perl 4 Library Scripts into Modules - -=over 4 - -=item There is no requirement to convert anything. - -If it ain't broke, don't fix it! Perl 4 library scripts should -continue to work with no problems. You may need to make some minor -changes (like escaping non-array @'s in double quoted strings) but -there is no need to convert a .pl file into a Module for just that. - -=item Consider the implications. - -All the perl applications which make use of the script will need to -be changed (slightly) if the script is converted into a module. Is -it worth it unless you plan to make other changes at the same time? - -=item Make the most of the opportunity. - -If you are going to convert the script to a module you can use the -opportunity to redesign the interface. The 'Guidelines for Module -Creation' above include many of the issues you should consider. - -=item The pl2pm utility will get you started. - -This utility will read *.pl files (given as parameters) and write -corresponding *.pm files. The pl2pm utilities does the following: - -=over 10 - -=item * -Adds the standard Module prologue lines - -=item * -Converts package specifiers from ' to :: - -=item * -Converts die(...) to croak(...) - -=item * -Several other minor changes - -=back - -Being a mechanical process pl2pm is not bullet proof. The converted -code will need careful checking, especially any package statements. -Don't delete the original .pl file till the new .pm one works! 
- -=back - -=head2 Guidelines for Reusing Application Code - -=over 4 - -=item Complete applications rarely belong in the Perl Module Library. - -=item Many applications contain some perl code which could be reused. - -Help save the world! Share your code in a form that makes it easy -to reuse. - -=item Break-out the reusable code into one or more separate module files. - -=item Take the opportunity to reconsider and redesign the interfaces. - -=item In some cases the 'application' can then be reduced to a small - -fragment of code built on top of the reusable modules. In these cases -the application could invoked as: - - perl -e 'use Module::Name; method(@ARGV)' ... -or - perl -mModule::Name ... (in perl5.002?) - -=back +=head1 SEE ALSO +See L<perlmodlib> for general style issues related to building Perl +modules and classes as well as descriptions of the standard library and +CPAN, L<Exporter> for how Perl's standard import/export mechanism works, +L<perltoot> for an in-depth tutorial on creating classes, L<perlobj> +for a hard-core reference document on objects, and L<perlsub> for an +explanation of functions and scoping. diff --git a/pod/perlmodlib.pod b/pod/perlmodlib.pod new file mode 100644 index 0000000000..cfb281dcc7 --- /dev/null +++ b/pod/perlmodlib.pod @@ -0,0 +1,1094 @@ +=head1 NAME + +perlmodlib - constructing new Perl modules and finding existing ones + +=head1 DESCRIPTION + +=head1 THE PERL MODULE LIBRARY + +A number of modules are included in the Perl distribution. These are +described below, and all end in F<.pm>. You may also discover files in +the library directory that end in either F<.pl> or F<.ph>. These are old +libraries supplied so that old programs that use them still run. The +F<.pl> files will all eventually be converted into standard modules, and +the F<.ph> files made by B<h2ph> will probably end up as extension modules +made by B<h2xs>. (Some F<.ph> values may already be available through the +POSIX module.)
The B<pl2pm> file in the distribution may help in your +conversion, but it's just a mechanical process and therefore far from +bulletproof. + +=head2 Pragmatic Modules + +They work somewhat like pragmas in that they tend to affect the compilation of +your program, and thus will usually work well only when used within a +C<use>, or C<no>. Most of these are locally scoped, so an inner BLOCK +may countermand any of these by saying: + + no integer; + no strict 'refs'; + +which lasts until the end of that BLOCK. + +Unlike the pragmas that affect the C<$^H> hints variable, the C<use +vars> and C<use subs> declarations are not BLOCK-scoped. They allow +you to predeclare variables or subroutines within a particular +I<file> rather than just a block. Such declarations are effective +for the entire file for which they were declared. You cannot rescind +them with C<no vars> or C<no subs>. + +The following pragmas are defined (and have their own documentation). + +=over 12 + +=item use autouse MODULE => qw(sub1 sub2 sub3) + +Defers C<require MODULE> until someone calls one of the specified +subroutines (which must be exported by MODULE). This pragma should be +used with caution, and only when necessary.
+ +=item blib + +manipulate @INC at compile time to use MakeMaker's uninstalled version +of a package + +=item diagnostics + +force verbose warning diagnostics + +=item integer + +compute arithmetic in integer instead of double + +=item less + +request less of something from the compiler + +=item lib + +manipulate @INC at compile time + +=item locale + +use or ignore current locale for builtin operations (see L<perllocale>) + +=item ops + +restrict named opcodes when compiling or running Perl code + +=item overload + +overload basic Perl operations + +=item sigtrap + +enable simple signal handling + +=item strict + +restrict unsafe constructs + +=item subs + +predeclare sub names + +=item vmsish + +adopt certain VMS-specific behaviors + +=item vars + +predeclare global variable names + +=back + +=head2 Standard Modules + +Standard, bundled modules are all expected to behave in a well-defined +manner with respect to namespace pollution because they use the +Exporter module. See their own documentation for details. 
+ +=over 12 + +=item AnyDBM_File + +provide framework for multiple DBMs + +=item AutoLoader + +load functions only on demand + +=item AutoSplit + +split a package for autoloading + +=item Benchmark + +benchmark running times of code + +=item CPAN + +interface to Comprehensive Perl Archive Network + +=item CPAN::FirstTime + +create a CPAN configuration file + +=item CPAN::Nox + +run CPAN while avoiding compiled extensions + +=item Carp + +warn of errors (from perspective of caller) + +=item Class::Struct + +declare struct-like datatypes + +=item Config + +access Perl configuration information + +=item Cwd + +get pathname of current working directory + +=item DB_File + +access to Berkeley DB + +=item Devel::SelfStubber + +generate stubs for a SelfLoading module + +=item DirHandle + +supply object methods for directory handles + +=item DynaLoader + +dynamically load C libraries into Perl code + +=item English + +use nice English (or awk) names for ugly punctuation variables + +=item Env + +import environment variables + +=item Exporter + +implements default import method for modules + +=item ExtUtils::Embed + +utilities for embedding Perl in C/C++ applications + +=item ExtUtils::Install + +install files from here to there + +=item ExtUtils::Liblist + +determine libraries to use and how to use them + +=item ExtUtils::MM_OS2 + +methods to override Unix behaviour in ExtUtils::MakeMaker + +=item ExtUtils::MM_Unix + +methods used by ExtUtils::MakeMaker + +=item ExtUtils::MM_VMS + +methods to override Unix behaviour in ExtUtils::MakeMaker + +=item ExtUtils::MakeMaker + +create an extension Makefile + +=item ExtUtils::Manifest + +utilities to write and check a MANIFEST file + +=item ExtUtils::Mkbootstrap + +make a bootstrap file for use by DynaLoader + +=item ExtUtils::Mksymlists + +write linker options files for dynamic extension + +=item ExtUtils::testlib + +add blib/* directories to @INC + +=item Fcntl + +load the C Fcntl.h defines + +=item File::Basename + +split a 
pathname into pieces + +=item File::CheckTree + +run many filetest checks on a tree + +=item File::Compare + +compare files or filehandles + +=item File::Copy + +copy files or filehandles + +=item File::Find + +traverse a file tree + +=item File::Path + +create or remove a series of directories + +=item File::stat + +by-name interface to Perl's builtin stat() functions + +=item FileCache + +keep more files open than the system permits + +=item FileHandle + +supply object methods for filehandles + +=item FindBin + +locate directory of original perl script + +=item GDBM_File + +access to the gdbm library + +=item Getopt::Long + +extended processing of command line options + +=item Getopt::Std + +process single-character switches with switch clustering + +=item I18N::Collate + +compare 8-bit scalar data according to the current locale + +=item IO + +load various IO modules + +=item IO::File + +supply object methods for filehandles + +=item IO::Handle + +supply object methods for I/O handles + +=item IO::Pipe + +supply object methods for pipes + +=item IO::Seekable + +supply seek based methods for I/O objects + +=item IO::Select + +OO interface to the select system call + +=item IO::Socket + +object interface to socket communications + +=item IPC::Open2 + +open a process for both reading and writing + +=item IPC::Open3 + +open a process for reading, writing, and error handling + +=item Math::BigFloat + +arbitrary length float math package + +=item Math::BigInt + +arbitrary size integer math package + +=item Math::Complex + +complex numbers and associated mathematical functions + +=item Math::Trig + +simple interface to parts of Math::Complex for those who +need trigonometric functions only for real numbers + +=item NDBM_File + +tied access to ndbm files + +=item Net::Ping + +Hello, anybody home? 
+ +=item Net::hostent + +by-name interface to Perl's builtin gethost*() functions + +=item Net::netent + +by-name interface to Perl's builtin getnet*() functions + +=item Net::protoent + +by-name interface to Perl's builtin getproto*() functions + +=item Net::servent + +by-name interface to Perl's builtin getserv*() functions + +=item Opcode + +disable named opcodes when compiling or running perl code + +=item Pod::Text + +convert POD data to formatted ASCII text + +=item POSIX + +interface to IEEE Standard 1003.1 + +=item SDBM_File + +tied access to sdbm files + +=item Safe + +compile and execute code in restricted compartments + +=item Search::Dict + +search for key in dictionary file + +=item SelectSaver + +save and restore selected file handle + +=item SelfLoader + +load functions only on demand + +=item Shell + +run shell commands transparently within perl + +=item Socket + +load the C socket.h defines and structure manipulators + +=item Symbol + +manipulate Perl symbols and their names + +=item Sys::Hostname + +try every conceivable way to get hostname + +=item Sys::Syslog + +interface to the Unix syslog(3) calls + +=item Term::Cap + +termcap interface + +=item Term::Complete + +word completion module + +=item Term::ReadLine + +interface to various C<readline> packages + +=item Test::Harness + +run perl standard test scripts with statistics + +=item Text::Abbrev + +create an abbreviation table from a list + +=item Text::ParseWords + +parse text into an array of tokens + +=item Text::Soundex + +implementation of the Soundex Algorithm as described by Knuth + +=item Text::Tabs + +expand and unexpand tabs per the Unix expand(1) and unexpand(1) + +=item Text::Wrap + +line wrapping to form simple paragraphs + +=item Tie::Hash + +base class definitions for tied hashes + +=item Tie::RefHash + +base class definitions for tied hashes with references as keys + +=item Tie::Scalar + +base class definitions for tied scalars + +=item Tie::SubstrHash + +fixed-table-size, 
fixed-key-length hashing + +=item Time::Local + +efficiently compute time from local and GMT time + +=item Time::gmtime + +by-name interface to Perl's builtin gmtime() function + +=item Time::localtime + +by-name interface to Perl's builtin localtime() function + +=item Time::tm + +internal object used by Time::gmtime and Time::localtime + +=item UNIVERSAL + +base class for ALL classes (blessed references) + +=item User::grent + +by-name interface to Perl's builtin getgr*() functions + +=item User::pwent + +by-name interface to Perl's builtin getpw*() functions + +=back + +To find out I<all> the modules installed on your system, including +those without documentation or outside the standard release, do this: + + find `perl -e 'print "@INC"'` -name '*.pm' -print + +They should all have their own documentation installed and accessible via +your system man(1) command. If that fails, try the I<perldoc> program. + +=head2 Extension Modules + +Extension modules are written in C (or a mix of Perl and C) and may be +statically linked or in general are +dynamically loaded into Perl if and when you need them. Supported +extension modules include the Socket, Fcntl, and POSIX modules. + +Many popular C extension modules do not come bundled (at least, not +completely) due to their sizes, volatility, or simply lack of time for +adequate testing and configuration across the multitude of platforms on +which Perl was beta-tested. You are encouraged to look for them in +archie(1L), the Perl FAQ or Meta-FAQ, the WWW page, and even with their +authors before randomly posting asking for their present condition and +disposition. + +=head1 CPAN + +CPAN stands for the Comprehensive Perl Archive Network. This is a globally +replicated collection of all known Perl materials, including hundreds +of unbundled modules. 
Here are the major categories of modules: + +=over + +=item * +Language Extensions and Documentation Tools + +=item * +Development Support + +=item * +Operating System Interfaces + +=item * +Networking, Device Control (modems) and InterProcess Communication + +=item * +Data Types and Data Type Utilities + +=item * +Database Interfaces + +=item * +User Interfaces + +=item * +Interfaces to / Emulations of Other Programming Languages + +=item * +File Names, File Systems and File Locking (see also File Handles) + +=item * +String Processing, Language Text Processing, Parsing, and Searching + +=item * +Option, Argument, Parameter, and Configuration File Processing + +=item * +Internationalization and Locale + +=item * +Authentication, Security, and Encryption + +=item * +World Wide Web, HTML, HTTP, CGI, MIME + +=item * +Server and Daemon Utilities + +=item * +Archiving and Compression + +=item * +Images, Pixmap and Bitmap Manipulation, Drawing, and Graphing + +=item * +Mail and Usenet News + +=item * +Control Flow Utilities (callbacks and exceptions etc) + +=item * +File Handle and Input/Output Stream Utilities + +=item * +Miscellaneous Modules + +=back + +The registered CPAN sites as of this writing include the following. 
+You should try to choose one close to you: + +=over + +=item * +Africa + + South Africa ftp://ftp.is.co.za/programming/perl/CPAN/ + +=item * +Asia + + Hong Kong ftp://ftp.hkstar.com/pub/CPAN/ + Japan ftp://ftp.jaist.ac.jp/pub/lang/perl/CPAN/ + ftp://ftp.lab.kdd.co.jp/lang/perl/CPAN/ + South Korea ftp://ftp.nuri.net/pub/CPAN/ + Taiwan ftp://dongpo.math.ncu.edu.tw/perl/CPAN/ + ftp://ftp.wownet.net/pub2/PERL/ + +=item * +Australasia + + Australia ftp://ftp.netinfo.com.au/pub/perl/CPAN/ + New Zealand ftp://ftp.tekotago.ac.nz/pub/perl/CPAN/ + +=item * +Europe + + Austria ftp://ftp.tuwien.ac.at/pub/languages/perl/CPAN/ + Belgium ftp://ftp.kulnet.kuleuven.ac.be/pub/mirror/CPAN/ + Czech Republic ftp://sunsite.mff.cuni.cz/Languages/Perl/CPAN/ + Denmark ftp://sunsite.auc.dk/pub/languages/perl/CPAN/ + Finland ftp://ftp.funet.fi/pub/languages/perl/CPAN/ + France ftp://ftp.ibp.fr/pub/perl/CPAN/ + ftp://ftp.pasteur.fr/pub/computing/unix/perl/CPAN/ + Germany ftp://ftp.gmd.de/packages/CPAN/ + ftp://ftp.leo.org/pub/comp/programming/languages/perl/CPAN/ + ftp://ftp.mpi-sb.mpg.de/pub/perl/CPAN/ + ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN/ + ftp://ftp.uni-erlangen.de/pub/source/Perl/CPAN/ + ftp://ftp.uni-hamburg.de/pub/soft/lang/perl/CPAN/ + Greece ftp://ftp.ntua.gr/pub/lang/perl/ + Hungary ftp://ftp.kfki.hu/pub/packages/perl/CPAN/ + Italy ftp://cis.utovrm.it/CPAN/ + the Netherlands ftp://ftp.cs.ruu.nl/pub/PERL/CPAN/ + ftp://ftp.EU.net/packages/cpan/ + Norway ftp://ftp.uit.no/pub/languages/perl/cpan/ + Poland ftp://ftp.pk.edu.pl/pub/lang/perl/CPAN/ + ftp://sunsite.icm.edu.pl/pub/CPAN/ + Portugal ftp://ftp.ci.uminho.pt/pub/lang/perl/ + ftp://ftp.telepac.pt/pub/CPAN/ + Russia ftp://ftp.sai.msu.su/pub/lang/perl/CPAN/ + Slovenia ftp://ftp.arnes.si/software/perl/CPAN/ + Spain ftp://ftp.etse.urv.es/pub/mirror/perl/ + ftp://ftp.rediris.es/mirror/CPAN/ + Sweden ftp://ftp.sunet.se/pub/lang/perl/CPAN/ + UK ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/ + ftp://sunsite.doc.ic.ac.uk/packages/CPAN/ 
+ ftp://unix.hensa.ac.uk/mirrors/perl-CPAN/ + +=item * +North America + + Ontario ftp://ftp.utilis.com/public/CPAN/ + ftp://enterprise.ic.gc.ca/pub/perl/CPAN/ + Manitoba ftp://theory.uwinnipeg.ca/pub/CPAN/ + California ftp://ftp.digital.com/pub/plan/perl/CPAN/ + ftp://ftp.cdrom.com/pub/perl/CPAN/ + Colorado ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ + Florida ftp://ftp.cis.ufl.edu/pub/perl/CPAN/ + Illinois ftp://uiarchive.uiuc.edu/pub/lang/perl/CPAN/ + Massachusetts ftp://ftp.iguide.com/pub/mirrors/packages/perl/CPAN/ + New York ftp://ftp.rge.com/pub/languages/perl/ + North Carolina ftp://ftp.duke.edu/pub/perl/ + Oklahoma ftp://ftp.ou.edu/mirrors/CPAN/ + Oregon http://www.perl.org/CPAN/ + ftp://ftp.orst.edu/pub/packages/CPAN/ + Pennsylvania ftp://ftp.epix.net/pub/languages/perl/ + Texas ftp://ftp.sedl.org/pub/mirrors/CPAN/ + ftp://ftp.metronet.com/pub/perl/ + +=item * +South America + + Chile ftp://sunsite.dcc.uchile.cl/pub/Lang/perl/CPAN/ + +=back + +For an up-to-date listing of CPAN sites, +see F<http://www.perl.com/perl/CPAN> or F<ftp://ftp.perl.com/perl/>. + +=head1 Modules: Creation, Use, and Abuse + +(The following section is borrowed directly from Tim Bunce's modules +file, available at your nearest CPAN site.) + +Perl implements a class using a package, but the presence of a +package doesn't imply the presence of a class. A package is just a +namespace. A class is a package that provides subroutines that can be +used as methods. A method is just a subroutine that expects, as its +first argument, either the name of a package (for "static" methods), +or a reference to something (for "virtual" methods). + +A module is a file that (by convention) provides a class of the same +name (sans the .pm), plus an import method in that class that can be +called to fetch exported symbols. This module may implement some of +its methods by loading dynamic C or C++ objects, but that should be +totally transparent to the user of the module. 
Likewise, the module +might set up an AUTOLOAD function to slurp in subroutine definitions on +demand, but this is also transparent. Only the F<.pm> file is required to +exist. See L<perlsub>, L<perltoot>, and L<AutoLoader> for details about +the AUTOLOAD mechanism. + +=head2 Guidelines for Module Creation + +=over 4 + +=item Do similar modules already exist in some form? + +If so, please try to reuse the existing modules either in whole or +by inheriting useful features into a new class. If this is not +practical try to get together with the module authors to work on +extending or enhancing the functionality of the existing modules. +A perfect example is the plethora of packages in perl4 for dealing +with command line options. + +If you are writing a module to expand an already existing set of +modules, please coordinate with the author of the package. It +helps if you follow the same naming scheme and module interaction +scheme as the original author. + +=item Try to design the new module to be easy to extend and reuse. + +Use blessed references. Use the two argument form of bless to bless +into the class name given as the first parameter of the constructor, +e.g.,: + + sub new { + my $class = shift; + return bless {}, $class; + } + +or even this if you'd like it to be used as either a static +or a virtual method. + + sub new { + my $self = shift; + my $class = ref($self) || $self; + return bless {}, $class; + } + +Pass arrays as references so more parameters can be added later +(it's also faster). Convert functions into methods where +appropriate. Split large methods into smaller more flexible ones. +Inherit methods from other modules if appropriate. + +Avoid class name tests like: C<die "Invalid" unless ref $ref eq 'FOO'>. +Generally you can delete the "C<eq 'FOO'>" part with no harm at all. +Let the objects look after themselves! Generally, avoid hard-wired +class names as far as possible. + +Avoid C<$r-E<gt>Class::func()> where using C<@ISA=qw(... 
Class ...)> and +C<$r-E<gt>func()> would work (see L<perlbot> for more details). + +Use autosplit so little used or newly added functions won't be a +burden to programs which don't use them. Add test functions to +the module after __END__ either using AutoSplit or by saying: + + eval join('',<main::DATA>) || die $@ unless caller(); + +Does your module pass the 'empty subclass' test? If you say +"C<@SUBCLASS::ISA = qw(YOURCLASS);>" your applications should be able +to use SUBCLASS in exactly the same way as YOURCLASS. For example, +does your application still work if you change: C<$obj = new YOURCLASS;> +into: C<$obj = new SUBCLASS;> ? + +Avoid keeping any state information in your packages. It makes it +difficult for multiple other packages to use yours. Keep state +information in objects. + +Always use B<-w>. Try to C<use strict;> (or C<use strict qw(...);>). +Remember that you can add C<no strict qw(...);> to individual blocks +of code which need less strictness. Always use B<-w>. Always use B<-w>! +Follow the guidelines in the perlstyle(1) manual. + +=item Some simple style guidelines + +The perlstyle manual supplied with perl has many helpful points. + +Coding style is a matter of personal taste. Many people evolve their +style over several years as they learn what helps them write and +maintain good code. Here's one set of assorted suggestions that +seem to be widely used by experienced developers: + +Use underscores to separate words. It is generally easier to read +$var_names_like_this than $VarNamesLikeThis, especially for +non-native speakers of English. It's also a simple rule that works +consistently with VAR_NAMES_LIKE_THIS. + +Package/Module names are an exception to this rule. Perl informally +reserves lowercase module names for 'pragma' modules like integer +and strict. Other modules normally begin with a capital letter and +use mixed case with no underscores (need to be short and portable). 
+ +You may find it helpful to use letter case to indicate the scope +or nature of a variable. For example: + + $ALL_CAPS_HERE constants only (beware clashes with perl vars) + $Some_Caps_Here package-wide global/static + $no_caps_here function scope my() or local() variables + +Function and method names seem to work best as all lowercase. +e.g., C<$obj-E<gt>as_string()>. + +You can use a leading underscore to indicate that a variable or +function should not be used outside the package that defined it. + +=item Select what to export. + +Do NOT export method names! + +Do NOT export anything else by default without a good reason! + +Exports pollute the namespace of the module user. If you must +export try to use @EXPORT_OK in preference to @EXPORT and avoid +short or common names to reduce the risk of name clashes. + +Generally anything not exported is still accessible from outside the +module using the ModuleName::item_name (or C<$blessed_ref-E<gt>method>) +syntax. By convention you can use a leading underscore on names to +indicate informally that they are 'internal' and not for public use. + +(It is actually possible to get private functions by saying: +C<my $subref = sub { ... }; &$subref;>. But there's no way to call that +directly as a method, because a method must have a name in the symbol +table.) + +As a general rule, if the module is trying to be object oriented +then export nothing. If it's just a collection of functions then +@EXPORT_OK anything but use @EXPORT with caution. + +=item Select a name for the module. + +This name should be as descriptive, accurate, and complete as +possible. Avoid any risk of ambiguity. Always try to use two or +more whole words. Generally the name should reflect what is special +about what the module does rather than how it does it. Please use +nested module names to group informally or categorize a module. +There should be a very good reason for a module not to have a nested name. 
+Module names should begin with a capital letter. + +Having 57 modules all called Sort will not make life easy for anyone +(though having 23 called Sort::Quick is only marginally better :-). +Imagine someone trying to install your module alongside many others. +If in any doubt ask for suggestions in comp.lang.perl.misc. + +If you are developing a suite of related modules/classes it's good +practice to use nested classes with a common prefix as this will +avoid namespace clashes. For example: Xyz::Control, Xyz::View, +Xyz::Model etc. Use the modules in this list as a naming guide. + +If adding a new module to a set, follow the original author's +standards for naming modules and the interface to methods in +those modules. + +To be portable each component of a module name should be limited to +11 characters. If it might be used on MS-DOS then try to ensure each is +unique in the first 8 characters. Nested modules make this easier. + +=item Have you got it right? + +How do you know that you've made the right decisions? Have you +picked an interface design that will cause problems later? Have +you picked the most appropriate name? Do you have any questions? + +The best way to know for sure, and pick up many helpful suggestions, +is to ask someone who knows. Comp.lang.perl.misc is read by just about +all the people who develop modules and it's the best place to ask. + +All you need to do is post a short summary of the module, its +purpose and interfaces. A few lines on each of the main methods is +probably enough. (If you post the whole module it might be ignored +by busy people - generally the very people you want to read it!) + +Don't worry about posting if you can't say when the module will be +ready - just say so in the message. It might be worth inviting +others to help you, they may be able to complete it for you! + +=item README and other Additional Files. + +It's well known that software developers usually fully document the +software they write. 
If, however, the world is in urgent need of +your software and there is not enough time to write the full +documentation please at least provide a README file containing: + +=over 10 + +=item * +A description of the module/package/extension etc. + +=item * +A copyright notice - see below. + +=item * +Prerequisites - what else you may need to have. + +=item * +How to build it - possible changes to Makefile.PL etc. + +=item * +How to install it. + +=item * +Recent changes in this release, especially incompatibilities + +=item * +Changes / enhancements you plan to make in the future. + +=back + +If the README file seems to be getting too large you may wish to +split out some of the sections into separate files: INSTALL, +Copying, ToDo etc. + +=over 4 + +=item Adding a Copyright Notice. + +How you choose to license your work is a personal decision. +The general mechanism is to assert your Copyright and then make +a declaration of how others may copy/use/modify your work. + +Perl, for example, is supplied with two types of licence: The GNU +GPL and The Artistic Licence (see the files README, Copying, and +Artistic). Larry has good reasons for NOT just using the GNU GPL. + +My personal recommendation, out of respect for Larry, Perl, and the +perl community at large is to state something simply like: + + Copyright (c) 1995 Your Name. All rights reserved. + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +This statement should at least appear in the README file. You may +also wish to include it in a Copying file and your source files. +Remember to include the other words in addition to the Copyright. + +=item Give the module a version/issue/release number. + +To be fully compatible with the Exporter and MakeMaker modules you +should store your module's version number in a non-my package +variable called $VERSION. 
This should be a floating point +number with at least two digits after the decimal (i.e., hundredths, +e.g., C<$VERSION = "0.01">). Don't use a "1.3.2" style version. +See Exporter.pm in Perl5.001m or later for details. + +It may be handy to add a function or method to retrieve the number. +Use the number in announcements and archive file names when +releasing the module (ModuleName-1.02.tar.Z). +See perldoc ExtUtils::MakeMaker.pm for details. + +=item How to release and distribute a module. + +It's a good idea to post an announcement of the availability of your +module (or the module itself if small) to the comp.lang.perl.announce +Usenet newsgroup. This will at least ensure very wide once-off +distribution. + +If possible you should place the module into a major ftp archive and +include details of its location in your announcement. + +Some notes about ftp archives: Please use a long descriptive file +name which includes the version number. Most incoming directories +will not be readable/listable, i.e., you won't be able to see your +file after uploading it. Remember to send your email notification +message as soon as possible after uploading else your file may get +deleted automatically. Allow time for the file to be processed +and/or check the file has been processed before announcing its +location. + +FTP Archives for Perl Modules: + +Follow the instructions and links on + + http://franz.ww.tu-berlin.de/modulelist + +or upload to one of these sites: + + ftp://franz.ww.tu-berlin.de/incoming + ftp://ftp.cis.ufl.edu/incoming + +and notify <F<upload@franz.ww.tu-berlin.de>>. + +By using the WWW interface you can ask the Upload Server to mirror +your modules from your ftp or WWW site into your own directory on +CPAN! + +Please remember to send me an updated entry for the Module list! + +=item Take care when changing a released module.
+ +Always strive to remain compatible with previously released versions +(see 2.2 above). Otherwise try to add a mechanism to revert to the +old behaviour if people rely on it. Document incompatible changes. + +=back + +=back + +=head2 Guidelines for Converting Perl 4 Library Scripts into Modules + +=over 4 + +=item There is no requirement to convert anything. + +If it ain't broke, don't fix it! Perl 4 library scripts should +continue to work with no problems. You may need to make some minor +changes (like escaping non-array @'s in double quoted strings) but +there is no need to convert a .pl file into a Module for just that. + +=item Consider the implications. + +All the perl applications which make use of the script will need to +be changed (slightly) if the script is converted into a module. Is +it worth it unless you plan to make other changes at the same time? + +=item Make the most of the opportunity. + +If you are going to convert the script to a module you can use the +opportunity to redesign the interface. The 'Guidelines for Module +Creation' above include many of the issues you should consider. + +=item The pl2pm utility will get you started. + +This utility will read *.pl files (given as parameters) and write +corresponding *.pm files. The pl2pm utility does the following: + +=over 10 + +=item * +Adds the standard Module prologue lines + +=item * +Converts package specifiers from ' to :: + +=item * +Converts die(...) to croak(...) + +=item * +Several other minor changes + +=back + +Being a mechanical process pl2pm is not bullet proof. The converted +code will need careful checking, especially any package statements. +Don't delete the original .pl file till the new .pm one works! + +=back + +=head2 Guidelines for Reusing Application Code + +=over 4 + +=item Complete applications rarely belong in the Perl Module Library. + +=item Many applications contain some perl code which could be reused. + +Help save the world!
Share your code in a form that makes it easy +to reuse. + +=item Break-out the reusable code into one or more separate module files. + +=item Take the opportunity to reconsider and redesign the interfaces. + +=item In some cases the 'application' can then be reduced to a small + +fragment of code built on top of the reusable modules. In these cases +the application could be invoked as: + + perl -e 'use Module::Name; method(@ARGV)' ... +or + perl -mModule::Name ... (in perl5.002 or higher) + +=back + +=head1 NOTE + +Perl does not enforce private and public parts of its modules as you may +have been used to in other languages like C++, Ada, or Modula-17. Perl +doesn't have an infatuation with enforced privacy. It would prefer +that you stayed out of its living room because you weren't invited, not +because it has a shotgun. + +The module and its user have a contract, part of which is common law, +and part of which is "written". Part of the common law contract is +that a module doesn't pollute any namespace it wasn't asked to. The +written contract for the module (A.K.A. documentation) may make other +provisions. But then you know when you C<use RedefineTheWorld> that +you're redefining the world and willing to take the consequences. diff --git a/pod/perlobj.pod b/pod/perlobj.pod index 81c6c96246..7428334ee2 100644 --- a/pod/perlobj.pod +++ b/pod/perlobj.pod @@ -4,10 +4,13 @@ perlobj - Perl objects =head1 DESCRIPTION -First of all, you need to understand what references are in Perl. See -L<perlref> for that. +First of all, you need to understand what references are in Perl. +See L<perlref> for that. Second, if you still find the following +reference work too complicated, a tutorial on object-oriented programming +in Perl can be found in L<perltoot>. -Here are three very simple definitions that you should find reassuring. +If you're still with us, then +here are three very simple definitions that you should find reassuring.
=over 4 @@ -24,7 +27,7 @@ with object references. =item 3. A method is simply a subroutine that expects an object reference (or -a package name, for static methods) as the first argument. +a package name, for class methods) as the first argument. =back @@ -41,11 +44,11 @@ constructor: package Critter; sub new { bless {} } -The C<{}> constructs a reference to an anonymous hash containing no +The C<{}> constructs a reference to an anonymous hash containing no key/value pairs. The bless() takes that reference and tells the object it references that it's now a Critter, and returns the reference. -This is for convenience, since the referenced object itself knows that -it has been blessed, and its reference to it could have been returned +This is for convenience, because the referenced object itself knows that +it has been blessed, and the reference to it could have been returned directly, like this: sub new { @@ -64,8 +67,9 @@ that wish to call methods in the class as part of the construction: return $self; } -If you care about inheritance (and you should; see L<perlmod/"Modules: -Creation, Use and Abuse">), then you want to use the two-arg form of bless +If you care about inheritance (and you should; see +L<perlmod/"Modules: Creation, Use, and Abuse">), +then you want to use the two-arg form of bless so that your constructors may be inherited: sub new { @@ -78,7 +82,7 @@ so that your constructors may be inherited: Or if you expect people to call not just C<CLASS-E<gt>new()> but also C<$obj-E<gt>new()>, then use something like this. The initialize() -method used will be of whatever $class we blessed the +method used will be of whatever $class we blessed the object into: sub new { @@ -93,17 +97,17 @@ object into: Within the class package, the methods will typically deal with the reference as an ordinary reference. Outside the class package, the reference is generally treated as an opaque value that may -only be accessed through the class's methods. 
+be accessed only through the class's methods. A constructor may re-bless a referenced object currently belonging to another class, but then the new class is responsible for all cleanup -later. The previous blessing is forgotten, as an object may only -belong to one class at a time. (Although of course it's free to +later. The previous blessing is forgotten, as an object may belong +to only one class at a time. (Although of course it's free to inherit methods from many classes.) A clarification: Perl objects are blessed. References are not. Objects know which package they belong to. References do not. The bless() -function simply uses the reference in order to find the object. Consider +function uses the reference to find the object. Consider the following example: $a = {}; @@ -111,13 +115,13 @@ the following example: bless $a, BLAH; print "\$b is a ", ref($b), "\n"; -This reports $b as being a BLAH, so obviously bless() +This reports $b as being a BLAH, so obviously bless() operated on the object and not on the reference. =head2 A Class is Simply a Package Unlike say C++, Perl doesn't provide any special syntax for class -definitions. You just use a package as a class by putting method +definitions. You use a package as a class by putting method definitions into the class. There is a special array within each package called @ISA which says @@ -126,7 +130,7 @@ package. This is how Perl implements inheritance. Each element of the @ISA array is just the name of another package that happens to be a class package. The classes are searched (depth first) for missing methods in the order that they occur in @ISA. The classes accessible -through @ISA are known as base classes of the current class. +through @ISA are known as base classes of the current class. If a missing method is found in one of the base classes, it is cached in the current class for efficiency. Changing @ISA or defining new @@ -137,10 +141,12 @@ that is called on behalf of the missing method. 
If neither a method nor an AUTOLOAD routine is found in @ISA, then one last try is made for the method (or an AUTOLOAD routine) in a class -called UNIVERSAL. If that doesn't work, Perl finally gives up and +called UNIVERSAL. (Several commonly used methods are automatically +supplied in the UNIVERSAL class; see L<"Default UNIVERSAL methods"> for +more details.) If that doesn't work, Perl finally gives up and complains. -Perl classes only do method inheritance. Data inheritance is left +Perl classes do only method inheritance. Data inheritance is left up to the class itself. By and large, this is not a problem in Perl, because most classes model the attributes of their object using an anonymous hash, which serves as its own little namespace to be @@ -153,17 +159,18 @@ Unlike say C++, Perl doesn't provide any special syntax for method definition. (It does provide a little syntax for method invocation though. More on that later.) A method expects its first argument to be the object or package it is being invoked on. There are just two -types of methods, which we'll call static and virtual, in honor of -the two C++ method types they most closely resemble. +types of methods, which we'll call class and instance. +(Sometimes you'll hear these called static and virtual, in honor of +the two C++ method types they most closely resemble.) -A static method expects a class name as the first argument. It +A class method expects a class name as the first argument. It provides functionality for the class as a whole, not for any individual -object belonging to the class. Constructors are typically static -methods. Many static methods simply ignore their first argument, since +object belonging to the class. Constructors are typically class +methods. Many class methods simply ignore their first argument, because they already know what package they're in, and don't care what package -they were invoked via. 
(These aren't necessarily the same, since -static methods follow the inheritance tree just like ordinary virtual -methods.) Another typical use for static methods is to look up an +they were invoked via. (These aren't necessarily the same, because +class methods follow the inheritance tree just like ordinary instance +methods.) Another typical use for class methods is to look up an object by name: sub find { @@ -171,7 +178,7 @@ object by name: $objtable{$name}; } -A virtual method expects an object reference as its first argument. +An instance method expects an object reference as its first argument. Typically it shifts the first argument into a "self" or "this" variable, and then uses that as an ordinary reference. @@ -191,9 +198,9 @@ already had an "indirect object" syntax that you use when you say print STDERR "help!!!\n"; -This same syntax can be used to call either static or virtual methods. -We'll use the two methods defined above, the static method to lookup -an object reference and the virtual method to print out its attributes. +This same syntax can be used to call either class or instance methods. +We'll use the two methods defined above, the class method to lookup +an object reference and the instance method to print out its attributes. $fred = find Critter "Fred"; display $fred 'Height', 'Weight'; @@ -220,7 +227,7 @@ Indirect object method calls are parsed using the same rule as list operators: "If it looks like a function, it is a function". (Presuming for the moment that you think two words in a row can look like a function name. C++ programmers seem to think so with some regularity, -especially when the first word is "new".) Thus, the parens of +especially when the first word is "new".) 
Thus, the parentheses of new Critter ('Barney', 1.5, 70) @@ -242,8 +249,8 @@ call, being sure to pass the requisite first argument explicitly: $fred = MyCritter::find("Critter", "Fred"); MyCritter::display($fred, 'Height', 'Weight'); -Note however, that this does not do any inheritance. If you merely -wish to specify that Perl should I<START> looking for a method in a +Note however, that this does not do any inheritance. If you wish +merely to specify that Perl should I<START> looking for a method in a particular package, use an ordinary method call, but qualify the method name with the package like this: @@ -251,13 +258,13 @@ name with the package like this: $fred->MyCritter::display('Height', 'Weight'); If you're trying to control where the method search begins I<and> you're -executing in the class itself, then you may use the SUPER pseudoclass, +executing in the class itself, then you may use the SUPER pseudo class, which says to start looking in your base class's @ISA list without having -to explicitly name it: +to name it explicitly: $self->SUPER::display('Height', 'Weight'); -Please note that the C<SUPER::> construct is I<only> meaningful within the +Please note that the C<SUPER::> construct is meaningful I<only> within the class. Sometimes you want to call a method when you don't know the method name @@ -267,6 +274,56 @@ with a simple scalar variable containing the method name: $method = $fast ? "findfirst" : "findbest"; $fred->$method(@args); +=head2 Default UNIVERSAL methods + +The C<UNIVERSAL> package automatically contains the following methods that +are inherited by all other classes: + +=over 4 + +=item isa(CLASS) + +C<isa> returns I<true> if its object is blessed into a subclass of C<CLASS> + +C<isa> is also exportable and can be called as a sub with two arguments. This +allows the ability to check what a reference points to. Example + + use UNIVERSAL qw(isa); + + if(isa($ref, 'ARRAY')) { + ... 
+ } + +=item can(METHOD) + +C<can> checks to see if its object has a method called C<METHOD>, +if it does then a reference to the sub is returned, if it does not then +I<undef> is returned. + +=item VERSION( [NEED] ) + +C<VERSION> returns the version number of the class (package). If the +NEED argument is given then it will check that the current version (as +defined by the $VERSION variable in the given package) is not less than +NEED; it will die if this is not the case. This method is normally +called as a class method. This method is called automatically by the +C<VERSION> form of C<use>. + + use A 1.2 qw(some imported subs); + # implies: + A->VERSION(1.2); + +=back + +B<NOTE:> C<can> directly uses Perl's internal code for method lookup, and +C<isa> uses a very similar method and caching strategy. This may cause +strange effects if the Perl code dynamically changes @ISA in any package. + +You may add other methods to the UNIVERSAL class via Perl or XS code. +You do not need to C<use UNIVERSAL> in order to make these methods +available to your program. This is necessary only if you wish to +have C<isa> available as a plain subroutine in the current package. + =head2 Destructors When the last reference to an object goes away, the object is @@ -277,9 +334,9 @@ your class. It will automatically be called at the appropriate moment, and you can do any extra cleanup you need to do. Perl doesn't do nested destruction for you. If your constructor -reblessed a reference from one of your base classes, your DESTROY may -need to call DESTROY for any base classes that need it. But this only -applies to reblessed objects--an object reference that is merely +re-blessed a reference from one of your base classes, your DESTROY may +need to call DESTROY for any base classes that need it. But this applies +to only re-blessed objects--an object reference that is merely I<CONTAINED> in the current object will be freed and destroyed automatically when the current object is freed. 
@@ -288,19 +345,19 @@ automatically when the current object is freed. An indirect object is limited to a name, a scalar variable, or a block, because it would have to do too much lookahead otherwise, just like any other postfix dereference in the language. The left side of -E<gt> is not so -limited, because it's an infix operator, not a postfix operator. +limited, because it's an infix operator, not a postfix operator. -That means that below, A and B are equivalent to each other, and C and D -are equivalent, but AB and CD are different: +That means that in the following, A and B are equivalent to each other, and +C and D are equivalent, but A/B and C/D are different: - A: method $obref->{"fieldname"} + A: method $obref->{"fieldname"} B: (method $obref)->{"fieldname"} - C: $obref->{"fieldname"}->method() + C: $obref->{"fieldname"}->method() D: method {$obref->{"fieldname"}} =head2 Summary -That's about all there is to it. Now you just need to go off and buy a +That's about all there is to it. Now you need just to go off and buy a book about object-oriented design methodology, and bang your forehead with it for the next six months or so. @@ -315,12 +372,12 @@ probably won't matter. A more serious concern is that unreachable memory with a non-zero reference count will not normally get freed. Therefore, this is a bad -idea: +idea: { my $a; $a = \$a; - } + } Even though $a I<should> go away, it can't. When building recursive data structures, you'll have to break the self-reference yourself explicitly @@ -334,7 +391,7 @@ node such as one might use in a sophisticated tree structure: $node->{LEFT} = $node->{RIGHT} = $node; $node->{DATA} = [ @_ ]; return bless $node => $class; - } + } If you create nodes like that, they (currently) won't go away unless you break their self reference yourself. (In other words, this is not to be @@ -349,7 +406,7 @@ destroyed. This is essential to support Perl as an embedded or a multithreadable language. 
For example, this program demonstrates Perl's two-phased garbage collection: - #!/usr/bin/perl + #!/usr/bin/perl package Subtle; sub new { @@ -357,12 +414,12 @@ two-phased garbage collection: $test = \$test; warn "CREATING " . \$test; return bless \$test; - } + } sub DESTROY { my $self = shift; warn "DESTROYING $self"; - } + } package main; @@ -372,7 +429,7 @@ two-phased garbage collection: my $b = Subtle->new; $$a = 0; # break selfref warn "leaving block"; - } + } warn "just exited block"; warn "time to die..."; @@ -390,12 +447,12 @@ When run as F</tmp/test>, the following output is produced: DESTROYING Subtle=SCALAR(0x8e57c) during global destruction. Notice that "global destruction" bit there? That's the thread -garbage collector reaching the unreachable. +garbage collector reaching the unreachable. Objects are always destructed, even when regular refs aren't and in fact are destructed in a separate pass before ordinary refs just to try to prevent object destructors from using refs that have been themselves -destructed. Plain refs are only garbage collected if the destruct level +destructed. Plain refs are only garbage-collected if the destruct level is greater than 0. You can test the higher levels of global destruction by setting the PERL_DESTRUCT_LEVEL environment variable, presuming C<-DDEBUGGING> was enabled during perl build time. @@ -405,6 +462,8 @@ at a future date. =head1 SEE ALSO -You should also check out L<perlbot> for other object tricks, traps, and tips, -as well as L<perlmod> for some style guides on constructing both modules +A kinder, gentler tutorial on object-oriented programming in Perl can +be found in L<perltoot>. +You should also check out L<perlbot> for other object tricks, traps, and tips, +as well as L<perlmodlib> for some style guides on constructing both modules and classes. 
diff --git a/pod/perlop.pod b/pod/perlop.pod index 483a686ebb..d853865520 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -8,7 +8,7 @@ Perl operators have the following associativity and precedence, listed from highest precedence to lowest. Note that all operators borrowed from C keep the same precedence relationship with each other, even where C's precedence is slightly screwy. (This makes learning -Perl easier for C folks.) With very few exceptions, these all +Perl easier for C folks.) With very few exceptions, these all operate on scalar values only, not array values. left terms and list operators (leftward) @@ -16,7 +16,7 @@ operate on scalar values only, not array values. nonassoc ++ -- right ** right ! ~ \ and unary + and - - left =~ !~ + left =~ !~ left * / % x left + - . left << >> @@ -27,7 +27,7 @@ operate on scalar values only, not array values. left | ^ left && left || - nonassoc .. + nonassoc .. ... right ?: right = += -= *= etc. left , => @@ -42,8 +42,8 @@ In the following sections, these operators are covered in precedence order. =head2 Terms and List Operators (Leftward) -Any TERM is of highest precedence of Perl. These includes variables, -quote and quotelike operators, any expression in parentheses, +A TERM has the highest precedence in Perl. They include variables, +quote and quote-like operators, any expression in parentheses, and any function whose arguments are parenthesized. Actually, there aren't really functions in this sense, just list operators and unary operators behaving as functions because you put parentheses around @@ -56,7 +56,7 @@ just like a normal function call. In the absence of parentheses, the precedence of list operators such as C<print>, C<sort>, or C<chmod> is either very high or very low depending on -whether you look at the left side of operator or the right side of it. +whether you are looking at the left side or the right side of the operator. 
For example, in @ary = (1, 3, sort 4, 2); @@ -66,7 +66,7 @@ the commas on the right of the sort are evaluated before the sort, but the commas on the left are evaluated after. In other words, list operators tend to gobble up all the arguments that follow them, and then act like a simple TERM with regard to the preceding expression. -Note that you have to be careful with parens: +Note that you have to be careful with parentheses: # These evaluate exit before doing the print: print($foo, exit); # Obviously not what you want. @@ -81,14 +81,14 @@ Also note that print ($foo & 255) + 1, "\n"; -probably doesn't do what you expect at first glance. See +probably doesn't do what you expect at first glance. See L<Named Unary Operators> for more discussion of this. Also parsed as terms are the C<do {}> and C<eval {}> constructs, as -well as subroutine and method calls, and the anonymous +well as subroutine and method calls, and the anonymous constructors C<[]> and C<{}>. -See also L<Quote and Quotelike Operators> toward the end of this section, +See also L<Quote and Quote-like Operators> toward the end of this section, as well as L<"I/O Operators">. =head2 The Arrow Operator @@ -104,16 +104,16 @@ containing the method name, and the left side must either be an object (a blessed reference) or a class name (that is, a package name). See L<perlobj>. -=head2 Autoincrement and Autodecrement +=head2 Auto-increment and Auto-decrement "++" and "--" work as in C. That is, if placed before a variable, they increment or decrement the variable before returning the value, and if placed after, increment or decrement the variable after returning the value. -The autoincrement operator has a little extra built-in magic to it. If +The auto-increment operator has a little extra builtin magic to it. If you increment a variable that is numeric, or that has ever been used in a numeric context, you get a normal increment. 
If, however, the -variable has only been used in string contexts since it was set, and +variable has been used in only string contexts since it was set, and has a value that is not null and matches the pattern C</^[a-zA-Z]*[0-9]*$/>, the increment is done as a string, preserving each character within its range, with carry: @@ -123,7 +123,7 @@ character within its range, with carry: print ++($foo = 'Az'); # prints 'Ba' print ++($foo = 'zz'); # prints 'aaa' -The autodecrement operator is not magical. +The auto-decrement operator is not magical. =head2 Exponentiation @@ -134,7 +134,7 @@ internally.) =head2 Symbolic Unary Operators -Unary "!" performs logical negation, i.e. "not". See also C<not> for a lower +Unary "!" performs logical negation, i.e., "not". See also C<not> for a lower precedence version of this. Unary "-" performs arithmetic negation if the operand is numeric. If @@ -144,12 +144,13 @@ starts with a plus or minus, a string starting with the opposite sign is returned. One effect of these rules is that C<-bareword> is equivalent to C<"-bareword">. -Unary "~" performs bitwise negation, i.e. 1's complement. +Unary "~" performs bitwise negation, i.e., 1's complement. +(See also L<Integer Arithmetic>.) Unary "+" has no effect whatsoever, even on strings. It is useful syntactically for separating a function name from a parenthesized expression that would otherwise be interpreted as the complete list of function -arguments. (See examples above under L<List Operators>.) +arguments. (See examples above under L<Terms and List Operators (Leftward)>.) Unary "\" creates a reference to whatever follows it. See L<perlref>. Do not confuse this behavior with the behavior of backslash within a @@ -166,9 +167,8 @@ supposed to be searched, substituted, or translated instead of the default $_. The return value indicates the success of the operation. 
(If the right argument is an expression rather than a search pattern, substitution, or translation, it is interpreted as a search pattern at run -time. This is less efficient than an explicit search, since the pattern -must be compiled every time the expression is evaluated--unless you've -used C</o>.) +time. This can be less efficient than an explicit search, because the +pattern must be compiled every time the expression is evaluated.) Binary "!~" is just like "=~" except the return value is negated in the logical sense. @@ -179,12 +179,17 @@ Binary "*" multiplies two numbers. Binary "/" divides two numbers. -Binary "%" computes the modulus of the two numbers. +Binary "%" computes the modulus of two numbers. Given integer +operands C<$a> and C<$b>: If C<$b> is positive, then C<$a % $b> is +C<$a> minus the largest multiple of C<$b> that is not greater than +C<$a>. If C<$b> is negative, then C<$a % $b> is C<$a> minus the +smallest multiple of C<$b> that is not less than C<$a> (i.e., the +result will be less than or equal to zero). Binary "x" is the repetition operator. In a scalar context, it returns a string consisting of the left operand repeated the number of times specified by the right operand. In a list context, if the left -operand is a list in parens, it repeats the list. +operand is a list in parentheses, it repeats the list. print '-' x 80; # print row of dashes @@ -205,12 +210,12 @@ Binary "." concatenates two strings. =head2 Shift Operators Binary "<<" returns the value of its left argument shifted left by the -number of bits specified by the right argument. Arguments should be -integers. +number of bits specified by the right argument. Arguments should be +integers. (See also L<Integer Arithmetic>.) -Binary ">>" returns the value of its left argument shifted right by the -number of bits specified by the right argument. Arguments should be -integers. 
+Binary ">>" returns the value of its left argument shifted right by +the number of bits specified by the right argument. Arguments should +be integers. (See also L<Integer Arithmetic>.) =head2 Named Unary Operators @@ -240,20 +245,20 @@ but, because * is higher precedence than ||: rand (10) * 20; # (rand 10) * 20 rand +(10) * 20; # rand (10 * 20) -See also L<"List Operators">. +See also L<"Terms and List Operators (Leftward)">. =head2 Relational Operators -Binary "<" returns true if the left argument is numerically less than +Binary "E<lt>" returns true if the left argument is numerically less than the right argument. -Binary ">" returns true if the left argument is numerically greater +Binary "E<gt>" returns true if the left argument is numerically greater than the right argument. -Binary "<=" returns true if the left argument is numerically less than +Binary "E<lt>=" returns true if the left argument is numerically less than or equal to the right argument. -Binary ">=" returns true if the left argument is numerically greater +Binary "E<gt>=" returns true if the left argument is numerically greater than or equal to the right argument. Binary "lt" returns true if the left argument is stringwise less than @@ -276,8 +281,9 @@ the right argument. Binary "!=" returns true if the left argument is numerically not equal to the right argument. -Binary "<=>" returns -1, 0, or 1 depending on whether the left argument is numerically -less than, equal to, or greater than the right argument. +Binary "E<lt>=E<gt>" returns -1, 0, or 1 depending on whether the left +argument is numerically less than, equal to, or greater than the right +argument. Binary "eq" returns true if the left argument is stringwise equal to the right argument. @@ -288,15 +294,21 @@ to the right argument. Binary "cmp" returns -1, 0, or 1 depending on whether the left argument is stringwise less than, equal to, or greater than the right argument. 
+"lt", "le", "ge", "gt" and "cmp" use the collation (sort) order specified +by the current locale if C<use locale> is in effect. See L<perllocale>. + =head2 Bitwise And Binary "&" returns its operands ANDed together bit by bit. +(See also L<Integer Arithmetic>.) =head2 Bitwise Or and Exclusive Or Binary "|" returns its operands ORed together bit by bit. +(See also L<Integer Arithmetic>.) Binary "^" returns its operands XORed together bit by bit. +(See also L<Integer Arithmetic>.) =head2 C-style Logical And @@ -340,12 +352,12 @@ operators depending on the context. In a list context, it returns an array of values counting (by ones) from the left value to the right value. This is useful for writing C<for (1..10)> loops and for doing slice operations on arrays. Be aware that under the current implementation, -a temporary array is created, so you'll burn a lot of memory if you +a temporary array is created, so you'll burn a lot of memory if you write something like this: for (1 .. 1_000_000) { # code - } + } In a scalar context, ".." returns a boolean value. The operator is bistable, like a flip-flop, and emulates the line-range (comma) operator @@ -384,7 +396,7 @@ As a list operator: @foo = @foo[$#foo-4 .. $#foo]; # slice last 5 items The range operator (in a list context) makes use of the magical -autoincrement algorithm if the operands are strings. You +auto-increment algorithm if the operands are strings. You can say @alphabet = ('A' .. 'Z'); @@ -409,11 +421,11 @@ like an if-then-else. If the argument before the ? is true, the argument before the : is returned, otherwise the argument after the : is returned. For example: - printf "I have %d dog%s.\n", $n, + printf "I have %d dog%s.\n", $n, ($n == 1) ? '' : "s"; Scalar or list context propagates downward into the 2nd -or 3rd argument, whichever is selected. +or 3rd argument, whichever is selected. $a = $ok ? $b : $c; # get a scalar @a = $ok ? 
@b : @c; # get an array @@ -439,8 +451,8 @@ is equivalent to $a = $a + 2; although without duplicating any side effects that dereferencing the lvalue -might trigger, such as from tie(). Other assignment operators work similarly. -The following are recognized: +might trigger, such as from tie(). Other assignment operators work similarly. +The following are recognized: **= += *= &= <<= &&= -= /= |= >>= ||= @@ -475,7 +487,7 @@ argument and returns that value. This is just like C's comma operator. In a list context, it's just the list argument separator, and inserts both its arguments into the list. -The => digraph is mostly just a synonym for the comma operator. It's useful for +The =E<gt> digraph is mostly just a synonym for the comma operator. It's useful for documenting arguments that come in pairs. As of release 5.001, it also forces any word to the left of it to be interpreted as a string. @@ -490,7 +502,7 @@ operators without the need for extra parentheses: open HANDLE, "filename" or die "Can't open: $!\n"; -See also discussion of list operators in L<List Operators (Leftward)>. +See also discussion of list operators in L<Terms and List Operators (Leftward)>. =head2 Logical Not @@ -501,14 +513,14 @@ It's the equivalent of "!" except for the very low precedence. Binary "and" returns the logical conjunction of the two surrounding expressions. It's equivalent to && except for the very low -precedence. This means that it short-circuits: i.e. the right +precedence. This means that it short-circuits: i.e., the right expression is evaluated only if the left expression is true. =head2 Logical or and Exclusive Or Binary "or" returns the logical disjunction of the two surrounding expressions. It's equivalent to || except for the very low -precedence. This means that it short-circuits: i.e. the right +precedence. This means that it short-circuits: i.e., the right expression is evaluated only if the left expression is false. 
Binary "xor" returns the exclusive-OR of the two surrounding expressions. @@ -526,16 +538,16 @@ Address-of operator. (But see the "\" operator for taking a reference.) =item unary * -Dereference-address operator. (Perl's prefix dereferencing +Dereference-address operator. (Perl's prefix dereferencing operators are typed: $, @, %, and &.) =item (TYPE) -Type casting operator. +Type casting operator. =back -=head2 Quote and Quotelike Operators +=head2 Quote and Quote-like Operators While we usually think of quotes as literal values, in Perl they function as operators, providing various kinds of interpolating and @@ -543,7 +555,7 @@ pattern matching capabilities. Perl provides customary quote characters for these behaviors, but also provides a way for you to choose your quote character for any of them. In the following table, a C<{}> represents any pair of delimiters you choose. Non-bracketing delimiters use -the same character fore and aft, but the 4 sorts of brackets +the same character fore and aft, but the 4 sorts of brackets (round, angle, square, curly) will all nest. Customary Generic Meaning Interpolates @@ -558,13 +570,13 @@ the same character fore and aft, but the 4 sorts of brackets For constructs that do interpolation, variables beginning with "C<$>" or "C<@>" are interpolated, as are the following sequences: - \t tab - \n newline - \r return - \f form feed - \b backspace - \a alarm (bell) - \e escape + \t tab (HT, TAB) + \n newline (LF, NL) + \r return (CR) + \f form feed (FF) + \b backspace (BS) + \a alarm (bell) (BEL) + \e escape (ESC) \033 octal char \x1b hex char \c[ control char @@ -575,6 +587,9 @@ are interpolated, as are the following sequences: \E end case modification \Q quote regexp metacharacters till \E +If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> +and C<\U> is taken from the current locale. See L<perllocale>. + Patterns are subject to an additional level of interpretation as a regular expression.
This is done as a second pass, after variables are interpolated, so that regular expressions may be incorporated into the @@ -582,13 +597,13 @@ pattern from the variables. If this is not what you want, use C<\Q> to interpolate a variable literally. Apart from the above, there are no multiple levels of interpolation. In -particular, contrary to the expectations of shell programmers, backquotes +particular, contrary to the expectations of shell programmers, back-quotes do I<NOT> interpolate within double quotes, nor do single quotes impede evaluation of variables when used within double quotes. -=head2 Regexp Quotelike Operators +=head2 Regexp Quote-Like Operators -Here are the quotelike operators that apply to pattern +Here are the quote-like operators that apply to pattern matching and related activities. =over 8 @@ -597,7 +612,7 @@ matching and related activities. This is just like the C</pattern/> search, except that it matches only once between calls to the reset() operator. This is a useful -optimization when you only want to see the first occurrence of +optimization when you want to see only the first occurrence of something in each file of a set of files, for instance. Only C<??> patterns local to the current package are reset. @@ -614,20 +629,23 @@ C<!~> operator, the $_ string is searched. (The string specified with C<=~> need not be an lvalue--it may be the result of an expression evaluation, but remember the C<=~> binds rather tightly.) See also L<perlre>. +See L<perllocale> for discussion of additional considerations which apply +when C<use locale> is in effect. Options are: - g Match globally, i.e. find all occurrences. + g Match globally, i.e., find all occurrences. i Do case-insensitive pattern matching. m Treat string as multiple lines. - o Only compile pattern once. + o Compile pattern only once. s Treat string as single line. x Use extended regular expressions. If "/" is the delimiter then the initial C<m> is optional. 
With the C<m> you can use any pair of non-alphanumeric, non-whitespace characters as delimiters. This is particularly useful for matching Unix path names -that contain "/", to avoid LTS (leaning toothpick syndrome). +that contain "/", to avoid LTS (leaning toothpick syndrome). If "?" is +the delimiter, then the match-only-once rule of C<?PATTERN?> applies. PATTERN may contain variables, which will be interpolated (and the pattern recompiled) every time the pattern search is evaluated. (Note @@ -644,7 +662,7 @@ successfully executed regular expression is used instead. If used in a context that requires a list value, a pattern match returns a list consisting of the subexpressions matched by the parentheses in the -pattern, i.e. ($1, $2, $3...). (Note that here $1 etc. are also set, and +pattern, i.e., (C<$1>, $2, $3...). (Note that here $1 etc. are also set, and that this differs from Perl 4's behavior.) If the match fails, a null array is returned. If the match succeeds, but there were no parentheses, a list value of (1) is returned. @@ -667,8 +685,8 @@ Examples: if (($F1, $F2, $Etc) = ($foo =~ /^(\S+)\s+(\S+)\s*(.*)/)) This last example splits $foo into the first two words and the -remainder of the line, and assigns those three fields to $F1, $F2 and -$Etc. The conditional is true if any variables were assigned, i.e. if +remainder of the line, and assigns those three fields to $F1, $F2, and +$Etc. The conditional is true if any variables were assigned, i.e., if the pattern matched. The C</g> modifier specifies global pattern matching--that is, matching @@ -679,35 +697,93 @@ If there are no parentheses, it returns a list of all the matched strings, as if there were parentheses around the whole pattern. In a scalar context, C<m//g> iterates through the string, returning TRUE -each time it matches, and FALSE when it eventually runs out of -matches. (In other words, it remembers where it left off last time and -restarts the search at that point. 
You can actually find the current -match position of a string using the pos() function--see L<perlfunc>.) -If you modify the string in any way, the match position is reset to the -beginning. Examples: +each time it matches, and FALSE when it eventually runs out of matches. +(In other words, it remembers where it left off last time and restarts +the search at that point. You can actually find the current match +position of a string or set it using the pos() function; see +L<perlfunc/pos>.) A failed match normally resets the search position to +the beginning of the string, but you can avoid that by adding the "c" +modifier (e.g. C<m//gc>). Modifying the target string also resets the +search position. + +You can intermix C<m//g> matches with C<m/\G.../g>, where C<\G> is a +zero-width assertion that matches the exact position where the previous +C<m//g>, if any, left off. The C<\G> assertion is not supported without +the C</g> modifier; currently, without C</g>, C<\G> behaves just like +C<\A>, but that's accidental and may change in the future. + +Examples: # list context ($one,$five,$fifteen) = (`uptime` =~ /(\d+\.\d+)/g); # scalar context - $/ = ""; $* = 1; # $* deprecated in Perl 5 - while ($paragraph = <>) { + $/ = ""; $* = 1; # $* deprecated in modern perls + while (defined($paragraph = <>)) { while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\s/g) { $sentences++; } } print "$sentences\n"; + # using m//gc with \G + $_ = "ppooqppqq"; + while ($i++ < 2) { + print "1: '"; + print $1 while /(o)/gc; print "', pos=", pos, "\n"; + print "2: '"; + print $1 if /\G(q)/gc; print "', pos=", pos, "\n"; + print "3: '"; + print $1 while /(p)/gc; print "', pos=", pos, "\n"; + } + +The last example should print: + + 1: 'oo', pos=4 + 2: 'q', pos=5 + 3: 'pp', pos=7 + 1: '', pos=7 + 2: 'q', pos=8 + 3: '', pos=8 + +A useful idiom for C<lex>-like scanners is C</\G.../gc>. 
You can +combine several regexps like this to process a string part-by-part, +doing different actions depending on which regexp matched. Each +regexp tries to match where the previous one leaves off. + + $_ = <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; + EOL + LOOP: + { + print(" digits"), redo LOOP if /\G\d+\b[,.;]?\s*/gc; + print(" lowercase"), redo LOOP if /\G[a-z]+\b[,.;]?\s*/gc; + print(" UPPERCASE"), redo LOOP if /\G[A-Z]+\b[,.;]?\s*/gc; + print(" Capitalized"), redo LOOP if /\G[A-Z][a-z]+\b[,.;]?\s*/gc; + print(" MiXeD"), redo LOOP if /\G[A-Za-z]+\b[,.;]?\s*/gc; + print(" alphanumeric"), redo LOOP if /\G[A-Za-z0-9]+\b[,.;]?\s*/gc; + print(" line-noise"), redo LOOP if /\G[^A-Za-z0-9]+/gc; + print ". That's all!\n"; + } + +Here is the output (split into several lines): + + line-noise lowercase line-noise lowercase UPPERCASE line-noise + UPPERCASE line-noise lowercase line-noise lowercase line-noise + lowercase lowercase line-noise lowercase lowercase line-noise + MiXeD line-noise. That's all! + =item q/STRING/ =item C<'STRING'> -A single-quoted, literal string. Backslashes are ignored, unless -followed by the delimiter or another backslash, in which case the -delimiter or backslash is interpolated. +A single-quoted, literal string. A backslash represents a backslash +unless followed by the delimiter or another backslash, in which case +the delimiter or backslash is interpolated. $foo = q!I said, "You said, 'She said it.'"!; $bar = q('This is it.'); + $baz = '\n'; # a two-character string =item qq/STRING/ @@ -718,6 +794,7 @@ A double-quoted, interpolated string. $_ .= qq (*** The previous line contains the naughty word "$1".\n) if /(tcl|rexx|python)/; # :-) + $baz = "\n"; # a one-character string =item qx/STRING/ @@ -731,7 +808,7 @@ with $/ or $INPUT_RECORD_SEPARATOR). $today = qx{ date }; -See L<I/O Operators> for more discussion. +See L<"I/O Operators"> for more discussion. 
=item qw/STRING/ @@ -745,43 +822,50 @@ Some frequently seen examples: use POSIX qw( setlocale localeconv ) @EXPORT = qw( foo bar baz ); +A common mistake is to try to separate the words with commas or to put +comments into a multi-line qw-string. For this reason the C<-w> +switch produces warnings if the STRING contains the "," or the "#" +character. + =item s/PATTERN/REPLACEMENT/egimosx Searches a string for a pattern, and if found, replaces that pattern with the replacement text and returns the number of substitutions -made. Otherwise it returns false (0). +made. Otherwise it returns false (specifically, the empty string). If no string is specified via the C<=~> or C<!~> operator, the C<$_> variable is searched and modified. (The string specified with C<=~> must be a scalar variable, an array element, a hash element, or an assignment -to one of those, i.e. an lvalue.) +to one of those, i.e., an lvalue.) If the delimiter chosen is single quote, no variable interpolation is done on either the PATTERN or the REPLACEMENT. Otherwise, if the PATTERN contains a $ that looks like a variable rather than an end-of-string test, the variable will be interpolated into the pattern -at run-time. If you only want the pattern compiled once the first time +at run-time. If you want the pattern compiled only once the first time the variable is interpolated, use the C</o> option. If the pattern evaluates to a null string, the last successfully executed regular expression is used instead. See L<perlre> for further explanation on these. +See L<perllocale> for discussion of additional considerations which apply +when C<use locale> is in effect. Options are: e Evaluate the right side as an expression. - g Replace globally, i.e. all occurrences. + g Replace globally, i.e., all occurrences. i Do case-insensitive pattern matching. m Treat string as multiple lines. - o Only compile pattern once. + o Compile pattern only once. s Treat string as single line. x Use extended regular expressions.
Any non-alphanumeric, non-whitespace delimiter may replace the slashes. If single quotes are used, no interpretation is done on the -replacement string (the C</e> modifier overrides this, however). If -backquotes are used, the replacement string is a command to execute -whose output will be used as the actual replacement text. If the +replacement string (the C</e> modifier overrides this, however). Unlike +Perl 4, Perl 5 treats backticks as normal delimiters; the replacement +text is not evaluated as a command. If the PATTERN is delimited by bracketing quotes, the REPLACEMENT has its own -pair of quotes, which may or may not be bracketing quotes, e.g. +pair of quotes, which may or may not be bracketing quotes, e.g., C<s(foo)(bar)> or C<sE<lt>fooE<gt>/bar/>. A C</e> will cause the replacement portion to be interpreter as a full-fledged Perl expression and eval()ed right then and there. It is, however, syntax checked at @@ -823,11 +907,11 @@ Examples: s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields -Note the use of $ instead of \ in the last example. Unlike -B<sed>, we only use the \<I<digit>> form in the left hand side. -Anywhere else it's $<I<digit>>. +Note the use of $ instead of \ in the last example. Unlike +B<sed>, we use the \E<lt>I<digit>E<gt> form in only the left hand side. +Anywhere else it's $E<lt>I<digit>E<gt>. -Occasionally, you can't just use a C</g> to get all the changes +Occasionally, you can't use just a C</g> to get all the changes to occur. Here are two common cases: # put commas in the right places in an integer @@ -846,12 +930,12 @@ Translates all occurrences of the characters found in the search list with the corresponding character in the replacement list. It returns the number of characters replaced or deleted. If no string is specified via the =~ or !~ operator, the $_ string is translated. (The -string specified with =~ must be a scalar variable, an array element, -or an assignment to one of those, i.e. an lvalue.) 
For B<sed> devotees, -C<y> is provided as a synonym for C<tr>. If the SEARCHLIST is -delimited by bracketing quotes, the REPLACEMENTLIST has its own pair of -quotes, which may or may not be bracketing quotes, e.g. C<tr[A-Z][a-z]> -or C<tr(+-*/)/ABCD/>. +string specified with =~ must be a scalar variable, an array element, a +hash element, or an assignment to one of those, i.e., an lvalue.) +For B<sed> devotees, C<y> is provided as a synonym for C<tr>. If the +SEARCHLIST is delimited by bracketing quotes, the REPLACEMENTLIST has +its own pair of quotes, which may or may not be bracketing quotes, +e.g., C<tr[A-Z][a-z]> or C<tr(+-*/)/ABCD/>. Options: @@ -914,7 +998,7 @@ an eval(): =head2 I/O Operators -There are several I/O operators you should know about. +There are several I/O operators you should know about. A string enclosed by backticks (grave accents) first undergoes variable substitution just like a double quoted string. It is then interpreted as a command, and the output of that command is the value @@ -929,19 +1013,19 @@ data--newlines remain newlines. Unlike in any of the shells, single quotes do not hide variable names in the command from interpretation. To pass a $ through to the shell you need to hide it with a backslash. The generalized form of backticks is C<qx//>. (Because backticks -always undergo shell expansion as well, see L<perlsec> for +always undergo shell expansion as well, see L<perlsec> for security concerns.) Evaluating a filehandle in angle brackets yields the next line from -that file (newline included, so it's never false until end of file, at -which time an undefined value is returned). Ordinarily you must assign -that value to a variable, but there is one situation where an automatic -assignment happens. I<If and ONLY if> the input symbol is the only -thing inside the conditional of a C<while> loop, the value is -automatically assigned to the variable C<$_>. The assigned value is -then tested to see if it is defined.
(This may seem like an odd thing -to you, but you'll use the construct in almost every Perl script you -write.) Anyway, the following lines are equivalent to each other: +that file (newline, if any, included), or C<undef> at end of file. +Ordinarily you must assign that value to a variable, but there is one +situation where an automatic assignment happens. I<If and ONLY if> the +input symbol is the only thing inside the conditional of a C<while> or +C<for(;;)> loop, the value is automatically assigned to the variable +C<$_>. The assigned value is then tested to see if it is defined. +(This may seem like an odd thing to you, but you'll use the construct +in almost every Perl script you write.) Anyway, the following lines +are equivalent to each other: while (defined($_ = <STDIN>)) { print; } while (<STDIN>) { print; } @@ -949,13 +1033,13 @@ write.) Anyway, the following lines are equivalent to each other: print while defined($_ = <STDIN>); print while <STDIN>; -The filehandles STDIN, STDOUT and STDERR are predefined. (The -filehandles C<stdin>, C<stdout> and C<stderr> will also work except in +The filehandles STDIN, STDOUT, and STDERR are predefined. (The +filehandles C<stdin>, C<stdout>, and C<stderr> will also work except in packages, where they would be interpreted as local identifiers rather than global.) Additional filehandles may be created with the open() function. See L<perlfunc/open()> for details on this. -If a <FILEHANDLE> is used in a context that is looking for a list, a +If a E<lt>FILEHANDLEE<gt> is used in a context that is looking for a list, a list consisting of all the input lines is returned, one line per list element. It's easy to make a I<LARGE> data space this way, so use with care. @@ -984,9 +1068,9 @@ is equivalent to the following Perl-like pseudo code: except that it isn't so cumbersome to say, and will actually work. It really does shift array @ARGV and put the current filename into variable -$ARGV. 
It also uses filehandle I<ARGV> internally--E<lt>E<gt> is just a synonym -for <ARGV>, which is magical. (The pseudo code above doesn't work -because it treats <ARGV> as non-magical.) +$ARGV. It also uses filehandle I<ARGV> internally--E<lt>E<gt> is just a +synonym for E<lt>ARGVE<gt>, which is magical. (The pseudo code above +doesn't work because it treats E<lt>ARGVE<gt> as non-magical.) You can modify @ARGV before the first E<lt>E<gt> as long as the array ends up containing the list of filenames you really want. Line numbers (C<$.>) @@ -994,7 +1078,7 @@ continue as if the input were one big happy file. (But see example under eof() for how to reset line numbers on each file.) If you want to set @ARGV to your own list of files, go right ahead. If -you want to pass switches into your script, you can use one of the +you want to pass switches into your script, you can use one of the Getopts modules or put a loop on the front like this: while ($_ = $ARGV[0], /^-/) { @@ -1013,7 +1097,7 @@ this it will assume you are processing another @ARGV list, and if you haven't set @ARGV, will input from STDIN. If the string inside the angle brackets is a reference to a scalar -variable (e.g. <$foo>), then that variable contains the name of the +variable (e.g., E<lt>$fooE<gt>), then that variable contains the name of the filehandle to input from, or a reference to the same. For example: $fh = \*STDIN; @@ -1025,7 +1109,7 @@ as a filename pattern to be globbed, and either a list of filenames or the next filename in the list is returned, depending on context. One level of $ interpretation is done first, but you can't say C<E<lt>$fooE<gt>> because that's an indirect filehandle as explained in the previous -paragraph. In older version of Perl, programmers would insert curly +paragraph. (In older versions of Perl, programmers would insert curly brackets to force interpretation as a filename glob: C<E<lt>${foo}E<gt>>. 
These days, it's considered cleaner to call the internal function directly as C<glob($foo)>, which is probably the right way to have done it in the @@ -1050,11 +1134,11 @@ machine.) Of course, the shortest way to do the above is: chmod 0644, <*.c>; Because globbing invokes a shell, it's often faster to call readdir() yourself -and just do your own grep() on the filenames. Furthermore, due to its current -implementation of using a shell, the glob() routine may get "Arg list too +and do your own grep() on the filenames. Furthermore, due to its current +implementation of using a shell, the glob() routine may get "Arg list too long" errors (unless you've installed tcsh(1L) as F</bin/csh>). -A glob only evaluates its (embedded) argument when it is starting a new +A glob evaluates its (embedded) argument only when it is starting a new list. All values must be read before it will start over. In a list context this isn't important, because you automatically get them all anyway. In a scalar context, however, the operator returns the next value @@ -1069,11 +1153,11 @@ than $file = <blurch*>; because the latter will alternate between returning a filename and -returning FALSE. +returning FALSE. If you're trying to do variable interpolation, it's definitely better to use the glob() function, because the older notation can cause people -to become confused with the indirect filehandle notatin. +to become confused with the indirect filehandle notation. @files = glob("$dir/*.[ch]"); @files = glob($files[$i]); @@ -1090,19 +1174,19 @@ compile time. You can say 'Now is the time for all' . "\n" . 'good men to come to.' -and this all reduces to one string internally. Likewise, if +and this all reduces to one string internally. Likewise, if you say foreach $file (@filenames) { if (-s $file > 5 + 100 * 2**16) { ... } - } + } -the compiler will pre-compute the number that +the compiler will precompute the number that expression represents so that the interpreter won't have to.
-=head2 Integer arithmetic +=head2 Integer Arithmetic By default Perl assumes that it must do most of its arithmetic in floating point. But by saying @@ -1111,9 +1195,35 @@ floating point. But by saying you may tell the compiler that it's okay to use integer operations from here to the end of the enclosing BLOCK. An inner BLOCK may -countermand this by saying +countermand this by saying no integer; which lasts until the end of that BLOCK. +The bitwise operators ("&", "|", "^", "~", "<<", and ">>") always +produce integral results. However, C<use integer> still has meaning +for them. By default, their results are interpreted as unsigned +integers. However, if C<use integer> is in effect, their results are +interpreted as signed integers. For example, C<~0> usually evaluates +to a large integral value. However, C<use integer; ~0> is -1. + +=head2 Floating-point Arithmetic + +While C<use integer> provides integer-only arithmetic, there is no +similar way to provide rounding or truncation at a certain number of +decimal places. For rounding to a certain number of digits, sprintf() +or printf() is usually the easiest route. + +The POSIX module (part of the standard perl distribution) implements +ceil(), floor(), and a number of other mathematical and trigonometric +functions. The Math::Complex module (part of the standard perl +distribution) defines a number of mathematical functions that can also +work on real numbers. Math::Complex is not as efficient as POSIX, but +POSIX can't work with complex numbers. + +Rounding in financial applications can have serious implications, and +the rounding method used should be specified precisely. In these +cases, it probably pays not to trust whichever system rounding is +being used by Perl, but to instead implement the rounding function you +need yourself.
diff --git a/pod/perlovl.pod b/pod/perlovl.pod deleted file mode 100644 index 208456d239..0000000000 --- a/pod/perlovl.pod +++ /dev/null @@ -1,15 +0,0 @@ -=head1 NAME - -perlovl - overload perl mathematical functions [superseded] - -=head1 DESCRIPTION - -This man page has been superseded by L<overload>. - -=head1 WARNING - -The old interface involving %OVERLOAD is deprecated and will go away -RSN. Convert your scripts to -use overload ...; -style. - diff --git a/pod/perlpod.pod b/pod/perlpod.pod index 6566ffb357..6a578caec3 100644 --- a/pod/perlpod.pod +++ b/pod/perlpod.pod @@ -31,18 +31,21 @@ use however it pleases. Currently recognized commands are =back =cut =pod + =for X + =begin X + =end X The "=pod" directive does nothing beyond telling the compiler to lay -off of through the next "=cut". It's useful for adding another -paragraph to the doc if you're mixing up code and pod a lot. +off parsing code through the next "=cut". It's useful for adding +another paragraph to the doc if you're mixing up code and pod a lot. -Head1 and head2 produce first and second level headings, with the text on -the same paragraph as "=headn" forming the heading description. +Head1 and head2 produce first and second level headings, with the text in +the same paragraph as the "=headn" directive forming the heading description. -Item, over, and back require a little more explanation: Over starts a -section specifically for the generation of a list using =item commands. At -the end of your list, use =back to end it. You will probably want to give -"4" as the number to =over, as some formatters will use this for indention. +Item, over, and back require a little more explanation: "=over" starts a +section specifically for the generation of a list using "=item" commands. At +the end of your list, use "=back" to end it. You will probably want to give +"4" as the number to "=over", as some formatters will use this for indentation. This should probably be a default. 
Note also that there are some basic rules to using =item: don't use them outside of an =over/=back block, use at least one inside an =over/=back block, you don't _have_ to include the =back if @@ -51,11 +54,48 @@ items consistent: either use "=item *" for all of them, to produce bullets, or use "=item 1.", "=item 2.", etc., to produce numbered lists, or use "=item foo", "=item bar", etc., i.e., things that look nothing like bullets or numbers. If you start with bullets or numbers, stick with them, as many -formatters you the first =item type to decide how to format the list. +formatters use the first "=item" type to decide how to format the list. -And don't forget, when using any command, that that command lasts up until +For, begin, and end let you include sections that are not interpreted +as pod text, but passed directly to particular formatters. A formatter +that can utilize that format will use the section, otherwise it will be +completely ignored. The directive "=for" specifies that the entire next +paragraph is in the format indicated by the first word after +"=for", like this: + + =for html <br> + <p> This is a raw HTML paragraph </p> + +The paired commands "=begin" and "=end" work very similarly to "=for", but +instead of only accepting a single paragraph, all text from "=begin" to a +paragraph with a matching "=end" is treated as a particular format. + +Here are some examples of how to use these: + + =begin html + + <br>Figure 1.<IMG SRC="figure1.png"><br> + + =end html + + =begin text + + --------------- + | foo | + | bar | + --------------- + + ^^^^ Figure 1. ^^^^ + + =end text + +Some format names that formatters currently are known to accept include +"roff", "man", "latex", "tex", "text", and "html". (Some formatters will +treat some of these as synonyms.) + +And don't forget, when using any command, that the command lasts up until the end of the B<paragraph>, not the line.
Hence in the examples below, you -can see the blank lines after each command to end it's paragraph. +can see the empty lines after each command to end its paragraph. Some examples of lists include: @@ -92,24 +132,34 @@ here and in commands: I<text> italicize text, used for emphasis or variables B<text> embolden text, used for switches and programs S<text> text contains non-breaking spaces - C<code> literal code + C<code> literal code L<name> A link (cross reference) to name - L<name> manpage - L<name/ident> item in manpage - L<name/"sec"> section in other manpage - L<"sec"> section in this manpage + L<name> manual page + L<name/ident> item in manual page + L<name/"sec"> section in other manual page + L<"sec"> section in this manual page (the quotes are optional) L</"sec"> ditto F<file> Used for filenames X<index> An index entry - Z<> A zero-width character + Z<> A zero-width character + E<escape> A named character (very similar to HTML escapes) + E<lt> A literal < + E<gt> A literal > + (these are optional except in other interior + sequences and when preceded by a capital letter) + E<n> Character number n (probably in ASCII) + E<html> Some non-numeric HTML entity, such + as E<Agrave> + +=back That's it. The intent is simplicity, not power. I wanted paragraphs to look like paragraphs (block format), so that they stand out visually, and so that I could run them through fmt easily to reformat them (that's F7 in my version of B<vi>). I wanted the translator (and not me) to worry about whether " or ' is a left quote or a right quote -within filled text, and I wanted it to leave the quotes alone dammit in +within filled text, and I wanted it to leave the quotes alone, dammit, in verbatim mode, so I could slurp in a working program, shift it over 4 spaces, and have it print out, er, verbatim. And presumably in a constant width font. @@ -134,22 +184,53 @@ B<pod2html>, B<pod2latex>, and B<pod2fm>. 
=head1 Embedding Pods in Perl Modules You can embed pod documentation in your Perl scripts. Start your -documentation with a =head1 command at the beg, and end it with -an =cut command. Perl will ignore the pod text. See any of the -supplied library modules for examples. If you're going to put -your pods at the end of the file, and you're using an __END__ -or __DATA__ cut mark, make sure to put a blank line there before -the first pod directive. +documentation with a "=head1" command at the beginning, and end it +with a "=cut" command. Perl will ignore the pod text. See any of the +supplied library modules for examples. If you're going to put your +pods at the end of the file, and you're using an __END__ or __DATA__ +cut mark, make sure to put an empty line there before the first pod +directive. __END__ + =head1 NAME modern - I am a modern module -If you had not had that blank line there, then the translators wouldn't +If you had not had that empty line there, then the translators wouldn't have seen it. +=head1 Common Pod Pitfalls + +=over 4 + +=item * + +Pod translators usually will require paragraphs to be separated by +completely empty lines. If you have an apparently empty line with +some spaces on it, this can cause odd formatting. + +=item * + +Translators will mostly add wording around a LE<lt>E<gt> link, so that +C<LE<lt>foo(1)E<gt>> becomes "the I<foo>(1) manpage", for example (see +B<pod2man> for details). Thus, you shouldn't write things like C<the +LE<lt>fooE<gt> manpage>, if you want the translated document to read +sensibly. + +=item * + +The script F<pod/checkpods.PL> in the Perl source distribution +provides skeletal checking for lines that look empty but aren't +B<only>, but is there as a placeholder until someone writes +Pod::Checker. The best way to check your pod is to pass it through +one or more translators and proofread the result, or print out the +result and proofread that. 
Some of the problems found may be bugs in +the translators, which you may or may not wish to work around. + +=back + =head1 SEE ALSO L<pod2man> and L<perlsyn/"PODs: Embedded Documentation"> diff --git a/pod/perlre.pod b/pod/perlre.pod index 5446746e91..2b24379c8b 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -5,18 +5,40 @@ perlre - Perl regular expressions =head1 DESCRIPTION This page describes the syntax of regular expressions in Perl. For a -description of how to actually I<use> regular expressions in matching +description of how to I<use> regular expressions in matching operations, plus various examples of the same, see C<m//> and C<s///> in L<perlop>. -The matching operations can -have various modifiers, some of which relate to the interpretation of -the regular expression inside. These are: +The matching operations can have various modifiers. The modifiers +which relate to the interpretation of the regular expression inside +are listed below. For the modifiers that alter the behaviour of the +operation, see L<perlop/"m//"> and L<perlop/"s//">. - i Do case-insensitive pattern matching. - m Treat string as multiple lines. - s Treat string as single line. - x Extend your pattern's legibility with whitespace and comments. +=over 4 + +=item i + +Do case-insensitive pattern matching. + +If C<use locale> is in effect, the case map is taken from the current +locale. See L<perllocale>. + +=item m + +Treat string as multiple lines. That is, change "^" and "$" from matching +at only the very start or end of the string to the start or end of any +line anywhere within the string. + +=item s + +Treat string as single line. That is, change "." to match any character +whatsoever, even a newline, which it normally would not match. + +=item x + +Extend your pattern's legibility by permitting whitespace and comments. + +=back These are usually written as "the C</x> modifier", even though the delimiter in question might not actually be a slash.
In fact, any of these @@ -24,13 +46,15 @@ modifiers may also be embedded within the regular expression itself using the new C<(?...)> construct. See below. The C</x> modifier itself needs a little more explanation. It tells -the regular expression parser to ignore whitespace that is not -backslashed or within a character class. You can use this to break up +the regular expression parser to ignore whitespace that is neither +backslashed nor within a character class. You can use this to break up your regular expression into (slightly) more readable parts. The C<#> character is also treated as a metacharacter introducing a comment, -just as in ordinary Perl code. Taken together, these features go a -long way towards making Perl 5 a readable language. See the C comment -deletion code in L<perlop>. +just as in ordinary Perl code. This also means that if you want real +whitespace or C<#> characters in the pattern, you'll have to either +escape them or encode them using octal or hex escapes. Taken together, +these features go a long way towards making Perl's regular expressions +more readable. See the C comment deletion code in L<perlop>. =head2 Regular Expressions @@ -51,8 +75,8 @@ meanings: () Grouping [] Character class -By default, the "^" character is guaranteed to match only at the -beginning of the string, the "$" character only at the end (or before the +By default, the "^" character is guaranteed to match at only the +beginning of the string, the "$" character at only the end (or before the newline at the end) and Perl does certain optimizations with the assumption that the string contains only one line. Embedded newlines will not be matched by "^" or "$". You may, however, wish to treat a @@ -60,10 +84,10 @@ string as a multi-line buffer, such that the "^" will match after any newline within the string, and "$" will match before any newline. At the cost of a little more overhead, you can do this by using the /m modifier on the pattern match operator.
(Older programs did this by setting C<$*>, -but this practice is deprecated in Perl 5.) +but this practice is now deprecated.) To facilitate multi-line substitutions, the "." character never matches a -newline unless you use the C</s> modifier, which tells Perl to pretend +newline unless you use the C</s> modifier, which in effect tells Perl to pretend the string is a single line--even if it isn't. The C</s> modifier also overrides the setting of C<$*>, in case you have some (badly behaved) older code that sets it in another module. @@ -83,12 +107,10 @@ modifier to C<{1,}>, and the "?" modifier to C<{0,1}>. n and m are limited to integral values less than 65536. By default, a quantified subpattern is "greedy", that is, it will match as -many times as possible without causing the rest of the pattern not to match. -The standard quantifiers are all "greedy", in that they match as many -occurrences as possible (given a particular starting location) without -causing the pattern to fail. If you want it to match the minimum number -of times possible, follow the quantifier with a "?" after any of them. -Note that the meanings don't change, just the "gravity": +many times as possible (given a particular starting location) while still +allowing the rest of the pattern to match. If you want it to match the +minimum number of times possible, follow the quantifier with a "?". Note +that the meanings don't change, just the "greediness": *? Match 0 or more times +? Match 1 or more times @@ -97,15 +119,15 @@ Note that the meanings don't change, just the "gravity": {n,}? Match at least n times {n,m}? 
Match at least n but not more than m times -Since patterns are processed as double quoted strings, the following +Because patterns are processed as double quoted strings, the following also work: - \t tab - \n newline - \r return - \f form feed - \a alarm (bell) - \e escape (think troff) + \t tab (HT, TAB) + \n newline (LF, NL) + \r return (CR) + \f form feed (FF) + \a alarm (bell) (BEL) + \e escape (think troff) (ESC) \033 octal char (think of a PDP-11) \x1B hex char \c[ control char @@ -116,6 +138,9 @@ also work: \E end case modification (think vi) \Q quote regexp metacharacters till \E +If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> +and C<\U> is taken from the current locale. See L<perllocale>. + In addition, Perl defines the following: \w Match a "word" character (alphanumeric plus "_") @@ -126,48 +151,58 @@ In addition, Perl defines the following: \D Match a non-digit character Note that C<\w> matches a single alphanumeric character, not a whole -word. To match a word you'd need to say C<\w+>. You may use C<\w>, -C<\W>, C<\s>, C<\S>, C<\d> and C<\D> within character classes (though not -as either end of a range). +word. To match a word you'd need to say C<\w+>. If C<use locale> is in +effect, the list of alphabetic characters generated by C<\w> is taken +from the current locale. See L<perllocale>. You may use C<\w>, C<\W>, +C<\s>, C<\S>, C<\d>, and C<\D> within character classes (though not as +either end of a range).
Perl defines the following zero-width assertions: \b Match a word boundary \B Match a non-(word boundary) - \A Match only at beginning of string - \Z Match only at end of string (or before newline at the end) - \G Match only where previous m//g left off + \A Match at only beginning of string + \Z Match at only end of string (or before newline at the end) + \G Match only where previous m//g left off (works only with /g) A word boundary (C<\b>) is defined as a spot between two characters that -has a C<\w> on one side of it and and a C<\W> on the other side of it (in +has a C<\w> on one side of it and a C<\W> on the other side of it (in either order), counting the imaginary characters off the beginning and end of the string as matching a C<\W>. (Within character classes C<\b> represents backspace rather than a word boundary.) The C<\A> and C<\Z> are just like "^" and "$" except that they won't match multiple times when the C</m> modifier is used, while "^" and "$" will match at every internal line boundary. To match the actual end of the string, not ignoring newline, -you can use C<\Z(?!\n)>. +you can use C<\Z(?!\n)>. The C<\G> assertion can be used to chain global +matches (using C<m//g>), as described in +L<perlop/"Regexp Quote-Like Operators">. -When the bracketing construct C<( ... )> is used, \<digit> matches the +It is also useful when writing C<lex>-like scanners, when you have several +regexps which you want to match against consequent substrings of your +string, see the previous reference. +The actual location where C<\G> will match can also be influenced +by using C<pos()> as an lvalue. See L<perlfunc/pos>. + +When the bracketing construct C<( ... )> is used, \E<lt>digitE<gt> matches the digit'th substring. Outside of the pattern, always use "$" instead of "\" -in front of the digit. (While the \<digit> notation can on rare occasion work +in front of the digit. 
(While the \E<lt>digitE<gt> notation can on rare occasion work outside the current pattern, this should not be relied upon. See the -WARNING below.) The scope of $<digit> (and C<$`>, C<$&>, and C<$'>) +WARNING below.) The scope of $E<lt>digitE<gt> (and C<$`>, C<$&>, and C<$'>) extends to the end of the enclosing BLOCK or eval string, or to the next successful pattern match, whichever comes first. If you want to use -parentheses to delimit a subpattern (e.g. a set of alternatives) without -saving it as a subpattern, follow the ( with a ?. +parentheses to delimit a subpattern (e.g., a set of alternatives) without +saving it as a subpattern, follow the ( with a ?:. You may have as many parentheses as you wish. If you have more than 9 substrings, the variables $10, $11, ... refer to the corresponding substring. Within the pattern, \10, \11, etc. refer back -to substrings if there have been at least that many left parens before +to substrings if there have been at least that many left parentheses before the backreference. Otherwise (for backward compatibility) \10 is the same as \010, a backspace, and \11 the same as \011, a tab. And so on. (\1 through \9 are always backreferences.) C<$+> returns whatever the last bracket match matched. C<$&> returns the -entire matched string. ($0 used to return the same thing, but not any +entire matched string. (C<$0> used to return the same thing, but not any more.) C<$`> returns everything before the matched string. C<$'> returns everything after the matched string. Examples: @@ -179,28 +214,39 @@ everything after the matched string. Examples: $seconds = $3; } +Once perl sees that you need one of C<$&>, C<$`> or C<$'> anywhere in +the program, it has to provide them on each and every pattern match. +This can slow your program down. The same mechanism that handles +these provides for the use of $1, $2, etc., so you pay the same price +for each regexp that contains capturing parentheses. 
But if you never +use $&, etc., in your script, then regexps I<without> capturing +parentheses won't be penalized. So avoid $&, $', and $` if you can, +but if you can't (and some algorithms really appreciate them), once +you've used them once, use them at will, because you've already paid +the price. + You will note that all backslashed metacharacters in Perl are alphanumeric, such as C<\b>, C<\w>, C<\n>. Unlike some other regular expression languages, there are no backslashed symbols that aren't alphanumeric. -So anything that looks like \\, \(, \), \<, \>, \{, or \} is always +So anything that looks like \\, \(, \), \E<lt>, \E<gt>, \{, or \} is always interpreted as a literal character, not a metacharacter. This makes it simple to quote a string that you want to use for a pattern but that -you are afraid might contain metacharacters. Simply quote all the +you are afraid might contain metacharacters. Quote simply all the non-alphanumeric characters: $pattern =~ s/(\W)/\\$1/g; -You can also use the built-in quotemeta() function to do this. +You can also use the builtin quotemeta() function to do this. An even easier way to quote metacharacters right in the match operator is to say /$unquoted\Q$quoted\E$unquoted/ -Perl 5 defines a consistent extension syntax for regular expressions. -The syntax is a pair of parens with a question mark as the first thing -within the parens (this was a syntax error in Perl 4). The character -after the question mark gives the function of the extension. Several -extensions are already supported: +Perl defines a consistent extension syntax for regular expressions. +The syntax is a pair of parentheses with a question mark as the first +thing within the parentheses (this was a syntax error in older +versions of Perl). The character after the question mark gives the +function of the extension. Several extensions are already supported: =over 10 @@ -211,7 +257,7 @@ whitespace formatting, a simple C<#> will suffice. 
=item (?:regexp) -This groups things like "()" but doesn't make backrefences like "()" does. So +This groups things like "()" but doesn't make backreferences like "()" does. So split(/\b(?:a|b|c)\b/) @@ -235,7 +281,7 @@ use this for lookbehind: C</(?!foo)bar/> will not find an occurrence of "bar" that is preceded by something which is not "foo". That's because the C<(?!foo)> is just saying that the next thing cannot be "foo"--and it's not, it's a "bar", so "foobar" will match. You would have to do -something like C</(?foo)...bar/> for that. We say "like" because there's +something like C</(?!foo)...bar/> for that. We say "like" because there's the case of your "bar" not having three characters before it. You could cover that this way: C</(?:(?!foo)...|^..?)bar/>. Sometimes it's still easier just to say: @@ -248,7 +294,7 @@ easier just to say: One or more embedded pattern-match modifiers. This is particularly useful for patterns that are specified in a table somewhere, some of which want to be case sensitive, and some of which don't. The case -insensitive ones merely need to include C<(?i)> at the front of the +insensitive ones need to include merely C<(?i)> at the front of the pattern. For example: $pattern = "foobar"; @@ -291,7 +337,7 @@ When the match runs, the first part of the regular expression (C<\b(foo)>) finds a possible match right at the beginning of the string, and loads up $1 with "Foo". However, as soon as the matching engine sees that there's no whitespace following the "Foo" that it had saved in $1, it realizes its -mistake and starts over again one character after where it had had the +mistake and starts over again one character after where it had the tentative match. This time it goes all the way until the next occurrence of "foo". The complete regular expression matches this time, and you get the expected output of "table follows foo." @@ -330,7 +376,7 @@ That won't work at all, because C<.*> was greedy and gobbled up the whole string. 
As C<\d*> can match on an empty string the complete regular expression matched successfully. - Beginning is <I have 2: 53147>, number is <>. + Beginning is <I have 2 numbers: 53147>, number is <>. Here are some variants, most of which don't work: @@ -370,11 +416,10 @@ As you see, this can be a bit tricky. It's important to realize that a regular expression is merely a set of assertions that gives a definition of success. There may be 0, 1, or several different ways that the definition might succeed against a particular string. And if there are -multiple ways it might succeed, you need to understand backtracking in -order to know which variety of success you will achieve. +multiple ways it might succeed, you need to understand backtracking to know which variety of success you will achieve. When using lookahead assertions and negations, this can all get even -tricker. Imagine you'd like to find a sequence of nondigits not +trickier. Imagine you'd like to find a sequence of non-digits not followed by "123". You might try to write that as $_ = "ABC123"; @@ -401,23 +446,23 @@ This prints 3: got AB 4: got ABC -You might have expected test 3 to fail because it just seems to a more +You might have expected test 3 to fail because it seems to be a more general purpose version of test 1. The important difference between them is that test 3 contains a quantifier (C<\D*>) and so can use backtracking, whereas test 1 will not. What's happening is that you've asked "Is it true that at the start of $x, following 0 or more -nondigits, you have something that's not 123?" If the pattern matcher had +non-digits, you have something that's not 123?" If the pattern matcher had let C<\D*> expand to "ABC", this would have caused the whole pattern to -fail. +fail. The search engine will initially match C<\D*> with "ABC". Then it will try to match C<(?!123)> with "123" which, of course, fails.
But because a quantifier (C<\D*>) has been used in the regular expression, the search engine can backtrack and retry the match differently -in the hope of matching the complete regular expression. +in the hope of matching the complete regular expression. -Well now, +Well now, the pattern really, I<really> wants to succeed, so it uses the -standard regexp backoff-and-retry and lets C<\D*> expand to just "AB" this +standard regexp back-off-and-retry and lets C<\D*> expand to just "AB" this time. Now there's indeed something following "AB" that is not "123". It's in fact "C123", which suffices. @@ -460,7 +505,7 @@ routines, here are the pattern-matching rules not described above. Any single character matches itself, unless it is a I<metacharacter> with a special meaning described here or above. You can cause characters which normally function as metacharacters to be interpreted -literally by prefixing them with a "\" (e.g. "\." matches a ".", not any +literally by prefixing them with a "\" (e.g., "\." matches a ".", not any character; "\\" matches a "\"). A series of characters matches that series of characters in the target string, so the pattern C<blurfl> would match "blurfl" in the target string. @@ -476,7 +521,7 @@ Characters may be specified using a metacharacter syntax much like that used in C: "\n" matches a newline, "\t" a tab, "\r" a carriage return, "\f" a form feed, etc. More generally, \I<nnn>, where I<nnn> is a string of octal digits, matches the character whose ASCII value is I<nnn>. -Similarly, \xI<nn>, where I<nn> are hexidecimal digits, matches the +Similarly, \xI<nn>, where I<nn> are hexadecimal digits, matches the character whose ASCII value is I<nn>. The expression \cI<x> matches the ASCII character control-I<x>. Finally, the "." metacharacter matches any character except "\n" (unless you use C</s>). @@ -500,7 +545,7 @@ Subpatterns are numbered based on the left to right order of their opening parenthesis. 
Note that a backreference matches whatever actually matched the subpattern in the string being examined, not the rules for that subpattern. Therefore, C<(0|0x)\d*\s\1\d*> will -match "0x1234 0x4321",but not "0x1234 01234", since subpattern 1 +match "0x1234 0x4321",but not "0x1234 01234", because subpattern 1 actually matched "0x", even though the rule C<0|0x> could potentially match the leading 0 in the second number. @@ -512,7 +557,7 @@ Some people get too used to writing things like This is grandfathered for the RHS of a substitute to avoid shocking the B<sed> addicts, but it's a dirty habit to get into. That's because in -PerlThink, the right-hand side of a C<s///> is a double-quoted string. C<\1> in +PerlThink, the righthand side of a C<s///> is a double-quoted string. C<\1> in the usual double-quoted string means a control-A. The customary Unix meaning of C<\1> is kludged in for C<s///>. However, if you get into the habit of doing that, you get yourself into trouble if you then add an C</e> @@ -528,3 +573,7 @@ You can't disambiguate that by saying C<\{1}000>, whereas you can fix it with C<${1}000>. Basically, the operation of interpolation should not be confused with the operation of matching a backreference. Certainly they mean two different things on the I<left> side of the C<s///>. + +=head2 SEE ALSO + +"Mastering Regular Expressions" (see L<perlbook>) by Jeffrey Friedl. diff --git a/pod/perlref.pod b/pod/perlref.pod index d528bc8797..6aa086088d 100644 --- a/pod/perlref.pod +++ b/pod/perlref.pod @@ -7,40 +7,49 @@ perlref - Perl references and nested data structures Before release 5 of Perl it was difficult to represent complex data structures, because all references had to be symbolic, and even that was difficult to do when you wanted to refer to a variable rather than a -symbol table entry. Perl 5 not only makes it easier to use symbolic +symbol table entry. 
Perl not only makes it easier to use symbolic references to variables, but lets you have "hard" references to any piece -of data. Any scalar may hold a hard reference. Since arrays and hashes +of data. Any scalar may hold a hard reference. Because arrays and hashes contain scalars, you can now easily build arrays of arrays, arrays of hashes, hashes of arrays, arrays of hashes of functions, and so on. Hard references are smart--they keep track of reference counts for you, -automatically freeing the thing referred to when its reference count -goes to zero. If that thing happens to be an object, the object is -destructed. See L<perlobj> for more about objects. (In a sense, -everything in Perl is an object, but we usually reserve the word for -references to objects that have been officially "blessed" into a class package.) - -A symbolic reference contains the name of a variable, just as a -symbolic link in the filesystem merely contains the name of a file. -The C<*glob> notation is a kind of symbolic reference. Hard references -are more like hard links in the file system: merely another way -at getting at the same underlying object, irrespective of its name. - -"Hard" references are easy to use in Perl. There is just one -overriding principle: Perl does no implicit referencing or -dereferencing. When a scalar is holding a reference, it always behaves -as a scalar. It doesn't magically start being an array or a hash -unless you tell it so explicitly by dereferencing it. - -References can be constructed several ways. +automatically freeing the thing referred to when its reference count goes +to zero. (Note: The reference counts for values in self-referential or +cyclic data structures may not go to zero without a little help; see +L<perlobj/"Two-Phased Garbage Collection"> for a detailed explanation.) +If that thing happens to be an object, the object is destructed. See +L<perlobj> for more about objects.
(In a sense, everything in Perl is an +object, but we usually reserve the word for references to objects that +have been officially "blessed" into a class package.) + +Symbolic references are names of variables or other objects, just as a +symbolic link in a Unix filesystem contains merely the name of a file. +The C<*glob> notation is a kind of symbolic reference. (Symbolic +references are sometimes called "soft references", but please don't call +them that; references are confusing enough without useless synonyms.) + +In contrast, hard references are more like hard links in a Unix file +system: They are used to access an underlying object without concern for +what its (other) name is. When the word "reference" is used without an +adjective, like in the following paragraph, it usually is talking about a +hard reference. + +References are easy to use in Perl. There is just one overriding +principle: Perl does no implicit referencing or dereferencing. When a +scalar is holding a reference, it always behaves as a simple scalar. It +doesn't magically start being an array or hash or subroutine; you have to +tell it explicitly to do so, by dereferencing it. + +References can be constructed in several ways. =over 4 =item 1. By using the backslash operator on a variable, subroutine, or value. -(This works much like the & (address-of) operator works in C.) Note -that this typically creates I<ANOTHER> reference to a variable, since +(This works much like the & (address-of) operator in C.) Note +that this typically creates I<ANOTHER> reference to a variable, because there's already a reference to the variable in the symbol table. But the symbol table reference might go away, and you'll still have the reference that the backslash returned. Here are some examples: @@ -49,8 +58,13 @@ reference that the backslash returned. 
Here are some examples: $arrayref = \@ARGV; $hashref = \%ENV; $coderef = \&handler; - $globref = \*STDOUT; + $globref = \*foo; +It isn't possible to create a true reference to an IO handle (filehandle or +dirhandle) using the backslash operator. See the explanation of the +*foo{THING} syntax below. (However, you're apt to find Perl code +out there using globrefs as though they were IO handles, which is +grandfathered into continued functioning.) =item 2. @@ -60,17 +74,20 @@ brackets: $arrayref = [1, 2, ['a', 'b', 'c']]; Here we've constructed a reference to an anonymous array of three elements -whose final element is itself reference to another anonymous array of three +whose final element is itself a reference to another anonymous array of three elements. (The multidimensional syntax described later can be used to -access this. For example, after the above, $arrayref-E<gt>[2][1] would have +access this. For example, after the above, C<$arrayref-E<gt>[2][1]> would have the value "b".) Note that taking a reference to an enumerated list is not the same as using square brackets--instead it's the same as creating a list of references! - @list = (\$a, \$b, \$c); - @list = \($a, $b, $c); # same thing! + @list = (\$a, \@b, \%c); + @list = \($a, @b, %c); # same thing! + +As a special case, C<\(@foo)> returns a list of references to the contents +of C<@foo>, not a reference to C<@foo> itself. Likewise for C<%foo>. =item 3. @@ -126,8 +143,8 @@ context even when it's called outside of the context. In human terms, it's a funny way of passing arguments to a subroutine when you define it as well as when you call it. It's useful for setting up little bits of code to run later, such as callbacks. You can even -do object-oriented stuff with it, though Perl provides a different -mechanism to do that already--see L<perlobj>. +do object-oriented stuff with it, though Perl already provides a different +mechanism to do that--see L<perlobj>. 
You can also think of closure as a way to write a subroutine template without using eval. (In fact, in version 5.000, eval was the I<only> way to get @@ -157,7 +174,7 @@ newprint() I<despite> the fact that the "my $x" has seemingly gone out of scope by the time the anonymous subroutine runs. That's what closure is all about. -This only applies to lexical variables, by the way. Dynamic variables +This applies to only lexical variables, by the way. Dynamic variables continue to work as they have always worked. Closure is not something that most Perl programmers need trouble themselves about to begin with. @@ -176,27 +193,62 @@ named new(), but don't have to be: =item 6. References of the appropriate type can spring into existence if you -dereference them in a context that assumes they exist. Since we haven't +dereference them in a context that assumes they exist. Because we haven't talked about dereferencing yet, we can't show you any examples yet. =item 7. -References to filehandles can be created by taking a reference to -a typeglob. This is currently the best way to pass filehandles into or +A reference can be created by using a special syntax, lovingly known as +the *foo{THING} syntax. *foo{THING} returns a reference to the THING +slot in *foo (which is the symbol table entry which holds everything +known as foo). + + $scalarref = *foo{SCALAR}; + $arrayref = *ARGV{ARRAY}; + $hashref = *ENV{HASH}; + $coderef = *handler{CODE}; + $ioref = *STDIN{IO}; + $globref = *foo{GLOB}; + +All of these are self-explanatory except for *foo{IO}. It returns the +IO handle, used for file handles (L<perlfunc/open>), sockets +(L<perlfunc/socket> and L<perlfunc/socketpair>), and directory handles +(L<perlfunc/opendir>). For compatibility with previous versions of +Perl, *foo{FILEHANDLE} is a synonym for *foo{IO}. + +*foo{THING} returns undef if that particular THING hasn't been used yet, +except in the case of scalars. 
*foo{SCALAR} returns a reference to an +anonymous scalar if $foo hasn't been used yet. This might change in a +future release. + +The use of *foo{IO} is the best way to pass bareword filehandles into or out of subroutines, or to store them in larger data structures. - splutter(\*STDOUT); + splutter(*STDOUT{IO}); sub splutter { my $fh = shift; print $fh "her um well a hmmm\n"; } - $rec = get_rec(\*STDIN); + $rec = get_rec(*STDIN{IO}); sub get_rec { my $fh = shift; return scalar <$fh>; } +Beware, though, that you can't do this with a routine which is going to +open the filehandle for you, because *HANDLE{IO} will be undef if HANDLE +hasn't been used yet. Use \*HANDLE for that sort of thing instead. + +Using \*HANDLE (or *HANDLE) is another way to use and store non-bareword +filehandles (before perl version 5.002 it was the only way). The two +methods are largely interchangeable, you can do + + splutter(\*STDOUT); + $rec = get_rec(\*STDIN); + +with the above subroutine definitions. + =back That's it for creating references. By now you're probably dying to @@ -207,9 +259,9 @@ are several basic methods. =item 1. -Anywhere you'd put an identifier as part of a variable or subroutine -name, you can replace the identifier with a simple scalar variable -containing a reference of the correct type: +Anywhere you'd put an identifier (or chain of identifiers) as part +of a variable or subroutine name, you can replace the identifier with +a simple scalar variable containing a reference of the correct type: $bar = $$scalarref; push(@$arrayref, $filename); @@ -230,28 +282,28 @@ However, a "simple scalar" includes an identifier that itself uses method =item 2. -Anywhere you'd put an identifier as part of a variable or subroutine -name, you can replace the identifier with a BLOCK returning a reference -of the correct type. 
In other words, the previous examples could be -written like this: +Anywhere you'd put an identifier (or chain of identifiers) as part of a +variable or subroutine name, you can replace the identifier with a +BLOCK returning a reference of the correct type. In other words, the +previous examples could be written like this: $bar = ${$scalarref}; push(@{$arrayref}, $filename); ${$arrayref}[0] = "January"; ${$hashref}{"KEY"} = "VALUE"; &{$coderef}(1,2,3); - $globref->print("output\n"); # iff you use FileHandle + $globref->print("output\n"); # iff IO::Handle is loaded Admittedly, it's a little silly to use the curlies in this case, but the BLOCK can contain any arbitrary expression, in particular, subscripted expressions: - &{ $dispatch{$index} }(1,2,3); # call correct routine + &{ $dispatch{$index} }(1,2,3); # call correct routine Because of being able to omit the curlies for the simple case of C<$$x>, people often make the mistake of viewing the dereferencing symbols as proper operators, and wonder about their precedence. If they were, -though, you could use parens instead of braces. That's not the case. +though, you could use parentheses instead of braces. That's not the case. Consider the difference below; case 0 is a short-hand version of case 1, I<NOT> case 2: @@ -266,14 +318,15 @@ it's presumably referencing. That would be case 3. =item 3. -The case of individual array elements arises often enough that it gets -cumbersome to use method 2. As a form of syntactic sugar, the two -lines like that above can be written: +Subroutine calls and lookups of individual array elements arise often +enough that it gets cumbersome to use method 2. 
As a form of +syntactic sugar, the examples for method 2 may be written: - $arrayref->[0] = "January"; - $hashref->{"KEY"} = "VALUE"; + $arrayref->[0] = "January"; # Array element + $hashref->{"KEY"} = "VALUE"; # Hash element + $coderef->(1,2,3); # Subroutine call -The left side of the array can be any expression returning a reference, +The left side of the arrow can be any expression returning a reference, including a previous dereference. Note that C<$array[$x]> is I<NOT> the same thing as C<$array-E<gt>[$x]> here: @@ -317,7 +370,7 @@ reference is pointing to. See L<perlfunc>. The bless() operator may be used to associate a reference with a package functioning as an object class. See L<perlobj>. -A typeglob may be dereferenced the same way a reference can, since +A typeglob may be dereferenced the same way a reference can, because the dereference syntax always indicates the kind of reference desired. So C<${*foo}> and C<${\$foo}> both indicate the same scalar variable. @@ -332,7 +385,7 @@ the whole block returns a reference to an array, which is then dereferenced by C<@{...}> and stuck into the double-quoted string. This chicanery is also useful for arbitrary expressions: - print "That yeilds @{[$n + 5]} widgets\n"; + print "That yields @{[$n + 5]} widgets\n"; =head2 Symbolic references @@ -364,7 +417,7 @@ that, you can say use strict 'refs'; and then only hard references will be allowed for the rest of the enclosing -block. An inner block may countermand that with +block. An inner block may countermand that with no strict 'refs'; @@ -377,15 +430,15 @@ invisible to this mechanism. For example: { my $value = 20; print $$ref; - } + } This will still print 10, not 20. Remember that local() affects package variables, which are all "global" to the package. 
=head2 Not-so-symbolic references -A new feature contributing to readability in 5.001 is that the brackets -around a symbolic reference behave more like quotes, just as they +A new feature contributing to readability in perl version 5.001 is that the +brackets around a symbolic reference behave more like quotes, just as they always have within a string. That is, $push = "pop on "; @@ -402,7 +455,7 @@ and even print ${ push } . "over"; will have the same effect. (This would have been a syntax error in -5.000, though Perl 4 allowed it in the spaceless form.) Note that this +Perl 5.000, though Perl 4 allowed it in the spaceless form.) Note that this construct is I<not> considered to be a symbolic reference when you're using strict refs: @@ -416,7 +469,7 @@ subscripting a hash. So now, instead of writing $array{ "aaa" }{ "bbb" }{ "ccc" } -you can just write +you can write just $array{ aaa }{ bbb }{ ccc } @@ -433,7 +486,7 @@ makes it more than a bareword: $array{ shift @_ } The B<-w> switch will warn you if it interprets a reserved word as a string. -But it will no longer warn you about using lowercase words, since the +But it will no longer warn you about using lowercase words, because the string is effectively quoted. =head1 WARNING @@ -443,8 +496,8 @@ converted into a string: $x{ \$a } = $a; -If you try to dereference the key, it won't do a hard dereference, and -you won't accomplish what you're attemping. You might want to do something +If you try to dereference the key, it won't do a hard dereference, and +you won't accomplish what you're attempting. You might want to do something more like $r = \@a; diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 4f6294cc69..c4679e1def 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -33,7 +33,7 @@ Contained in the file specified by the first filename on the command line. =item 3. -Passed in implicitly via standard input. This only works if there are +Passed in implicitly via standard input. 
This works only if there are no filename arguments--to pass arguments to a STDIN script you must explicitly specify a "-" for the script name. @@ -44,13 +44,13 @@ beginning, unless you've specified a B<-x> switch, in which case it scans for the first line starting with #! and containing the word "perl", and starts there instead. This is useful for running a script embedded in a larger message. (In this case you would indicate the end -of the script using the __END__ token.) +of the script using the C<__END__> token.) -As of Perl 5, the #! line is always examined for switches as the line is -being parsed. Thus, if you're on a machine that only allows one argument -with the #! line, or worse, doesn't even recognize the #! line, you still -can get consistent switch behavior regardless of how Perl was invoked, -even if B<-x> was used to find the beginning of the script. +The #! line is always examined for switches as the line is being +parsed. Thus, if you're on a machine that allows only one argument +with the #! line, or worse, doesn't even recognize the #! line, you +still can get consistent switch behavior regardless of how Perl was +invoked, even if B<-x> was used to find the beginning of the script. Because many operating systems silently chop off kernel interpretation of the #! line after 32 characters, some switches may be passed in on the @@ -67,8 +67,8 @@ The sequences "-*" and "- " are specifically ignored so that you could, if you were so inclined, say #!/bin/sh -- # -*- perl -*- -p - eval 'exec perl $0 -S ${1+"$@"}' - if 0; + eval 'exec /usr/bin/perl $0 -S ${1+"$@"}' + if $running_under_some_shell; to let Perl see the B<-p> switch. @@ -81,12 +81,90 @@ dispatch the program to the correct interpreter for them. After locating your script, Perl compiles the entire script to an internal form. If there are any compilation errors, execution of the script is not attempted. 
(This is unlike the typical shell script, -which might run partway through before finding a syntax error.) +which might run part-way through before finding a syntax error.) If the script is syntactically correct, it is executed. If the script runs off the end without hitting an exit() or die() operator, an implicit C<exit(0)> is provided to indicate successful completion. +=head2 #! and quoting on non-Unix systems + +Unix's #! technique can be simulated on other systems: + +=over 4 + +=item OS/2 + +Put + + extproc perl -S -your_switches + +as the first line in C<*.cmd> file (C<-S> due to a bug in cmd.exe's +`extproc' handling). + +=item MS-DOS + +Create a batch file to run your script, and codify it in +C<ALTERNATIVE_SHEBANG> (see the F<dosish.h> file in the source +distribution for more information). + +=item Win95/NT + +The Win95/NT installation, when using the Activeware port of Perl, +will modify the Registry to associate the .pl extension with the perl +interpreter. If you install another port of Perl, including the one +in the Win32 directory of the Perl distribution, then you'll have to +modify the Registry yourself. + +=item Macintosh + +Macintosh perl scripts will have the appropriate Creator and +Type, so that double-clicking them will invoke the perl application. + +=back + +Command-interpreters on non-Unix systems have rather different ideas +on quoting than Unix shells. You'll need to learn the special +characters in your command-interpreter (C<*>, C<\> and C<"> are +common) and how to protect whitespace and these characters to run +one-liners (see C<-e> below). + +On some systems, you may have to change single-quotes to double ones, +which you must I<NOT> do on Unix or Plan9 systems. You might also +have to change a single % to a %%. + +For example: + + # Unix + perl -e 'print "Hello world\n"' + + # MS-DOS, etc. 
+ perl -e "print \"Hello world\n\"" + + # Macintosh + print "Hello world\n" + (then Run "Myscript" or Shift-Command-R) + + # VMS + perl -e "print ""Hello world\n""" + +The problem is that none of this is reliable: it depends on the command +and it is entirely possible neither works. If 4DOS was the command shell, this would +probably work better: + + perl -e "print <Ctrl-x>"Hello world\n<Ctrl-x>"" + +CMD.EXE in Windows NT slipped a lot of standard Unix functionality in +when nobody was looking, but just try to find documentation for its +quoting rules. + +Under the Macintosh, it depends which environment you are using. The MacPerl +shell, or MPW, is much like Unix shells in its support for several +quoting variants, except that it makes free use of the Macintosh's non-ASCII +characters as control characters. + +There is no general solution to all of this. It's just a mess. + =head2 Switches A single-character switch may be combined with the following switch, if @@ -100,7 +178,7 @@ Switches include: =item B<-0>[I<digits>] -specifies the record separator (C<$/>) as an octal number. If there are +specifies the input record separator (C<$/>) as an octal number. If there are no digits, the null character is the separator. Other switches may precede or follow the digits. For example, if you have a version of B<find> which can print filenames terminated by the null character, you @@ -109,7 +187,7 @@ can say this: find . -name '*.bak' -print0 | perl -n0e unlink The special value 00 will cause Perl to slurp files in paragraph mode. -The value 0777 will cause Perl to slurp files whole since there is no +The value 0777 will cause Perl to slurp files whole because there is no legal character with that value. =item B<-a> @@ -133,7 +211,7 @@ An alternate delimiter may be specified using B<-F>. causes Perl to check the syntax of the script and then exit without executing it. 
Actually, it I<will> execute C<BEGIN>, C<END>, and C<use> blocks, -since these are considered as occurring outside the execution of +because these are considered as occurring outside the execution of your program. =item B<-d> @@ -151,10 +229,10 @@ Devel::DProf profiler. See L<perldebug>. =item B<-D>I<list> sets debugging flags. To watch how it executes your script, use -B<-D14>. (This only works if debugging is compiled into your +B<-D14>. (This works only if debugging is compiled into your Perl.) Another nice value is B<-D1024>, which lists your compiled syntax tree. And B<-D512> displays compiled regular expressions. As an -alternative specify a list of letters instead of numbers (e.g. B<-D14> is +alternative specify a list of letters instead of numbers (e.g., B<-D14> is equivalent to B<-Dtls>): 1 p Tokenizing and Parsing @@ -176,17 +254,17 @@ equivalent to B<-Dtls>): =item B<-e> I<commandline> -may be used to enter one line of script. +may be used to enter one line of script. If B<-e> is given, Perl -will not look for a script filename in the argument list. +will not look for a script filename in the argument list. Multiple B<-e> commands may -be given to build up a multi-line script. +be given to build up a multi-line script. Make sure to use semicolons where you would in a normal program. =item B<-F>I<pattern> specifies the pattern to split on if B<-a> is also in effect. The -pattern may be surrounded by C<//>, C<""> or C<''>, otherwise it will be +pattern may be surrounded by C<//>, C<"">, or C<''>, otherwise it will be put in single quotes. =item B<-h> @@ -231,23 +309,24 @@ know when the filename has changed. It does, however, use ARGVOUT for the selected filehandle. Note that STDOUT is restored as the default output filehandle after the loop. 
-You can use C<eof> without parenthesis to locate the end of each input file, -in case you want to append to each file, or reset line numbering (see +You can use C<eof> without parentheses to locate the end of each input file, +in case you want to append to each file, or reset line numbering (see example in L<perlfunc/eof>). =item B<-I>I<directory> Directories specified by B<-I> are prepended to the search path for -modules (@INC), and also tells the C preprocessor where to search for +modules (C<@INC>), and also tell the C preprocessor where to search for include files. The C preprocessor is invoked with B<-P>; by default it searches /usr/include and /usr/lib/perl. =item B<-l>[I<octnum>] enables automatic line-ending processing. It has two effects: first, -it automatically chomps the line terminator when used with B<-n> or -B<-p>, and second, it assigns "C<$\>" to have the value of I<octnum> so that -any print statements will have that line terminator added back on. If +it automatically chomps "C<$/>" (the input record separator) when used +with B<-n> or B<-p>, and second, it assigns "C<$\>" +(the output record separator) to have the value of I<octnum> so that +any print statements will have that separator added back on. If I<octnum> is omitted, sets "C<$\>" to the current value of "C<$/>". For instance, to trim lines to 80 columns: @@ -259,7 +338,7 @@ separator if the B<-l> switch is followed by a B<-0> switch: gnufind / -print0 | perl -ln0e 'print "found $_" if -p' -This sets $\ to newline and then sets $/ to the null character. +This sets C<$\> to newline and then sets C<$/> to the null character. =item B<-m>[B<->]I<module> @@ -279,7 +358,7 @@ e.g., C<-M'module qw(foo bar)'>. If the first character after the C<-M> or C<-m> is a dash (C<->) then the 'use' is replaced with 'no'.
-A little built-in syntactic sugar means you can also say +A little builtin syntactic sugar means you can also say C<-mmodule=foo,bar> or C<-Mmodule=foo,bar> as a shortcut for C<-M'module qw(foo bar)'>. This avoids the need to use quotes when importing symbols. The actual code generated by C<-Mmodule=foo,bar> is @@ -329,9 +408,9 @@ the implicit loop, just as in awk. =item B<-P> causes your script to be run through the C preprocessor before -compilation by Perl. (Since both comments and cpp directives begin +compilation by Perl. (Because both comments and cpp directives begin with the # character, you should avoid starting comments with any words -recognized by the C preprocessor such as "if", "else" or "define".) +recognized by the C preprocessor such as "if", "else", or "define".) =item B<-s> @@ -352,7 +431,7 @@ this is used to emulate #! startup on machines that don't support #!, in the following manner: #!/usr/bin/perl - eval "exec /usr/bin/perl -S $0 $*" + eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell; The system ignores the first line and feeds the script to /bin/sh, @@ -364,15 +443,15 @@ script if necessary. After Perl locates the script, it parses the lines and ignores them because the variable $running_under_some_shell is never true. A better construct than C<$*> would be C<${1+"$@"}>, which handles embedded spaces and such in the filenames, but doesn't work if -the script is being interpreted by csh. In order to start up sh rather +the script is being interpreted by csh. To start up sh rather than csh, some systems may have to replace the #! line with a line containing just a colon, which will be politely ignored by Perl. 
Other systems can't control that, and need a totally devious construct that -will work under any of csh, sh or Perl, such as the following: +will work under any of csh, sh, or Perl, such as the following: eval '(exit $?0)' && eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' & eval 'exec /usr/bin/perl -S $0 $argv:q' - if 0; + if $running_under_some_shell; =item B<-T> @@ -415,27 +494,104 @@ Prints to STDOUT the value of the named configuration variable. =item B<-w> -prints warnings about identifiers that are mentioned only once, and +prints warnings about variable names that are mentioned only once, and scalar variables that are used before being set. Also warns about redefined subroutines, and references to undefined filehandles or -filehandles opened readonly that you are attempting to write on. Also -warns you if you use values as a number that doesn't look like numbers, using -an array as though it were a scalar, if -your subroutines recurse more than 100 deep, and innumerable other things. -See L<perldiag> and L<perltrap>. +filehandles opened read-only that you are attempting to write on. Also +warns you if you use values as numbers that don't look like numbers, +if you use an array as though it were a scalar, if your subroutines recurse +more than 100 deep, and innumerable other things. + +You can disable specific warnings using C<__WARN__> hooks, as described +in L<perlvar> and L<perlfunc/warn>. See also L<perldiag> and L<perltrap>. =item B<-x> I<directory> tells Perl that the script is embedded in a message. Leading garbage will be discarded until the first line that starts with #! and contains the string "perl". Any meaningful switches on that line will -be applied (but only one group of switches, as with normal #! -processing). If a directory name is specified, Perl will switch to -that directory before running the script. The B<-x> switch only -controls the the disposal of leading garbage. The script must be +be applied.
If a directory name is specified, Perl will switch to +that directory before running the script. The B<-x> switch controls +only the disposal of leading garbage. The script must be terminated with C<__END__> if there is trailing garbage to be ignored (the script can process any or all of the trailing garbage via the DATA filehandle if desired). +=back + +=head1 ENVIRONMENT + +=over 12 + +=item HOME + +Used if chdir has no argument. + +=item LOGDIR + +Used if chdir has no argument and HOME is not set. + +=item PATH + +Used in executing subprocesses, and in finding the script if B<-S> is +used. + +=item PERL5LIB + +A colon-separated list of directories in which to look for Perl library +files before looking in the standard library and the current +directory. If PERL5LIB is not defined, PERLLIB is used. When running +taint checks (because the script was running setuid or setgid, or the +B<-T> switch was used), neither variable is used. The script should +instead say + + use lib "/my/directory"; + +=item PERL5OPT + +Command-line options (switches). Switches in this variable are taken +as if they were on every Perl command line. Only the B<-[DIMUdmw]> +switches are allowed. When running taint checks (because the script +was running setuid or setgid, or the B<-T> switch was used), this +variable is ignored. + +=item PERLLIB + +A colon-separated list of directories in which to look for Perl library +files before looking in the standard library and the current directory. +If PERL5LIB is defined, PERLLIB is not used. + +=item PERL5DB + +The command used to load the debugger code. The default is: + + BEGIN { require 'perl5db.pl' } + +=item PERL_DEBUG_MSTATS + +Relevant only if your perl executable was built with B<-DDEBUGGING_MSTATS>. +If set, this causes memory statistics to be dumped after execution. If set +to an integer greater than one, this also causes memory statistics to be dumped +after compilation.
+ +=item PERL_DESTRUCT_LEVEL + +Relevant only if your perl executable was built with B<-DDEBUGGING>, +this controls the behavior of global destruction of objects and other +references. =back + +Perl also has environment variables that control how Perl handles data +specific to particular natural languages. See L<perllocale>. + +Apart from these, Perl uses no other environment variables, except +to make them available to the script being executed, and to child +processes. However, scripts running setuid would do well to execute +the following lines before doing anything else, just to keep people +honest: + + $ENV{PATH} = '/bin:/usr/bin'; # or whatever you need + $ENV{SHELL} = '/bin/sh' if exists $ENV{SHELL}; + delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; + diff --git a/pod/perlsec.pod b/pod/perlsec.pod index ccae6e82a9..1a1ae21e81 100644 --- a/pod/perlsec.pod +++ b/pod/perlsec.pod @@ -4,144 +4,324 @@ perlsec - Perl security =head1 DESCRIPTION -Perl is designed to make it easy to write secure setuid and setgid -scripts. Unlike shells, which are based on multiple substitution -passes on each line of the script, Perl uses a more conventional -evaluation scheme with fewer hidden "gotchas". Additionally, since the -language has more built-in functionality, it has to rely less upon -external (and possibly untrustworthy) programs to accomplish its -purposes. +Perl is designed to make it easy to program securely even when running +with extra privileges, like setuid or setgid programs. Unlike most +command line shells, which are based on multiple substitution passes on +each line of the script, Perl uses a more conventional evaluation scheme +with fewer hidden snags. Additionally, because the language has more +builtin functionality, it can rely less upon external (and possibly +untrustworthy) programs to accomplish its purposes. 
-Beyond the obvious problems that stem from giving special privileges to -such flexible systems as scripts, on many operating systems, setuid -scripts are inherently insecure right from the start. This is because -that between the time that the kernel opens up the file to see what to -run, and when the now setuid interpreter it ran turns around and reopens -the file so it can interpret it, things may have changed, especially if -you have symbolic links on your system. +Perl automatically enables a set of special security checks, called I<taint +mode>, when it detects its program running with differing real and effective +user or group IDs. The setuid bit in Unix permissions is mode 04000, the +setgid bit mode 02000; either or both may be set. You can also enable taint +mode explicitly by using the B<-T> command line flag. This flag is +I<strongly> suggested for server programs and any program run on behalf of +someone else, such as a CGI script. -Fortunately, sometimes this kernel "feature" can be disabled. -Unfortunately, there are two ways to disable it. The system can simply -outlaw scripts with the setuid bit set, which doesn't help much. -Alternately, it can simply ignore the setuid bit on scripts. If the -latter is true, Perl can emulate the setuid and setgid mechanism when it -notices the otherwise useless setuid/gid bits on Perl scripts. It does -this via a special executable called B<suidperl> that is automatically -invoked for you if it's needed. +While in this mode, Perl takes special precautions called I<taint +checks> to prevent both obvious and subtle traps. Some of these checks +are reasonably simple, such as verifying that path directories aren't +writable by others; careful programmers have always used checks like +these. Other checks, however, are best supported by the language itself, +and it is these checks especially that contribute to making a setuid Perl +program more secure than the corresponding C program. 
+ +You may not use data derived from outside your program to affect something +else outside your program--at least, not by accident. All command line +arguments, environment variables, locale information (see L<perllocale>), +and file input are marked as "tainted". Tainted data may not be used +directly or indirectly in any command that invokes a sub-shell, nor in any +command that modifies files, directories, or processes. Any variable set +within an expression that has previously referenced a tainted value itself +becomes tainted, even if it is logically impossible for the tainted value +to influence the variable. Because taintedness is associated with each +scalar value, some elements of an array can be tainted and others not. -If, however, the kernel setuid script feature isn't disabled, Perl will -complain loudly that your setuid script is insecure. You'll need to -either disable the kernel setuid script feature, or put a C wrapper around -the script. See the program B<wrapsuid> in the F<eg> directory of your -Perl distribution for how to go about doing this. - -There are some systems on which setuid scripts are free of this inherent -security bug. For example, recent releases of Solaris are like this. On -such systems, when the kernel passes the name of the setuid script to open -to the interpreter, rather than using a pathname subject to mettling, it -instead passes /dev/fd/3. This is a special file already opened on the -script, so that there can be no race condition for evil scripts to -exploit. On these systems, Perl should be compiled with -C<-DSETUID_SCRIPTS_ARE_SECURE_NOW>. The B<Configure> program that builds -Perl tries to figure this out for itself. - -When executing a setuid script, or when you have turned on taint checking -explicitly using the B<-T> flag, Perl takes special precautions to -prevent you from falling into any obvious traps. (In some ways, a Perl -script is more secure than the corresponding C program.) 
Any command line -argument, environment variable, or input is marked as "tainted", and may -not be used, directly or indirectly, in any command that invokes a -subshell, or in any command that modifies files, directories, or -processes. Any variable that is set within an expression that has -previously referenced a tainted value also becomes tainted (even if it is -logically impossible for the tainted value to influence the variable). For example: - $foo = shift; # $foo is tainted - $bar = $foo,'bar'; # $bar is also tainted - $xxx = <>; # Tainted + $arg = shift; # $arg is tainted + $hid = $arg, 'bar'; # $hid is also tainted + $line = <>; # Tainted + $line = <STDIN>; # Also tainted + open FOO, "/home/me/bar" or die $!; + $line = <FOO>; # Still tainted $path = $ENV{'PATH'}; # Tainted, but see below - $abc = 'abc'; # Not tainted + $data = 'abc'; # Not tainted - system "echo $foo"; # Insecure - system "/bin/echo", $foo; # Secure (doesn't use sh) - system "echo $bar"; # Insecure - system "echo $abc"; # Insecure until PATH set + system "echo $arg"; # Insecure + system "/bin/echo", $arg; # Secure (doesn't use sh) + system "echo $hid"; # Insecure + system "echo $data"; # Insecure until PATH set + + $path = $ENV{'PATH'}; # $path now tainted $ENV{'PATH'} = '/bin:/usr/bin'; - $ENV{'IFS'} = '' if $ENV{'IFS'} ne ''; + delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; - $path = $ENV{'PATH'}; # Not tainted - system "echo $abc"; # Is secure now! + $path = $ENV{'PATH'}; # $path now NOT tainted + system "echo $data"; # Is secure now! - open(FOO,"$foo"); # OK - open(FOO,">$foo"); # Not OK + open(FOO, "< $arg"); # OK - read-only file + open(FOO, "> $arg"); # Not OK - trying to write - open(FOO,"echo $foo|"); # Not OK, but... - open(FOO,"-|") || exec 'echo', $foo; # OK + open(FOO,"echo $arg|"); # Not OK, but... 
+ open(FOO,"-|") + or exec 'echo', $arg; # OK - $zzz = `echo $foo`; # Insecure, zzz tainted + $shout = `echo $arg`; # Insecure, $shout now tainted - unlink $abc,$foo; # Insecure - umask $foo; # Insecure + unlink $data, $arg; # Insecure + umask $arg; # Insecure - exec "echo $foo"; # Insecure - exec "echo", $foo; # Secure (doesn't use sh) - exec "sh", '-c', $foo; # Considered secure, alas + exec "echo $arg"; # Insecure + exec "echo", $arg; # Secure (doesn't use the shell) + exec "sh", '-c', $arg; # Considered secure, alas! -The taintedness is associated with each scalar value, so some elements -of an array can be tainted, and others not. + @files = <*.c>; # Always insecure (uses csh) + @files = glob('*.c'); # Always insecure (uses csh) If you try to do something insecure, you will get a fatal error saying something like "Insecure dependency" or "Insecure PATH". Note that you -can still write an insecure system call or exec, but only by explicitly -doing something like the last example above. You can also bypass the -tainting mechanism by referencing subpatterns--Perl presumes that if -you reference a substring using $1, $2, etc, you knew what you were -doing when you wrote the pattern: - - $ARGV[0] =~ /^-P(\w+)$/; - $printer = $1; # Not tainted - -This is fairly secure since C<\w+> doesn't match shell metacharacters. -Use of C</.+/> would have been insecure, but Perl doesn't check for that, -so you must be careful with your patterns. This is the I<ONLY> mechanism -for untainting user supplied filenames if you want to do file operations -on them (unless you make C<$E<gt>> equal to C<$E<lt>> ). - -For "Insecure $ENV{PATH}" messages, you need to set C<$ENV{'PATH'}> to a known -value, and each directory in the path must be non-writable by the world. -A frequently voiced gripe is that you can get this message even -if the pathname to an executable is fully qualified. 
But Perl can't -know that the executable in question isn't going to execute some other -program depending on the PATH. +can still write an insecure B<system> or B<exec>, but only by explicitly +doing something like the last example above. + +=head2 Laundering and Detecting Tainted Data + +To test whether a variable contains tainted data, and whose use would thus +trigger an "Insecure dependency" message, you can use the following +I<is_tainted()> function. + + sub is_tainted { + return ! eval { + join('',@_), kill 0; + 1; + }; + } + +This function makes use of the fact that the presence of tainted data +anywhere within an expression renders the entire expression tainted. It +would be inefficient for every operator to test every argument for +taintedness. Instead, the slightly more efficient and conservative +approach is used that if any tainted value has been accessed within the +same expression, the whole expression is considered tainted. + +But testing for taintedness gets you only so far. Sometimes you have just +to clear your data's taintedness. The only way to bypass the tainting +mechanism is by referencing subpatterns from a regular expression match. +Perl presumes that if you reference a substring using $1, $2, etc., that +you knew what you were doing when you wrote the pattern. That means using +a bit of thought--don't just blindly untaint anything, or you defeat the +entire mechanism. It's better to verify that the variable has only good +characters (for certain values of "good") rather than checking whether it +has any bad characters. That's because it's far too easy to miss bad +characters that you never thought of. + +Here's a test to make sure that the data contains nothing but "word" +characters (alphabetics, numerics, and underscores), a hyphen, an at sign, +or a dot. 
+ + if ($data =~ /^([-\@\w.]+)$/) { + $data = $1; # $data now untainted + } else { + die "Bad data in $data"; # log this somewhere + } + +This is fairly secure because C</\w+/> doesn't normally match shell +metacharacters, nor are dot, dash, or at going to mean something special +to the shell. Use of C</.+/> would have been insecure in theory because +it lets everything through, but Perl doesn't check for that. The lesson +is that when untainting, you must be exceedingly careful with your patterns. +Laundering data using regular expression is the I<ONLY> mechanism for +untainting dirty data, unless you use the strategy detailed below to fork +a child of lesser privilege. + +The example does not untaint $data if C<use locale> is in effect, +because the characters matched by C<\w> are determined by the locale. +Perl considers that locale definitions are untrustworthy because they +contain data from outside the program. If you are writing a +locale-aware program, and want to launder data with a regular expression +containing C<\w>, put C<no locale> ahead of the expression in the same +block. See L<perllocale/SECURITY> for further discussion and examples. + +=head2 Switches On the "#!" Line + +When you make a script executable, in order to make it usable as a +command, the system will pass switches to perl from the script's #! +line. Perl checks that any command line switches given to a setuid +(or setgid) script actually match the ones set on the #! line. Some +Unix and Unix-like environments impose a one-switch limit on the #! +line, so you may need to use something like C<-wU> instead of C<-w -U> +under such systems. (This issue should arise only in Unix or +Unix-like environments that support #! and setuid or setgid scripts.) + +=head2 Cleaning Up Your Path + +For "Insecure C<$ENV{PATH}>" messages, you need to set C<$ENV{'PATH'}> to a +known value, and each directory in the path must be non-writable by others +than its owner and group. 
You may be surprised to get this message even +if the pathname to your executable is fully qualified. This is I<not> +generated because you didn't supply a full path to the program; instead, +it's generated because you never set your PATH environment variable, or +you didn't set it to something that was safe. Because Perl can't +guarantee that the executable in question isn't itself going to turn +around and execute some other program that is dependent on your PATH, it +makes sure you set the PATH. It's also possible to get into trouble with other operations that don't care whether they use tainted values. Make judicious use of the file tests in dealing with any user-supplied filenames. When possible, do opens and such after setting C<$E<gt> = $E<lt>>. (Remember group IDs, -too!) Perl doesn't prevent you from opening tainted filenames for reading, +too!) Perl doesn't prevent you from opening tainted filenames for reading, so be careful what you print out. The tainting mechanism is intended to prevent stupid mistakes, not to remove the need for thought. -This gives us a reasonably safe way to open a file or pipe: just reset the -id set to the original IDs. Here's a way to do backticks reasonably -safely. Notice how the exec() is not called with a string that the shell -could expand. By the time we get to the exec(), tainting is turned off, -however, so be careful what you call and what you pass it. +Perl does not call the shell to expand wild cards when you pass B<system> +and B<exec> explicit parameter lists instead of strings with possible shell +wildcards in them. Unfortunately, the B<open>, B<glob>, and +backtick functions provide no such alternate calling convention, so more +subterfuge will be required. + +Perl provides a reasonably safe way to open a file or pipe from a setuid +or setgid program: just create a child process with reduced privilege who +does the dirty work for you. 
First, fork a child using the special +B<open> syntax that connects the parent and child by a pipe. Now the +child resets its ID set and any other per-process attributes, like +environment variables, umasks, current working directories, back to the +originals or known safe values. Then the child process, which no longer +has any special permissions, does the B<open> or other system call. +Finally, the child passes the data it managed to access back to the +parent. Because the file or pipe was opened in the child while running +under less privilege than the parent, it's not apt to be tricked into +doing something it shouldn't. + +Here's a way to do backticks reasonably safely. Notice how the B<exec> is +not called with a string that the shell could expand. This is by far the +best way to call something that might be subjected to shell escapes: just +never call the shell at all. By the time we get to the B<exec>, tainting +is turned off, however, so be careful what you call and what you pass it. + use English; die unless defined $pid = open(KID, "-|"); if ($pid) { # parent while (<KID>) { # do something - } + } close KID; } else { - $> = $<; - $) = $(; # BUG: initgroups() not called - exec 'program', 'arg1', 'arg2'; - die "can't exec program: $!"; - } - -For those even more concerned about safety, see the I<Safe> and I<Safe CGI> -modules at a CPAN site near you. See L<perlmod> for a list of CPAN sites. + $EUID = $UID; + $EGID = $GID; # XXX: initgroups() not called + $ENV{PATH} = "/bin:/usr/bin"; + exec 'myprog', 'arg1', 'arg2'; + die "can't exec myprog: $!"; + } + +A similar strategy would work for wildcard expansion via C<glob>. + +Taint checking is most useful when although you trust yourself not to have +written a program to give away the farm, you don't necessarily trust those +who end up using it not to try to trick it into doing something bad. 
This +is the kind of security checking that's useful for setuid programs and +programs launched on someone else's behalf, like CGI programs. + +This is quite different, however, from not even trusting the writer of the +code not to try to do something evil. That's the kind of trust needed +when someone hands you a program you've never seen before and says, "Here, +run this." For that kind of safety, check out the Safe module, +included standard in the Perl distribution. This module allows the +programmer to set up special compartments in which all system operations +are trapped and namespace access is carefully controlled. + +=head2 Security Bugs + +Beyond the obvious problems that stem from giving special privileges to +systems as flexible as scripts, on many versions of Unix, setuid scripts +are inherently insecure right from the start. The problem is a race +condition in the kernel. Between the time the kernel opens the file to +see which interpreter to run and when the (now-setuid) interpreter turns +around and reopens the file to interpret it, the file in question may have +changed, especially if you have symbolic links on your system. + +Fortunately, sometimes this kernel "feature" can be disabled. +Unfortunately, there are two ways to disable it. The system can simply +outlaw scripts with the setuid bit set, which doesn't help much. +Alternately, it can simply ignore the setuid bit on scripts. If the +latter is true, Perl can emulate the setuid and setgid mechanism when it +notices the otherwise useless setuid/gid bits on Perl scripts. It does +this via a special executable called B<suidperl> that is automatically +invoked for you if it's needed. + +However, if the kernel setuid script feature isn't disabled, Perl will +complain loudly that your setuid script is insecure. You'll need to +either disable the kernel setuid script feature, or put a C wrapper around +the script. 
A C wrapper is just a compiled program that does nothing +except call your Perl program. Compiled programs are not subject to the +kernel bug that plagues setuid scripts. Here's a simple wrapper, written +in C: + + #define REAL_PATH "/path/to/script" + main(ac, av) + char **av; + { + execv(REAL_PATH, av); + } + +Compile this wrapper into a binary executable and then make I<it> rather +than your script setuid or setgid. + +See the program B<wrapsuid> in the F<eg> directory of your Perl +distribution for a convenient way to do this automatically for all your +setuid Perl programs. It moves setuid scripts into files with the same +name plus a leading dot, and then compiles a wrapper like the one above +for each of them. + +In recent years, vendors have begun to supply systems free of this +inherent security bug. On such systems, when the kernel passes the name +of the setuid script to open to the interpreter, rather than using a +pathname subject to meddling, it instead passes I</dev/fd/3>. This is a +special file already opened on the script, so that there can be no race +condition for evil scripts to exploit. On these systems, Perl should be +compiled with C<-DSETUID_SCRIPTS_ARE_SECURE_NOW>. The B<Configure> +program that builds Perl tries to figure this out for itself, so you +should never have to specify this yourself. Most modern releases of +SysVr4 and BSD 4.4 use this approach to avoid the kernel race condition. + +Prior to release 5.003 of Perl, a bug in the code of B<suidperl> could +introduce a security hole in systems compiled with strict POSIX +compliance. + +=head2 Protecting Your Programs + +There are a number of ways to hide the source to your Perl programs, +with varying levels of "security". + +First of all, however, you I<can't> take away read permission, because +the source code has to be readable in order to be compiled and +interpreted. (That doesn't mean that a CGI script's source is +readable by people on the web, though.) 
So you have to leave the +permissions at the socially friendly 0755 level. + +Some people regard this as a security problem. If your program does +insecure things, and relies on people not knowing how to exploit those +insecurities, it is not secure. It is often possible for someone to +determine the insecure things and exploit them without viewing the +source. Security through obscurity, the name for hiding your bugs +instead of fixing them, is little security indeed. + +You can try using encryption via source filters (Filter::* from CPAN). +But crackers might be able to decrypt it. You can try using the +byte code compiler and interpreter described below, but crackers might +be able to de-compile it. You can try using the native-code compiler +described below, but crackers might be able to disassemble it. These +pose varying degrees of difficulty to people wanting to get at your +code, but none can definitively conceal it (this is true of every +language, not just Perl). + +If you're concerned about people profiting from your code, then the +bottom line is that nothing but a restrictive licence will give you +legal security. License your software and pepper it with threatening +statements like "This is unpublished proprietary software of XYZ Corp. +Your access to it does not give you permission to use it blah blah +blah." You should see a lawyer to be sure your licence's wording will +stand up in court. diff --git a/pod/perlstyle.pod b/pod/perlstyle.pod index e4a5aab41f..bfc94a9eaa 100644 --- a/pod/perlstyle.pod +++ b/pod/perlstyle.pod @@ -6,13 +6,13 @@ perlstyle - Perl style guide Each programmer will, of course, have his or her own preferences in regards to formatting, but there are some general guidelines that will -make your programs easier to read, understand, and maintain. +make your programs easier to read, understand, and maintain. The most important thing is to run your programs under the B<-w> flag at all times. 
You may turn it off explicitly for particular portions of code via the C<$^W> variable if you must. You should also always run under C<use strict> or know the reason why not. -The <use sigtrap> and even <use diagnostics> pragmas may also prove +The C<use sigtrap> and even C<use diagnostics> pragmas may also prove useful. Regarding aesthetics of code lay out, about the only thing Larry @@ -32,7 +32,7 @@ Opening curly on same line as keyword, if possible, otherwise line up. =item * -Space before the opening curly of a multiline BLOCK. +Space before the opening curly of a multi-line BLOCK. =item * @@ -64,7 +64,7 @@ Uncuddled elses. =item * -No space between function name and its opening paren. +No space between function name and its opening parenthesis. =item * @@ -76,7 +76,7 @@ Long lines broken after an operator (except "and" and "or"). =item * -Space after last paren matching on current line. +Space after last parenthesis matching on current line. =item * @@ -88,7 +88,7 @@ Omit redundant punctuation as long as clarity doesn't suffer. =back -Larry has his reasons for each of these things, but he doen't claim that +Larry has his reasons for each of these things, but he doesn't claim that everyone else's mind works the same as his does. Here are some other more substantive style issues to think about: @@ -117,7 +117,7 @@ is better than $verbose && print "Starting analysis\n"; -since the main point isn't whether the user typed B<-v> or not. +because the main point isn't whether the user typed B<-v> or not. Similarly, just because an operator lets you assume default arguments doesn't mean that you have to make use of the defaults. The defaults @@ -135,7 +135,7 @@ schmuck bounce on the % key in B<vi>. Even if you aren't in doubt, consider the mental welfare of the person who has to maintain the code after you, and who will probably put -parens in the wrong place. +parentheses in the wrong place. =item * @@ -154,13 +154,13 @@ the middle. 
Just "outdent" it a little to make it more visible: =item * Don't be afraid to use loop labels--they're there to enhance -readability as well as to allow multi-level loop breaks. See the +readability as well as to allow multilevel loop breaks. See the previous example. =item * Avoid using grep() (or map()) or `backticks` in a void context, that is, -when you just throw away their return values. Those functions all +when you just throw away their return values. Those functions all have return values, so use them. Otherwise use a foreach() loop or the system() function instead. @@ -169,7 +169,7 @@ the system() function instead. For portability, when using features that may not be implemented on every machine, test the construct in an eval to see if it fails. If you know what version or patchlevel a particular feature was -implemented, you can test C<$]> ($PERL_VERSION in C<English>) to see if it +implemented, you can test C<$]> (C<$PERL_VERSION> in C<English>) to see if it will be there. The C<Config> module will also let you interrogate values determined by the B<Configure> program when Perl was installed. @@ -178,7 +178,7 @@ determined by the B<Configure> program when Perl was installed. Choose mnemonic identifiers. If you can't remember what mnemonic means, you've got a problem. -=item * +=item * While short identifiers like $gotit are probably ok, use underscores to separate words. It is generally easier to read $var_names_like_this than @@ -189,20 +189,20 @@ Package names are sometimes an exception to this rule. Perl informally reserves lowercase module names for "pragma" modules like C<integer> and C<strict>. Other modules should begin with a capital letter and use mixed case, but probably without underscores due to limitations in primitive -filesystems' representations of module names as files that must fit into a -few sparse bites. +file systems' representations of module names as files that must fit into a +few sparse bytes. 
=item * -You may find it helpful to use letter case to indicate the scope -or nature of a variable. For example: +You may find it helpful to use letter case to indicate the scope +or nature of a variable. For example: - $ALL_CAPS_HERE constants only (beware clashes with perl vars!) - $Some_Caps_Here package-wide global/static - $no_caps_here function scope my() or local() variables + $ALL_CAPS_HERE constants only (beware clashes with perl vars!) + $Some_Caps_Here package-wide global/static + $no_caps_here function scope my() or local() variables -Function and method names seem to work best as all lowercase. -E.g., $obj->as_string(). +Function and method names seem to work best as all lowercase. +E.g., $obj-E<gt>as_string(). You can use a leading underscore to indicate that a variable or function should not be used outside the package that defined it. @@ -216,9 +216,9 @@ Don't use slash as a delimiter when your regexp has slashes or backslashes. =item * Use the new "and" and "or" operators to avoid having to parenthesize -list operators so much, and to reduce the incidence of punctuational +list operators so much, and to reduce the incidence of punctuation operators like C<&&> and C<||>. Call your subroutines as if they were -functions or list operators to avoid excessive ampersands and parens. +functions or list operators to avoid excessive ampersands and parentheses. =item * @@ -227,12 +227,12 @@ Use here documents instead of repeated print() statements. =item * Line up corresponding things vertically, especially if it'd be too long -to fit on one line anyway. +to fit on one line anyway. 
- $IDX = $ST_MTIME; - $IDX = $ST_ATIME if $opt_u; - $IDX = $ST_CTIME if $opt_c; - $IDX = $ST_SIZE if $opt_s; + $IDX = $ST_MTIME; + $IDX = $ST_ATIME if $opt_u; + $IDX = $ST_CTIME if $opt_c; + $IDX = $ST_SIZE if $opt_s; mkdir $tmpdir, 0700 or die "can't mkdir $tmpdir: $!"; chdir($tmpdir) or die "can't chdir $tmpdir: $!"; diff --git a/pod/perlsub.pod b/pod/perlsub.pod index b308298858..c124f21c6a 100644 --- a/pod/perlsub.pod +++ b/pod/perlsub.pod @@ -22,8 +22,8 @@ To import subroutines: To call subroutines: - NAME(LIST); # & is optional with parens. - NAME LIST; # Parens optional if predeclared/imported. + NAME(LIST); # & is optional with parentheses. + NAME LIST; # Parentheses optional if predeclared/imported. &NAME; # Passes current @_ to subroutine. =head1 DESCRIPTION @@ -47,21 +47,33 @@ there's really no difference from the language's perspective.) Any arguments passed to the routine come in as the array @_. Thus if you called a function with two arguments, those would be stored in C<$_[0]> -and C<$_[1]>. The array @_ is a local array, but its values are implicit -references (predating L<perlref>) to the actual scalar parameters. The -return value of the subroutine is the value of the last expression -evaluated. Alternatively, a return statement may be used to specify the -returned value and exit the subroutine. If you return one or more arrays -and/or hashes, these will be flattened together into one large -indistinguishable list. +and C<$_[1]>. The array @_ is a local array, but its elements are +aliases for the actual scalar parameters. In particular, if an element +C<$_[0]> is updated, the corresponding argument is updated (or an error +occurs if it is not updatable). If an argument is an array or hash +element which did not exist when the function was called, that element is +created only when (and if) it is modified or if a reference to it is +taken. (Some earlier versions of Perl created the element whether or not +it was assigned to.) 
Note that assigning to the whole array @_ removes +the aliasing, and does not update any arguments. + +The return value of the subroutine is the value of the last expression +evaluated. Alternatively, a return statement may be used to exit the +subroutine, optionally specifying the returned value, which will be +evaluated in the appropriate context (list, scalar, or void) depending +on the context of the subroutine call. If you specify no return value, +the subroutine will return an empty list in a list context, an undefined +value in a scalar context, or nothing in a void context. If you return +one or more arrays and/or hashes, these will be flattened together into +one large indistinguishable list. Perl does not have named formal parameters, but in practice all you do is assign to a my() list of these. Any variables you use in the function that aren't declared private are global variables. For the gory details -on creating private variables, see the sections below on L<"Private -Variables via my()"> and L</"Temporary Values via local()">. To create -protected environments for a set of functions in a separate package (and -probably a separate file), see L<perlmod/"Packages">. +on creating private variables, see +L<"Private Variables via my()"> and L<"Temporary Values via local()">. +To create protected environments for a set of functions in a separate +package (and probably a separate file), see L<perlmod/"Packages">. Example: @@ -81,7 +93,7 @@ Example: sub get_line { $thisline = $lookahead; # GLOBAL VARIABLES!! - LINE: while ($lookahead = <STDIN>) { + LINE: while (defined($lookahead = <STDIN>)) { if ($lookahead =~ /^[ \t]/) { $thisline .= $lookahead; } @@ -105,13 +117,13 @@ Use array assignment to a local list to name your formal arguments: } This also has the effect of turning call-by-reference into call-by-value, -since the assignment copies the values. Otherwise a function is free to -do in-place modifications of @_ and change its callers values. 
+because the assignment copies the values. Otherwise a function is free to +do in-place modifications of @_ and change its caller's values. upcase_in($v1, $v2); # this changes $v1 and $v2 sub upcase_in { - for (@_) { tr/a-z/A-Z/ } - } + for (@_) { tr/a-z/A-Z/ } + } You aren't allowed to modify constants in this way, of course. If an argument were actually literal and you tried to change it, you'd take a @@ -119,17 +131,17 @@ argument were actually literal and you tried to change it, you'd take a upcase_in("frederick"); -It would be much safer if the upcase_in() function +It would be much safer if the upcase_in() function were written to return a copy of its parameters instead of changing them in place: ($v3, $v4) = upcase($v1, $v2); # this doesn't sub upcase { + return unless defined wantarray; # void context, do nothing my @parms = @_; - for (@parms) { tr/a-z/A-Z/ } - # wantarray checks if we were called in list context + for (@parms) { tr/a-z/A-Z/ } return wantarray ? @parms : $parms[0]; - } + } Notice how this (unprototyped) function doesn't care whether it was passed real scalars or arrays. Perl will see everything as one big long flat @_ @@ -149,13 +161,14 @@ Because like its flat incoming parameter list, the return list is also flat. So all you have managed to do here is stored everything in @a and made @b an empty list. See L</"Pass by Reference"> for alternatives. -A subroutine may be called using the "&" prefix. The "&" is optional in -Perl 5, and so are the parens if the subroutine has been predeclared. -(Note, however, that the "&" is I<NOT> optional when you're just naming -the subroutine, such as when it's used as an argument to defined() or -undef(). Nor is it optional when you want to do an indirect subroutine -call with a subroutine name or reference using the C<&$subref()> or -C<&{$subref}()> constructs. See L<perlref> for more on that.) +A subroutine may be called using the "&" prefix. 
The "&" is optional +in modern Perls, and so are the parentheses if the subroutine has been +predeclared. (Note, however, that the "&" is I<NOT> optional when +you're just naming the subroutine, such as when it's used as an +argument to defined() or undef(). Nor is it optional when you want to +do an indirect subroutine call with a subroutine name or reference +using the C<&$subref()> or C<&{$subref}()> constructs. See L<perlref> +for more on that.) Subroutines may be called recursively. If a subroutine is called using the "&" form, the argument list is optional, and if omitted, no @_ array is @@ -170,7 +183,7 @@ new users may wish to avoid. &foo(); # the same &foo; # foo() get current args, like foo(@_) !! - foo; # like foo() IFF sub foo pre-declared, else "foo" + foo; # like foo() IFF sub foo predeclared, else "foo" Not only does the "&" form make the argument list optional, but it also disables any prototype checking on the arguments you do provide. This @@ -187,11 +200,12 @@ Synopsis: my @oof = @bar; # declare @oof lexical, and init it A "my" declares the listed variables to be confined (lexically) to the -enclosing block, subroutine, C<eval>, or C<do/require/use>'d file. If -more than one value is listed, the list must be placed in parens. All -listed elements must be legal lvalues. Only alphanumeric identifiers may -be lexically scoped--magical builtins like $/ must currently be localized with -"local" instead. +enclosing block, conditional (C<if/unless/elsif/else>), loop +(C<for/foreach/while/until/continue>), subroutine, C<eval>, or +C<do/require/use>'d file. If more than one value is listed, the list +must be placed in parentheses. All listed elements must be legal lvalues. +Only alphanumeric identifiers may be lexically scoped--magical +builtins like $/ must currently be localized with "local" instead. 
Unlike dynamic variables created by the "local" statement, lexical variables declared with "my" are totally hidden from the outside world, @@ -216,7 +230,7 @@ this is used to name the parameters to a subroutine. Examples: my $arg = shift; # name doesn't matter $arg **= 1/3; return $arg; - } + } The "my" is simply a modifier on something you might assign to. So when you do assign to the variables in its argument list, the "my" doesn't @@ -225,11 +239,11 @@ change whether those variables is viewed as a scalar or an array. So my ($foo) = <STDIN>; my @FOO = <STDIN>; -both supply a list context to the righthand side, while +both supply a list context to the right-hand side, while my $foo = <STDIN>; -supplies a scalar context. But the following only declares one variable: +supplies a scalar context. But the following declares only one variable: my $foo, $bar = 1; @@ -243,13 +257,56 @@ the current statement. Thus, my $x = $x; -can be used to initialize the new $x with the value of the old $x, and +can be used to initialize the new $x with the value of the old $x, and the expression my $x = 123 and $x == 123 is false unless the old $x happened to have the value 123. +Lexical scopes of control structures are not bounded precisely by the +braces that delimit their controlled blocks; control expressions are +part of the scope, too. Thus in the loop + + while (defined(my $line = <>)) { + $line = lc $line; + } continue { + print $line; + } + +the scope of $line extends from its declaration throughout the rest of +the loop construct (including the C<continue> clause), but not beyond +it. Similarly, in the conditional + + if ((my $answer = <STDIN>) =~ /^yes$/i) { + user_agrees(); + } elsif ($answer =~ /^no$/i) { + user_disagrees(); + } else { + chomp $answer; + die "'$answer' is neither 'yes' nor 'no'"; + } + +the scope of $answer extends from its declaration throughout the rest +of the conditional (including C<elsif> and C<else> clauses, if any), +but not beyond it. 
+ +(None of the foregoing applies to C<if/unless> or C<while/until> +modifiers appended to simple statements. Such modifiers are not +control structures and have no effect on scoping.) + +The C<foreach> loop defaults to scoping its index variable dynamically +(in the manner of C<local>; see below). However, if the index +variable is prefixed with the keyword "my", then it is lexically +scoped instead. Thus in the loop + + for my $i (1, 2, 3) { + some_function(); + } + +the scope of $i extends to the end of the loop, but not beyond it, and +so the value of $i is unavailable in some_function(). + Some users may wish to encourage the use of lexically scoped variables. As an aid to catching implicit references to package variables, if you say @@ -263,8 +320,9 @@ otherwise. An inner block may countermand this with S<"no strict 'vars'">. A my() has both a compile-time and a run-time effect. At compile time, the compiler takes notice of it; the principal usefulness of this is to -quiet C<use strict 'vars'>. The actual initialization doesn't happen -until run time, so gets executed every time through a loop. +quiet C<use strict 'vars'>. The actual initialization is delayed until +run time, so it gets executed appropriately; every time through a loop, +for example. Variables declared with "my" are not part of any package and are therefore never fully qualified with the package name. In particular, you're not @@ -284,9 +342,9 @@ lexical of the same name is also visible: That will print out 20 and 10. -You may declare "my" variables at the outer most scope of a file to -totally hide any such identifiers from the outside world. This is similar -to a C's static variables at the file level. +You may declare "my" variables at the outermost scope of a file to +hide any such identifiers totally from the outside world. This is similar +to C's static variables at the file level. 
To do this with a subroutine requires the use of a closure (anonymous function). If a block (such as an eval(), function, or C<package>) wants to create a private subroutine that cannot be called from outside that block, it can declare a lexical @@ -297,7 +355,7 @@ variable containing an anonymous sub reference: &$secret_sub(); As long as the reference is never returned by any function within the -module, no outside module can see the subroutine, since its name is not in +module, no outside module can see the subroutine, because its name is not in any package's symbol table. Remember that it's not I<REALLY> called $some_pack::secret_version or anything; it's just $secret_version, unqualified and unqualifiable. @@ -314,35 +372,35 @@ just enclose the whole function in an extra block, and put the static variable outside the function but in the block. { - my $secret_val = 0; + my $secret_val = 0; sub gimme_another { return ++$secret_val; - } - } + } + } # $secret_val now becomes unreachable by the outside # world, but retains its value between calls to gimme_another -If this function is being sourced in from a separate file +If this function is being sourced in from a separate file via C<require> or C<use>, then this is probably just fine. If it's -all in the main program, you'll need to arrange for the my() +all in the main program, you'll need to arrange for the my() to be executed early, either by putting the whole block above -your pain program, or more likely, merely placing a BEGIN +your main program, or more likely, placing merely a BEGIN sub around it to make sure it gets executed before your program starts to run: sub BEGIN { - my $secret_val = 0; + my $secret_val = 0; sub gimme_another { return ++$secret_val; - } - } + } + } See L<perlrun> about the BEGIN function. =head2 Temporary Values via local() B<NOTE>: In general, you should be using "my" instead of "local", because -it's faster and safer. 
Execeptions to this include the global punctuation +it's faster and safer. Exceptions to this include the global punctuation variables, filehandles and formats, and direct manipulation of the Perl symbol table itself. Format variables often use "local" though, as do other variables whose current value must be visible to called @@ -359,18 +417,18 @@ Synopsis: local *merlyn = *randal; # now $merlyn is really $randal, plus # @merlyn is really @randal, etc local *merlyn = 'randal'; # SAME THING: promote 'randal' to *randal - local *merlyn = \$randal; # just alias $merlyn, not @merlyn etc + local *merlyn = \$randal; # just alias $merlyn, not @merlyn etc A local() modifies its listed variables to be local to the enclosing -block, (or subroutine, C<eval{}> or C<do>) and I<the any called from +block, (or subroutine, C<eval{}>, or C<do>) and I<any subroutine called from within that block>. A local() just gives temporary values to global (meaning package) variables. This is known as dynamic scoping. Lexical scoping is done with "my", which works more like C's auto declarations. If more than one variable is given to local(), they must be placed in -parens. All listed elements must be legal lvalues. This operator works +parentheses. All listed elements must be legal lvalues. This operator works by saving the current values of those variables in its argument list on a -hidden stack and restoring them upon exiting the block, subroutine or +hidden stack and restoring them upon exiting the block, subroutine, or eval. This means that called subroutines can also reference the local variable, but not the global one. The argument list may be assigned to if desired, which allows you to initialize your local variables. (If no @@ -380,9 +438,9 @@ subroutine. Examples: for $i ( 0 .. 9 ) { $digits{$i} = $i; - } + } # assume this function uses global %digits hash - parse_num(); + parse_num(); # now temporarily add to %digits hash if ($base12) { @@ -392,7 +450,7 @@ subroutine. 
Examples: } # old %digits restored here -Because local() is a run-time command, and so gets executed every time +Because local() is a run-time command, it gets executed every time through a loop. In releases of Perl previous to 5.0, this used more stack storage each time until the loop was exited. Perl now reclaims the space each time through, but it's still more efficient to declare your variables @@ -405,7 +463,7 @@ as a scalar or an array. So local($foo) = <STDIN>; local @FOO = <STDIN>; -both supply a list context to the righthand side, while +both supply a list context to the right-hand side, while local $foo = <STDIN>; @@ -422,12 +480,12 @@ Sometimes you don't want to pass the value of an array to a subroutine but rather the name of it, so that the subroutine can modify the global copy of it rather than working with a local copy. In perl you can refer to all objects of a particular name by prefixing the name -with a star: C<*foo>. This is often known as a "type glob", since the +with a star: C<*foo>. This is often known as a "typeglob", because the star on the front can be thought of as a wildcard match for all the funny prefix characters on variables and subroutines and such. -When evaluated, the type glob produces a scalar value that represents -all the objects of that name, including any filehandle, format or +When evaluated, the typeglob produces a scalar value that represents +all the objects of that name, including any filehandle, format, or subroutine. When assigned to, it causes the name mentioned to refer to whatever "*" value was assigned to it. Example: @@ -442,22 +500,23 @@ whatever "*" value was assigned to it. Example: Note that scalars are already passed by reference, so you can modify scalar arguments without using this mechanism by referring explicitly -to $_[0] etc. You can modify all the elements of an array by passing +to C<$_[0]> etc. 
You can modify all the elements of an array by passing all the elements as scalars, but you have to use the * mechanism (or -the equivalent reference mechanism) to push, pop or change the size of +the equivalent reference mechanism) to push, pop, or change the size of an array. It will certainly be faster to pass the typeglob (or reference). Even if you don't want to modify an array, this mechanism is useful for -passing multiple arrays in a single LIST, since normally the LIST +passing multiple arrays in a single LIST, because normally the LIST mechanism will merge all the array values so that you can't extract out -the individual arrays. For more on typeglobs, see L<perldata/"Typeglobs">. +the individual arrays. For more on typeglobs, see +L<perldata/"Typeglobs and Filehandles">. =head2 Pass by Reference -If you want to pass more than one array or hash into a function--or -return them from it--and have them maintain their integrity, -then you're going to have to use an explicit pass-by-reference. -Before you do that, you need to understand references as detailed in L<perlref>. +If you want to pass more than one array or hash into a function--or +return them from it--and have them maintain their integrity, then +you're going to have to use an explicit pass-by-reference. Before you +do that, you need to understand references as detailed in L<perlref>. This section may not make much sense to you otherwise. Here are a few simple examples. 
First, let's pass in several @@ -471,29 +530,29 @@ list of all their former last elements: my @retlist = (); foreach $aref ( @_ ) { push @retlist, pop @$aref; - } + } return @retlist; - } + } -Here's how you might write a function that returns a +Here's how you might write a function that returns a list of keys occurring in all the hashes passed to it: - @common = inter( \%foo, \%bar, \%joe ); + @common = inter( \%foo, \%bar, \%joe ); sub inter { my ($k, $href, %seen); # locals foreach $href (@_) { while ( $k = each %$href ) { $seen{$k}++; - } - } + } + } return grep { $seen{$_} == @_ } keys %seen; - } + } -So far, we're just using the normal list return mechanism. -What happens if you want to pass or return a hash? Well, -if you're only using one of them, or you don't mind them +So far, we're using just the normal list return mechanism. +What happens if you want to pass or return a hash? Well, +if you're using only one of them, or you don't mind them concatenating, then the normal calling convention is ok, although -a little expensive. +a little expensive. Where people get into trouble is here: @@ -501,7 +560,7 @@ Where people get into trouble is here: or (%a, %b) = func(%c, %d); -That syntax simply won't work. It just sets @a or %a and clears the @b or +That syntax simply won't work. It sets just @a or %a and clears the @b or %b. Plus the function didn't get passed into two separate arrays or hashes: it got one long list in @_, as always. @@ -518,8 +577,8 @@ in order of how many elements they have in them: return ($cref, $dref); } else { return ($dref, $cref); - } - } + } + } It turns out that you can actually do this also: @@ -531,12 +590,12 @@ It turns out that you can actually do this also: return (\@c, \@d); } else { return (\@d, \@c); - } - } + } + } Here we're using the typeglobs to do symbol table aliasing. It's a tad subtle, though, and also won't work if you're using my() -variables, since only globals (well, and local()s) are in the symbol table. 
+variables, because only globals (well, and local()s) are in the symbol table. If you're passing around filehandles, you could usually just use the bare typeglob, like *STDOUT, but typeglobs references would be better because @@ -554,17 +613,20 @@ they'll still work properly under C<use strict 'refs'>. For example: return scalar <$fh>; } +Another way to do this is using *HANDLE{IO}, see L<perlref> for usage +and caveats. + If you're planning on generating new filehandles, you could do this: sub openit { my $name = shift; local *FH; - return open (FH, $path) ? \*FH : undef; - } + return open (FH, $path) ? *FH : undef; + } Although that will actually produce a small memory leak. See the bottom -of L<perlfunc/open()> for a somewhat cleaner way using the FileHandle -functions supplied with the POSIX package. +of L<perlfunc/open()> for a somewhat cleaner way using the IO::Handle +package. =head2 Prototypes @@ -574,7 +636,7 @@ As of the 5.002 release of perl, if you declare then mypush() takes arguments exactly like push() does. The declaration of the function to be called must be visible at compile time. The prototype -only affects the interpretation of new-style calls to the function, where +affects only the interpretation of new-style calls to the function, where new-style is defined as not using the C<&> character. In other words, if you call it like a builtin function, then it behaves like a builtin function. If you call it like an old-fashioned subroutine, then it @@ -583,10 +645,10 @@ this rule that prototypes have no influence on subroutine references like C<\&foo> or on indirect subroutine calls like C<&{$subref}>. Method calls are not influenced by prototypes either, because the -function to be called is indeterminate at compile time, since it depends +function to be called is indeterminate at compile time, because it depends on inheritance. 
-Since the intent is primarily to let you define subroutines that work +Because the intent is primarily to let you define subroutines that work like builtin commands, here are the prototypes for some other functions that parse almost exactly like the corresponding builtins. @@ -627,7 +689,7 @@ A semicolon separates mandatory arguments from optional arguments. Note how the last three examples above are treated specially by the parser. mygrep() is parsed as a true list operator, myrand() is parsed as a true unary operator with unary precedence the same as rand(), and -mytime() is truly argumentless, just like time(). That is, if you +mytime() is truly without arguments, just like time(). That is, if you say mytime +2; @@ -637,7 +699,7 @@ without the prototype. The interesting thing about & is that you can generate new syntax with it: - sub try (&$) { + sub try (&@) { my($try,$catch) = @_; eval { &$try }; if ($@) { @@ -645,7 +707,7 @@ The interesting thing about & is that you can generate new syntax with it: &$catch; } } - sub catch (&) { @_ } + sub catch (&) { $_[0] } try { die "phooey"; @@ -657,7 +719,7 @@ That prints "unphooey". (Yes, there are still unresolved issues having to do with the visibility of @_. I'm ignoring that question for the moment. (But note that if we make @_ lexically scoped, those anonymous subroutines can act like closures... (Gee, -is this sounding a little Lispish? (Nevermind.)))) +is this sounding a little Lispish? (Never mind.)))) And here's a reimplementation of grep: @@ -687,7 +749,7 @@ if you decide that a function should take just one parameter, like this: sub func ($) { my $n = shift; print "you gave me $n\n"; - } + } and someone has been calling it with an array or expression returning a list: @@ -698,21 +760,74 @@ returning a list: Then you've just supplied an automatic scalar() in front of their argument, which can be more than a bit surprising. The old @foo which used to hold one thing doesn't get passed in. 
Instead, -the func() now gets passed in 1, that is, the number of elments +the func() now gets passed in 1, that is, the number of elements in @foo. And the split() gets called in a scalar context and starts scribbling on your @_ parameter list. -This is all very powerful, of course, and should only be used in moderation -to make the world a better place. +This is all very powerful, of course, and should be used only in moderation +to make the world a better place. + +=head2 Constant Functions + +Functions with a prototype of C<()> are potential candidates for +inlining. If the result after optimization and constant folding is +either a constant or a lexically-scoped scalar which has no other +references, then it will be used in place of function calls made +without C<&> or C<do>. Calls made using C<&> or C<do> are never +inlined. (See constant.pm for an easy way to declare most +constants.) + +All of the following functions would be inlined. + + sub pi () { 3.14159 } # Not exact, but close. + sub PI () { 4 * atan2 1, 1 } # As good as it gets, + # and it's inlined, too! + sub ST_DEV () { 0 } + sub ST_INO () { 1 } + + sub FLAG_FOO () { 1 << 8 } + sub FLAG_BAR () { 1 << 9 } + sub FLAG_MASK () { FLAG_FOO | FLAG_BAR } + + sub OPT_BAZ () { not (0x1B58 & FLAG_MASK) } + sub BAZ_VAL () { + if (OPT_BAZ) { + return 23; + } + else { + return 42; + } + } + + sub N () { int(BAZ_VAL) / 3 } + BEGIN { + my $prod = 1; + for (1..N) { $prod *= $_ } + sub N_FACTORIAL () { $prod } + } + +If you redefine a subroutine which was eligible for inlining you'll get +a mandatory warning. (You can use this warning to tell whether or not a +particular subroutine is considered constant.) The warning is +considered severe enough not to be optional because previously compiled +invocations of the function will still be using the old value of the +function. 
If you need to be able to redefine the subroutine you need to +ensure that it isn't inlined, either by dropping the C<()> prototype +(which changes the calling semantics, so beware) or by thwarting the +inlining mechanism in some other way, such as + + sub not_inlined () { + 23 if $]; + } =head2 Overriding Builtin Functions -Many builtin functions may be overridden, though this should only be -tried occasionally and for good reason. Typically this might be +Many builtin functions may be overridden, though this should be tried +only occasionally and for good reason. Typically this might be done by a package attempting to emulate missing builtin functionality on a non-Unix system. -Overriding may only be done by importing the name from a +Overriding may be done only by importing the name from a module--ordinary predeclaration isn't good enough. However, the C<subs> pragma (compiler directive) lets you, in effect, predeclare subs via the import syntax, and these names may then override the builtin ones: @@ -722,7 +837,7 @@ via the import syntax, and these names may then override the builtin ones: sub chdir { ... } Library modules should not in general export builtin names like "open" -or "chdir" as part of their default @EXPORT list, since these may +or "chdir" as part of their default @EXPORT list, because these may sneak into someone else's namespace and change the semantics unexpectedly. Instead, if the module adds the name to the @EXPORT_OK list, then it's possible for a user to import the name explicitly, but not implicitly. @@ -762,12 +877,12 @@ should just call system() with those arguments. 
All you'd do is this: my $program = $AUTOLOAD; $program =~ s/.*:://; system($program, @_); - } + } date(); - who('am', i'); + who('am', 'i'); ls('-l'); -In fact, if you preclare the functions you want to call that way, you don't +In fact, if you predeclare the functions you want to call that way, you don't even need the parentheses: use subs qw(date who ls); @@ -779,13 +894,14 @@ A more complete example of this is the standard Shell module, which can treat undefined subroutine calls as calls to Unix programs. Mechanisms are available for modules writers to help split the modules -up into autoloadable files. See the standard AutoLoader module described -in L<Autoloader>, the standard SelfLoader modules in L<SelfLoader>, and -the document on adding C functions to perl code in L<perlxs>. +up into autoloadable files. See the standard AutoLoader module +described in L<AutoLoader> and in L<AutoSplit>, the standard +SelfLoader modules in L<SelfLoader>, and the document on adding C +functions to perl code in L<perlxs>. =head1 SEE ALSO See L<perlref> for more on references. See L<perlxs> if you'd -like to learn about calling C subroutines from perl. See -L<perlmod> to learn about bundling up your functions in +like to learn about calling C subroutines from perl. See +L<perlmod> to learn about bundling up your functions in separate files. diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod index c3ef4501dd..9c3f6617bd 100644 --- a/pod/perlsyn.pod +++ b/pod/perlsyn.pod @@ -32,20 +32,23 @@ that. A declaration can be put anywhere a statement can, but has no effect on the execution of the primary sequence of statements--declarations all take effect at compile time. Typically all the declarations are put at -the beginning or the end of the script. However, if you're using +the beginning or the end of the script. 
However, if you're using lexically-scoped private variables created with my(), you'll have to make sure your format or subroutine definition is within the same block scope -as the my if you expect to to be able to access those private variables. +as the my if you expect to be able to access those private variables. Declaring a subroutine allows a subroutine name to be used as if it were a list operator from that point forward in the program. You can declare a -subroutine (prototyped to take one scalar parameter) without defining it by saying just: +subroutine without defining it by saying C<sub name>, thus: - sub myname ($); + sub myname; $me = myname $0 or die "can't get myname"; -Note that it functions as a list operator though, not as a unary -operator, so be careful to use C<or> instead of C<||> there. +Note that it functions as a list operator, not as a unary operator; so +be careful to use C<or> instead of C<||> in this case. However, if +you were to declare the subroutine as C<sub myname ($)>, then +C<myname> would function as a unary operator, so either C<or> or +C<||> would work. Subroutines declarations can also be loaded up with the C<require> statement or both loaded and imported into your namespace with a C<use> statement. @@ -63,9 +66,9 @@ The only kind of simple statement is an expression evaluated for its side effects. Every simple statement must be terminated with a semicolon, unless it is the final statement in a block, in which case the semicolon is optional. (A semicolon is still encouraged there if the -block takes up more than one line, since you may eventually add another line.) +block takes up more than one line, because you may eventually add another line.) 
Note that there are some operators like C<eval {}> and C<do {}> that look -like compound statements, but aren't (they're just TERMs in an expression), +like compound statements, but aren't (they're just TERMs in an expression), and thus need an explicit termination if used as the last item in a statement. Any simple statement may optionally be followed by a I<SINGLE> modifier, @@ -91,7 +94,7 @@ can write loops like: } until $line eq ".\n"; See L<perlfunc/do>. Note also that the loop control -statements described later will I<NOT> work in this construct, since +statements described later will I<NOT> work in this construct, because modifiers don't take loop labels. Sorry. You can always wrap another block around it to do that sort of thing. @@ -128,7 +131,7 @@ all do the same thing: open(FOO) ? 'hi mom' : die "Can't open $FOO: $!"; # a bit exotic, that last one -The C<if> statement is straightforward. Since BLOCKs are always +The C<if> statement is straightforward. Because BLOCKs are always bounded by curly brackets, there is never any ambiguity about which C<if> an C<else> goes with. If you use C<unless> in place of C<if>, the sense of the test is reversed. @@ -178,25 +181,26 @@ want to skip ahead and get the next record. while (<>) { chomp; - if (s/\\$//) { - $_ .= <>; + if (s/\\$//) { + $_ .= <>; redo unless eof(); } # now process $_ - } + } which is Perl short-hand for the more explicitly written version: - LINE: while ($line = <ARGV>) { + LINE: while (defined($line = <ARGV>)) { chomp($line); - if ($line =~ s/\\$//) { - $line .= <ARGV>; + if ($line =~ s/\\$//) { + $line .= <ARGV>; redo LINE unless eof(); # not eof(ARGV)! } # now process $line - } + } -Or here's a a simpleminded Pascal comment stripper (warning: assumes no { or } in strings) +Or here's a simpleminded Pascal comment stripper (warning: assumes no +{ or } in strings). 
LINE: while (<STDIN>) { while (s|({.*}.*){.*}|$1 |) {} @@ -220,11 +224,8 @@ If the word C<while> is replaced by the word C<until>, the sense of the test is reversed, but the conditional is still tested before the first iteration. -In either the C<if> or the C<while> statement, you may replace "(EXPR)" -with a BLOCK, and the conditional is true if the value of the last -statement in that block is true. While this "feature" continues to work in -version 5, it has been deprecated, so please change any occurrences of "if BLOCK" to -"if (do BLOCK)". +The form C<while/if BLOCK BLOCK>, available in Perl 4, is no longer +available. Replace any occurrence of C<if BLOCK> by C<if (do BLOCK)>. =head2 For Loops @@ -244,27 +245,32 @@ is the same as this: $i++; } +(There is one minor difference: The first form implies a lexical scope +for variables declared with C<my> in the initialization expression.) + Besides the normal array index looping, C<for> can lend itself to many other interesting applications. Here's one that avoids the -problem you get into if you explicitly test for end-of-file on -an interactive file descriptor causing your program to appear to +problem you get into if you explicitly test for end-of-file on +an interactive file descriptor causing your program to appear to hang. $on_a_tty = -t STDIN && -t STDOUT; sub prompt { print "yes? " if $on_a_tty } for ( prompt(); <STDIN>; prompt() ) { # do something - } + } =head2 Foreach Loops The C<foreach> loop iterates over a normal list value and sets the -variable VAR to be each element of the list in turn. The variable is -implicitly local to the loop and regains its former value upon exiting the -loop. If the variable was previously declared with C<my>, it uses that -variable instead of the global one, but it's still localized to the loop. -This can cause problems if you have subroutine or format declarations -within that block's scope. +variable VAR to be each element of the list in turn. 
If the variable +is preceded with the keyword C<my>, then it is lexically scoped, and +is therefore visible only within the loop. Otherwise, the variable is +implicitly local to the loop and regains its former value upon exiting +the loop. If the variable was previously declared with C<my>, it uses +that variable instead of the global one, but it's still localized to +the loop. (Note that a lexically scoped variable can cause problems +if you have subroutine or format declarations.) The C<foreach> keyword is actually a synonym for the C<for> keyword, so you can use C<foreach> for readability or C<for> for brevity. If VAR is @@ -278,7 +284,7 @@ Examples: for (@ary) { s/foo/bar/ } - foreach $elem (@elements) { + foreach my $elem (@elements) { $elem *= 2; } @@ -294,8 +300,8 @@ Examples: Here's how a C programmer might code up a particular algorithm in Perl: - for ($i = 0; $i < @ary1; $i++) { - for ($j = 0; $j < @ary2; $j++) { + for (my $i = 0; $i < @ary1; $i++) { + for (my $j = 0; $j < @ary2; $j++) { if ($ary1[$i] > $ary2[$j]) { last; # can't go to outer :-( } @@ -304,32 +310,32 @@ Here's how a C programmer might code up a particular algorithm in Perl: # this is where that last takes me } -Whereas here's how a Perl programmer more confortable with the idiom might +Whereas here's how a Perl programmer more comfortable with the idiom might do it: - OUTER: foreach $wid (@ary1) { - INNER: foreach $jet (@ary2) { + OUTER: foreach my $wid (@ary1) { + INNER: foreach my $jet (@ary2) { next OUTER if $wid > $jet; $wid += $jet; - } - } + } + } See how much easier this is? It's cleaner, safer, and faster. It's cleaner because it's less noisy. It's safer because if code gets added between the inner and outer loops later on, the new code won't be -accidentally executed, the C<next> explicitly iterates the other loop +accidentally executed. The C<next> explicitly iterates the other loop rather than merely terminating the inner one. 
And it's faster because Perl executes a C<foreach> statement more rapidly than it would the equivalent C<for> loop. =head2 Basic BLOCKs and Switch Statements -A BLOCK by itself (labeled or not) is semantically equivalent to a loop -that executes once. Thus you can use any of the loop control -statements in it to leave or restart the block. (Note that this -is I<NOT> true in C<eval{}>, C<sub{}>, or contrary to popular belief C<do{}> blocks, -which do I<NOT> count as loops.) The C<continue> block -is optional. +A BLOCK by itself (labeled or not) is semantically equivalent to a +loop that executes once. Thus you can use any of the loop control +statements in it to leave or restart the block. (Note that this is +I<NOT> true in C<eval{}>, C<sub{}>, or contrary to popular belief +C<do{}> blocks, which do I<NOT> count as loops.) The C<continue> +block is optional. The BLOCK construct is particularly nice for doing case structures. @@ -368,19 +374,19 @@ or or formatted so it stands out more as a "proper" switch statement: SWITCH: { - /^abc/ && do { - $abc = 1; - last SWITCH; + /^abc/ && do { + $abc = 1; + last SWITCH; }; - /^def/ && do { - $def = 1; - last SWITCH; + /^def/ && do { + $def = 1; + last SWITCH; }; - /^xyz/ && do { - $xyz = 1; - last SWITCH; + /^xyz/ && do { + $xyz = 1; + last SWITCH; }; $nothing = 1; } @@ -414,14 +420,14 @@ a temporary assignment to $_ for convenient matching: /Anywhere/ && do { push @flags, '-h'; last; }; /In Rulings/ && do { last; }; die "unknown value for form variable where: `$where'"; - } + } Another interesting approach to a switch statement is arrange for a C<do> block to return the proper value: $amode = do { - if ($flag & O_RDONLY) { "r" } - elsif ($flag & O_WRONLY) { ($flag & O_APPEND) ? "a" : "w" } + if ($flag & O_RDONLY) { "r" } + elsif ($flag & O_WRONLY) { ($flag & O_APPEND) ? "a" : "w" } elsif ($flag & O_RDWR) { if ($flag & O_CREAT) { "w+" } else { ($flag & O_APPEND) ? 
"a+" : "r+" } @@ -473,14 +479,14 @@ encounters a line that begins with an equal sign and a word, like this Then that text and all remaining text up through and including a line beginning with C<=cut> will be ignored. The format of the intervening -text is described in L<perlpod>. +text is described in L<perlpod>. This allows you to intermix your source code and your documentation text freely, as in =item snazzle($) - The snazzle() function will behave in the most spectacular + The snazzle() function will behave in the most spectacular form that you can possibly imagine, not even excepting cybernetic pyrotechnics. @@ -489,11 +495,11 @@ and your documentation text freely, as in sub snazzle($) { my $thingie = shift; ......... - } + } -Note that pod translators should only look at paragraphs beginning -with a pod diretive (it makes parsing easier), whereas the compiler -actually knows to look for pod escapes even in the middle of a +Note that pod translators should look at only paragraphs beginning +with a pod directive (it makes parsing easier), whereas the compiler +actually knows to look for pod escapes even in the middle of a paragraph. This means that the following secret stuff will be ignored by both the compiler and the translators. @@ -506,3 +512,47 @@ ignored by both the compiler and the translators. You probably shouldn't rely upon the warn() being podded out forever. Not all pod translators are well-behaved in this regard, and perhaps the compiler will become pickier. + +One may also use pod directives to quickly comment out a section +of code. + +=head2 Plain Old Comments (Not!) + +Much like the C preprocessor, perl can process line directives. Using +this, one can control perl's idea of filenames and line numbers in +error or warning messages (especially for strings that are processed +with eval()). 
The syntax for this mechanism is the same as for most +C preprocessors: it matches the regular expression +C</^#\s*line\s+(\d+)\s*(?:\s"([^"]*)")?/> with C<$1> being the line +number for the next line, and C<$2> being the optional filename +(specified within quotes). + +Here are some examples that you should be able to type into your command +shell: + + % perl + # line 200 "bzzzt" + # the `#' on the previous line must be the first char on line + die 'foo'; + __END__ + foo at bzzzt line 201. + + % perl + # line 200 "bzzzt" + eval qq[\n#line 2001 ""\ndie 'foo']; print $@; + __END__ + foo at - line 2001. + + % perl + eval qq[\n#line 200 "foo bar"\ndie 'foo']; print $@; + __END__ + foo at foo bar line 200. + + % perl + # line 345 "goop" + eval "\n#line " . __LINE__ . ' "' . __FILE__ ."\"\ndie 'foo'"; + print $@; + __END__ + foo at goop line 345. + +=cut diff --git a/pod/perltie.pod b/pod/perltie.pod index 96f61eb436..c6eb7156ce 100644 --- a/pod/perltie.pod +++ b/pod/perltie.pod @@ -13,8 +13,8 @@ perltie - how to hide an object class in a simple variable =head1 DESCRIPTION Prior to release 5.0 of Perl, a programmer could use dbmopen() -to magically connect an on-disk database in the standard Unix dbm(3x) -format to a %HASH in their program. However, their Perl was either +to connect an on-disk database in the standard Unix dbm(3x) +format magically to a %HASH in their program. However, their Perl was either built with one particular dbm library or another, but not both, and you couldn't extend this mechanism to other packages or types of variables. @@ -33,13 +33,14 @@ In the tie() call, C<VARIABLE> is the name of the variable to be enchanted. C<CLASSNAME> is the name of a class implementing objects of the correct type. Any additional arguments in the C<LIST> are passed to the appropriate constructor method for that class--meaning TIESCALAR(), -TIEARRAY(), or TIEHASH(). (Typically these are arguments such as might be -passed to the dbminit() function of C.) 
The object returned by the "new" -method is also returned by the tie() function, which would be useful if -you wanted to access other methods in C<CLASSNAME>. (You don't actually -have to return a reference to a right "type" (e.g. HASH or C<CLASSNAME>) -so long as it's a properly blessed object.) You can also retrieve -a reference to the underlying object using the tied() function. +TIEARRAY(), TIEHASH(), or TIEHANDLE(). (Typically these are arguments +such as might be passed to the dbminit() function of C.) The object +returned by the "new" method is also returned by the tie() function, +which would be useful if you wanted to access other methods in +C<CLASSNAME>. (You don't actually have to return a reference to a right +"type" (e.g., HASH or C<CLASSNAME>) so long as it's a properly blessed +object.) You can also retrieve a reference to the underlying object +using the tied() function. Unlike dbmopen(), the tie() function will not C<use> or C<require> a module for you--you need to do that explicitly yourself. @@ -59,10 +60,10 @@ And now whenever either of those variables is accessed, its current system priority is retrieved and returned. If those variables are set, then the process's priority is changed! -We'll use Jarkko Hietaniemi F<E<lt>Jarkko.Hietaniemi@hut.fiE<gt>>'s -BSD::Resource class (not included) to access the PRIO_PROCESS, PRIO_MIN, -and PRIO_MAX constants from your system, as well as the getpriority() and -setpriority() system calls. Here's the preamble of the class. +We'll use Jarkko Hietaniemi <F<jhi@iki.fi>>'s BSD::Resource class (not +included) to access the PRIO_PROCESS, PRIO_MIN, and PRIO_MAX constants +from your system, as well as the getpriority() and setpriority() system +calls. Here's the preamble of the class. package Nice; use Carp; @@ -104,8 +105,8 @@ variable C<$^W> to see whether to emit a bit of noise anyway. This method will be triggered every time the tied variable is accessed (read). 
It takes no arguments beyond its self reference, which is the -object representing the scalar we're dealing with. Since in this case -we're just using a SCALAR ref for the tied scalar object, a simple $$self +object representing the scalar we're dealing with. Because in this case +we're using just a SCALAR ref for the tied scalar object, a simple $$self allows the method to get at the real value stored there. In our example below, that real value is the process ID to which we've tied our variable. @@ -159,7 +160,7 @@ argument--the new value the user is trying to assign. =item DESTROY this This method will be triggered when the tied variable needs to be destructed. -As with other object classes, such a method is seldom ncessary, since Perl +As with other object classes, such a method is seldom necessary, because Perl deallocates its moribund object's memory for you automatically--this isn't C++, you know. We'll use a DESTROY method here for debugging purposes only. @@ -172,7 +173,7 @@ C++, you know. We'll use a DESTROY method here for debugging purposes only. =back That's about all there is to it. Actually, it's more than all there -is to it, since we've done a few nice things here for the sake +is to it, because we've done a few nice things here for the sake of completeness, robustness, and general aesthetics. Simpler TIESCALAR classes are certainly possible. @@ -192,7 +193,7 @@ take an exception. (Well, if you access an individual element; an aggregate assignment would be missed.) For example: require Bounded_Array; - tie @ary, Bounded_Array, 2; + tie @ary, 'Bounded_Array', 2; $| = 1; for $i (0 .. 10) { print "setting index $i: "; @@ -252,7 +253,7 @@ As you may have noticed, the name of the FETCH method (et al.) is the same for all accesses, even though the constructors differ in names (TIESCALAR vs TIEARRAY). 
While in theory you could have the same class servicing several tied types, in practice this becomes cumbersome, and it's easiest -to simply keep them at one tie type per class. +to keep them at simply one tie type per class. =item STORE this, index, value @@ -273,7 +274,7 @@ there. For example: =item DESTROY this This method will be triggered when the tied variable needs to be destructed. -As with the sclar tie class, this is almost never needed in a +As with the scalar tie class, this is almost never needed in a language that does its own garbage collection, so this time we'll just leave it out. @@ -292,19 +293,18 @@ the following output demonstrates: =head2 Tying Hashes -As the first Perl data type to be tied (see dbmopen()), associative arrays -have the most complete and useful tie() implementation. A class -implementing a tied associative array should define the following -methods: TIEHASH is the constructor. FETCH and STORE access the key and -value pairs. EXISTS reports whether a key is present in the hash, and -DELETE deletes one. CLEAR empties the hash by deleting all the key and -value pairs. FIRSTKEY and NEXTKEY implement the keys() and each() -functions to iterate over all the keys. And DESTROY is called when the -tied variable is garbage collected. +As the first Perl data type to be tied (see dbmopen()), hashes have the +most complete and useful tie() implementation. A class implementing a +tied hash should define the following methods: TIEHASH is the constructor. +FETCH and STORE access the key and value pairs. EXISTS reports whether a +key is present in the hash, and DELETE deletes one. CLEAR empties the +hash by deleting all the key and value pairs. FIRSTKEY and NEXTKEY +implement the keys() and each() functions to iterate over all the keys. +And DESTROY is called when the tied variable is garbage collected. 
-If this seems like a lot, then feel free to merely inherit -from the standard Tie::Hash module for most of your methods, redefining only -the interesting ones. See L<Tie::Hash> for details. +If this seems like a lot, then feel free to inherit from merely the +standard Tie::Hash module for most of your methods, redefining only the +interesting ones. See L<Tie::Hash> for details. Remember that Perl distinguishes between a key not existing in the hash, and the key existing in the hash but having a corresponding value of @@ -312,22 +312,22 @@ C<undef>. The two possibilities can be tested with the C<exists()> and C<defined()> functions. Here's an example of a somewhat interesting tied hash class: it gives you -a hash representing a particular user's dotfiles. You index into the hash -with the name of the file (minus the dot) and you get back that dotfile's +a hash representing a particular user's dot files. You index into the hash +with the name of the file (minus the dot) and you get back that dot file's contents. For example: use DotFiles; - tie %dot, DotFiles; + tie %dot, 'DotFiles'; if ( $dot{profile} =~ /MANPATH/ || $dot{login} =~ /MANPATH/ || $dot{cshrc} =~ /MANPATH/ ) { - print "you seem to set your manpath\n"; + print "you seem to set your MANPATH\n"; } Or here's another sample of using our tied class: - tie %him, DotFiles, 'daemon'; + tie %him, 'DotFiles', 'daemon'; foreach $f ( keys %him ) { printf "daemon dot file %s is size %d\n", $f, length $him{$f}; @@ -346,7 +346,7 @@ whose dot files this object represents =item HOME -where those dotfiles live +where those dot files live =item CLOBBER @@ -354,7 +354,7 @@ whether we should try to change or remove those dot files =item LIST -the hash of dotfile names and content mappings +the hash of dot file names and content mappings =back @@ -366,7 +366,7 @@ Here's the start of F<Dotfiles.pm>: my $DEBUG = 0; sub debug { $DEBUG = @_ ? 
shift : 1 } -For our example, we want to able to emit debugging info to help in tracing +For our example, we want to be able to emit debugging info to help in tracing during development. We keep also one convenience function around internally to help print out warnings; whowasi() returns the function name that calls it. @@ -412,8 +412,8 @@ Here's the constructor: It's probably worth mentioning that if you're going to filetest the return values out of a readdir, you'd better prepend the directory -in question. Otherwise, since we didn't chdir() there, it would -have been testing the wrong file. +in question. Otherwise, because we didn't chdir() there, it would +have been testing the wrong file. =item FETCH this, key @@ -444,7 +444,7 @@ Here's the fetch for our DotFiles example. It was easy to write by having it call the Unix cat(1) command, but it would probably be more portable to open the file manually (and somewhat -more efficient). Of course, since dot files are a Unixy concept, we're +more efficient). Of course, because dot files are a Unixy concept, we're not that concerned. =item STORE this, key, value @@ -509,22 +509,30 @@ be careful to check whether they really want to clobber files. croak "@{[&whowasi]}: won't remove file $file" unless $self->{CLOBBER}; delete $self->{LIST}->{$dot}; - unlink($file) || carp "@{[&whowasi]}: can't unlink $file: $!"; + my $success = unlink($file); + carp "@{[&whowasi]}: can't unlink $file: $!" unless $success; + $success; } +The value returned by DELETE becomes the return value of the call +to delete(). If you want to emulate the normal behavior of delete(), +you should return whatever FETCH would have returned for this key. +In this example, we have chosen instead to return a value which tells +the caller whether the file was successfully deleted. + =item CLEAR this This method is triggered when the whole hash is to be cleared, usually by assigning the empty list to it. 
-In our example, that would remove all the user's dotfiles! It's such a +In our example, that would remove all the user's dot files! It's such a dangerous thing that they'll have to set CLOBBER to something higher than 1 to make it happen. sub CLEAR { carp &whowasi if $DEBUG; my $self = shift; - croak "@{[&whowasi]}: won't remove all dotfiles for $self->{USER}" + croak "@{[&whowasi]}: won't remove all dot files for $self->{USER}" unless $self->{CLOBBER} > 1; my $dot; foreach $dot ( keys %{$self->{LIST}}) { @@ -565,8 +573,8 @@ second argument which is the last key that had been accessed. This is useful if you're carrying about ordering or calling the iterator from more than one sequence, or not really storing things in a hash anywhere. -For our example, we our using a real hash so we'll just do the simple -thing, but we'll have to indirect through the LIST field. +For our example, we're using a real hash so we'll do just the simple +thing, but we'll have to go through the LIST field indirectly. sub NEXTKEY { carp &whowasi if $DEBUG; @@ -592,7 +600,7 @@ use the each() function to iterate over such. Example: # print out history file offsets use NDBM_File; - tie(%HIST, NDBM_File, '/usr/lib/news/history', 1, 0); + tie(%HIST, 'NDBM_File', '/usr/lib/news/history', 1, 0); while (($key,$val) = each %HIST) { print $key, ' = ', unpack('L',$val), "\n"; } @@ -600,7 +608,220 @@ use the each() function to iterate over such. Example: =head2 Tying FileHandles -This isn't implemented yet. Sorry; maybe someday. +This is partially implemented now. + +A class implementing a tied filehandle should define the following +methods: TIEHANDLE, at least one of PRINT, PRINTF, READLINE, GETC, or READ, +and possibly DESTROY. + +It is especially useful when perl is embedded in some other program, +where output to STDOUT and STDERR may have to be redirected in some +special way. See nvi and the Apache module for examples. + +In our example we're going to create a shouting handle. 
+ + package Shout; + +=over + +=item TIEHANDLE classname, LIST + +This is the constructor for the class. That means it is expected to +return a blessed reference of some sort. The reference can be used to +hold some internal information. + + sub TIEHANDLE { print "<shout>\n"; my $i; bless \$i, shift } + +=item PRINT this, LIST + +This method will be triggered every time the tied handle is printed to +with the C<print()> function. +Beyond its self reference it also expects the list that was passed to +the print function. + + sub PRINT { $r = shift; $$r++; print join($,,map(uc($_),@_)),$\ } + +=item PRINTF this, LIST + +This method will be triggered every time the tied handle is printed to +with the C<printf()> function. +Beyond its self reference it also expects the format and list that was +passed to the printf function. + + sub PRINTF { + shift; + my $fmt = shift; + print sprintf($fmt, @_)."\n"; + } + +=item READ this, LIST + +This method will be called when the handle is read from via the C<read> +or C<sysread> functions. + + sub READ { + $r = shift; + my($buf,$len,$offset) = @_; + print "READ called, \$buf=$buf, \$len=$len, \$offset=$offset"; + } + +=item READLINE this + +This method will be called when the handle is read from via <HANDLE>. +The method should return undef when there is no more data. + + sub READLINE { $r = shift; "PRINT called $$r times\n"; } + +=item GETC this + +This method will be called when the C<getc> function is called. + + sub GETC { print "Don't GETC, Get Perl"; return "a"; } + +=item DESTROY this + +As with the other types of ties, this method will be called when the +tied handle is about to be destroyed. This is useful for debugging and +possibly cleaning up.
+ + sub DESTROY { print "</shout>\n" } + +=back + +Here's how to use our little example: + + tie(*FOO,'Shout'); + print FOO "hello\n"; + $a = 4; $b = 6; + print FOO $a, " plus ", $b, " equals ", $a + $b, "\n"; + print <FOO>; + +=head2 The C<untie> Gotcha + +If you intend making use of the object returned from either tie() or +tied(), and if the tie's target class defines a destructor, there is a +subtle gotcha you I<must> guard against. + +As setup, consider this (admittedly rather contrived) example of a +tie; all it does is use a file to keep a log of the values assigned to +a scalar. + + package Remember; + + use strict; + use IO::File; + + sub TIESCALAR { + my $class = shift; + my $filename = shift; + my $handle = new IO::File "> $filename" + or die "Cannot open $filename: $!\n"; + + print $handle "The Start\n"; + bless {FH => $handle, Value => 0}, $class; + } + + sub FETCH { + my $self = shift; + return $self->{Value}; + } + + sub STORE { + my $self = shift; + my $value = shift; + my $handle = $self->{FH}; + print $handle "$value\n"; + $self->{Value} = $value; + } + + sub DESTROY { + my $self = shift; + my $handle = $self->{FH}; + print $handle "The End\n"; + close $handle; + } + + 1; + +Here is an example that makes use of this tie: + + use strict; + use Remember; + + my $fred; + tie $fred, 'Remember', 'myfile.txt'; + $fred = 1; + $fred = 4; + $fred = 5; + untie $fred; + system "cat myfile.txt"; + +This is the output when it is executed: + + The Start + 1 + 4 + 5 + The End + +So far so good. Those of you who have been paying attention will have +spotted that the tied object hasn't been used so far. 
So let's add an +extra method to the Remember class to allow comments to be included in +the file -- say, something like this: + + sub comment { + my $self = shift; + my $text = shift; + my $handle = $self->{FH}; + print $handle $text, "\n"; + } + +And here is the previous example modified to use the C<comment> method +(which requires the tied object): + + use strict; + use Remember; + + my ($fred, $x); + $x = tie $fred, 'Remember', 'myfile.txt'; + $fred = 1; + $fred = 4; + comment $x "changing..."; + $fred = 5; + untie $fred; + system "cat myfile.txt"; + +When this code is executed there is no output. Here's why: + +When a variable is tied, it is associated with the object which is the +return value of the TIESCALAR, TIEARRAY, or TIEHASH function. This +object normally has only one reference, namely, the implicit reference +from the tied variable. When untie() is called, that reference is +destroyed. Then, as in the first example above, the object's +destructor (DESTROY) is called, which is normal for objects that have +no more valid references; and thus the file is closed. + +In the second example, however, we have stored another reference to +the tied object in C<$x>. That means that when untie() gets called +there will still be a valid reference to the object in existence, so +the destructor is not called at that time, and thus the file is not +closed. The reason there is no output is because the file buffers +have not been flushed to disk. + +Now that you know what the problem is, what can you do to avoid it? +Well, the good old C<-w> flag will spot any instances where you call +untie() and there are still valid references to the tied object.
If +the second script above is run with the C<-w> flag, Perl prints this +warning message: + + untie attempted while 1 inner references still exist + +To get the script to work properly and silence the warning make sure +there are no valid references to the tied object I<before> untie() is +called: + + undef $x; + untie $fred; =head1 SEE ALSO @@ -617,10 +838,12 @@ You cannot easily tie a multilevel data structure (such as a hash of hashes) to a dbm file. The first problem is that all but GDBM and Berkeley DB have size limitations, but beyond that, you also have problems with how references are to be represented on disk. One experimental -module that does attempt to partially address this need is the MLDBM -module. Check your nearest CPAN site as described in L<perlmod> for +module that does attempt to address this need partially is the MLDBM +module. Check your nearest CPAN site as described in L<perlmodlib> for source code to MLDBM. =head1 AUTHOR Tom Christiansen + +TIEHANDLE by Sven Verdoolaege <F<skimo@dns.ufsia.ac.be>> and Doug MacEachern <F<dougm@osf.org>> diff --git a/pod/perltoc.pod b/pod/perltoc.pod index d761fcb150..d58f12cf86 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -6,3125 +6,4950 @@ perltoc - perl documentation table of contents =head1 DESCRIPTION This page provides a brief table of contents for the rest of the Perl -documentation set. It is meant to be be quickly scanned or grepped +documentation set. It is meant to be scanned quickly or grepped through to locate the proper section you're looking for. 
=head1 BASIC DOCUMENTATION - - - =head2 perl - Practical Extraction and Report Language =item SYNOPSIS - =item DESCRIPTION - Many usability enhancements, Simplified grammar, Lexical scoping, Arbitrarily nested data structures, Modularity and reusability, -Object-oriented programming, Embeddable and Extensible, POSIX -compliant, Package constructors and destructors, Multiple simultaneous -DBM implementations, Subroutine definitions may now be autoloaded, -Regular expression enhancements +Object-oriented programming, Embeddable and Extensible, POSIX compliant, +Package constructors and destructors, Multiple simultaneous DBM +implementations, Subroutine definitions may now be autoloaded, Regular +expression enhancements, Innumerable Unbundled Modules, Compilability =item ENVIRONMENT - -HOME, LOGDIR, PATH, PERL5LIB, PERL5DB, PERLLIB - =item AUTHOR - =item FILES - =item SEE ALSO - =item DIAGNOSTICS - =item BUGS - =item NOTES +=head2 perlfaq - frequently asked questions about Perl ($Date: 1997/04/24 +22:46:06 $) +=item DESCRIPTION +perlfaq: Structural overview of the FAQ, L<perlfaq1>: General Questions +About Perl, L<perlfaq2>: Obtaining and Learning about Perl, L<perlfaq3>: +Programming Tools, L<perlfaq4>: Data Manipulation, L<perlfaq5>: Files and +Formats, L<perlfaq6>: Regexps, L<perlfaq7>: General Perl Language Issues, +L<perlfaq8>: System Interaction, L<perlfaq9>: Networking +=over -=head2 perldata - Perl data types +=item Where to get this document -=item DESCRIPTION +=item How to contribute to this document +=item What will happen if you mail your Perl programming problems to the +authors + +=back + +=item Credits + +=item Author and Copyright Information =over -=item Variable names +=item Noncommercial Reproduction +=item Commercial Reproduction -=item Context +=item Disclaimer +=back -=item Scalar values +=item Changes +24/April/97, 23/April/97, 25/March/97, 18/March/97, 17/March/97 Version, +Initial Release: 11/March/97 -=item Scalar value constructors +=head2 
perlfaq1 - General Questions About Perl ($Revision: 1.12 $, $Date: +1997/04/24 22:43:34 $) +=item DESCRIPTION -=item List value constructors +=over +=item What is Perl? -=item Typeglobs and FileHandles +=item Who supports Perl? Who develops it? Why is it free? +=item Which version of Perl should I use? +=item What are perl4 and perl5? +=item How stable is Perl? -=back +=item Is Perl difficult to learn? +=item How does Perl compare with other languages like Java, Python, REXX, +Scheme, or Tcl? +=item Can I do [task] in Perl? +=item When shouldn't I program in Perl? -=head2 perlsyn - Perl syntax +=item What's the difference between "perl" and "Perl"? -=item DESCRIPTION +=item Is it a Perl program or a Perl script? + +=item What is a JAPH? +=item Where can I get a list of Larry Wall witticisms? + +=item How can I convince my sysadmin/supervisor/employees to use version +(5/5.004/Perl instead of some other language)? + +=back + +=item AUTHOR AND COPYRIGHT + +=head2 perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.16 $, +$Date: 1997/04/23 18:04:09 $) + +=item DESCRIPTION =over -=item Declarations +=item What machines support Perl? Where do I get it? +=item How can I get a binary version of Perl? -=item Simple statements +=item I don't have a C compiler on my system. How can I compile perl? +=item I copied the Perl binary from one machine to another, but scripts +don't work. -=item Compound statements +=item I grabbed the sources and tried to compile but gdbm/dynamic +loading/malloc/linking/... failed. How do I make it work? +=item What modules and extensions are available for Perl? What is CPAN? +What does CPAN/src/... mean? -=item Loop Control +=item Is there an ISO or ANSI certified version of Perl? +=item Where can I get information on Perl? -=item For Loops +=item What are the Perl newsgroups on USENET? Where do I post questions? +=item Where should I post source code? 
-=item Foreach Loops +=item Perl Books +=item Perl in Magazines -=item Basic BLOCKs and Switch Statements +=item Perl on the Net: FTP and WWW Access +=item What mailing lists are there for perl? -=item Goto +MacPerl, Perl5-Porters, NTPerl, Perl-Packrats +=item Archives of comp.lang.perl.misc -=item PODs: Embedded Documentation +=item Perl Training +=item Where can I buy a commercial version of Perl? +=item Where do I send bug reports? +=item What is perl.com? perl.org? The Perl Institute? + +=item How do I learn about object-oriented Perl programming? =back +=item AUTHOR AND COPYRIGHT +=head2 perlfaq3 - Programming Tools ($Revision: 1.22 $, $Date: 1997/04/24 +22:43:42 $) +=item DESCRIPTION -=head2 perlop - Perl operators and precedence +=over -=item SYNOPSIS +=item How do I do (anything)? +=item How can I use Perl interactively? -=item DESCRIPTION +=item Is there a Perl shell? +=item How do I debug my Perl programs? -=over +=item How do I profile my Perl programs? -=item Terms and List Operators (Leftward) +=item How do I cross-reference my Perl programs? +=item Is there a pretty-printer (formatter) for Perl? -=item The Arrow Operator +=item Is there a ctags for Perl? +=item Where can I get Perl macros for vi? -=item Autoincrement and Autodecrement +=item Where can I get perl-mode for emacs? +=item How can I use curses with Perl? -=item Exponentiation +=item How can I use X or Tk with Perl? +=item How can I generate simple menus without using CGI or Tk? -=item Symbolic Unary Operators +=item Can I dynamically load C routines into Perl? +=item What is undump? -=item Binding Operators +=item How can I make my Perl program run faster? +=item How can I make my Perl program take less memory? -=item Multiplicative Operators +=item Is it unsafe to return a pointer to local data? +=item How can I free an array or hash so my program shrinks? -=item Additive Operators +=item How can I make my CGI script more efficient? +=item How can I hide the source for my Perl program? 
-=item Shift Operators +=item How can I compile my Perl program into byte code or C? +=item How can I get '#!perl' to work on [MS-DOS,NT,...]? -=item Named Unary Operators +=item Can I write useful perl programs on the command line? +=item Why don't perl one-liners work on my DOS/Mac/VMS system? -=item Relational Operators +=item Where can I learn about CGI or Web programming in Perl? +=item Where can I learn about object-oriented Perl programming? -=item Equality Operators +=item Where can I learn about linking C with Perl? [h2xs, xsubpp] +=item I've read perlembed, perlguts, etc., but I can't embed perl in +my C program, what am I doing wrong? -=item Bitwise And +=item When I tried to run my script, I got this message. What does it +mean? +=item What's MakeMaker? -=item Bitwise Or and Exclusive Or +=back +=item AUTHOR AND COPYRIGHT -=item C-style Logical And +=head2 perlfaq4 - Data Manipulation ($Revision: 1.19 $, $Date: 1997/04/24 +22:43:57 $) +=item DESCRIPTION -=item C-style Logical Or +=item Data: Numbers +=over -=item Range Operator +=item Why am I getting long decimals (eg, 19.9499999999999) instead of the +numbers I should be getting (eg, 19.95)? +=item Why isn't my octal data interpreted correctly? -=item Conditional Operator +=item Does perl have a round function? What about ceil() and floor()? +Trig functions? +=item How do I convert bits into ints? -=item Assignment Operators +=item How do I multiply matrices? +=item How do I perform an operation on a series of integers? -=item Comma Operator +=item How can I output Roman numerals? +=item Why aren't my random numbers random? -=item List Operators (Rightward) +=back +=item Data: Dates -=item Logical Not +=over +=item How do I find the week-of-the-year/day-of-the-year? -=item Logical And +=item How can I compare two date strings? +=item How can I take a string and turn it into epoch seconds? -=item Logical or and Exclusive Or +=item How can I find the Julian Day? 
+=item Does Perl have a year 2000 problem? -=item C Operators Missing From Perl +=back +=item Data: Strings -unary &, unary *, (TYPE) +=over -=item Quote and Quotelike Operators +=item How do I validate input? +=item How do I unescape a string? -=item Regexp Quotelike Operators +=item How do I remove consecutive pairs of characters? +=item How do I expand function calls in a string? -?PATTERN?, m/PATTERN/gimosx, /PATTERN/gimosx, q/STRING/, C<'STRING'>, -qq/STRING/, "STRING", qx/STRING/, `STRING`, qw/STRING/, -s/PATTERN/REPLACEMENT/egimosx, tr/SEARCHLIST/REPLACEMENTLIST/cds, -y/SEARCHLIST/REPLACEMENTLIST/cds +=item How do I find matching/nesting anything? -=item I/O Operators +=item How do I reverse a string? +=item How do I expand tabs in a string? -=item Constant Folding +=item How do I reformat a paragraph? +=item How can I access/change the first N letters of a string? -=item Integer arithmetic +=item How do I change the Nth occurrence of something? +=item How can I count the number of occurrences of a substring within a +string? +=item How do I capitalize all the words on one line? +=item How can I split a [character] delimited string except when inside +[character]? (Comma-separated files) -=back +=item How do I strip blank space from the beginning/end of a string? +=item How do I extract selected columns from a string? +=item How do I find the soundex value of a string? +=item How can I expand variables in text strings? -=head2 perlre - Perl regular expressions +=item What's wrong with always quoting "$vars"? -=item DESCRIPTION +=item Why don't my <<HERE documents work? +1. There must be no space after the << part, 2. There (probably) should be +a semicolon at the end, 3. You can't (easily) have any space in front of +the tag + +=back + +=item Data: Arrays =over -=item Regular Expressions +=item What is the difference between $array[1] and @array[1]? +=item How can I extract just the unique elements of an array? 
-(?#text), (?:regexp), (?=regexp), (?!regexp), (?imsx) +a) If @in is sorted, and you want @out to be sorted:, b) If you don't know +whether @in is sorted:, c) Like (b), but @in contains only small integers:, +d) A way to do (b) without any loops or greps:, e) Like (d), but @in +contains only small positive integers: -=item Backtracking +=item How can I tell whether an array contains a certain element? +=item How do I compute the difference of two arrays? How do I compute the +intersection of two arrays? -=item Version 8 Regular Expressions +=item How do I find the first array element for which a condition is true? +=item How do I handle linked lists? -=item WARNING on \1 vs $1 +=item How do I handle circular lists? +=item How do I shuffle an array randomly? +=item How do I process/modify each element of an array? +=item How do I select a random element from an array? + +=item How do I permute N elements of a list? + +=item How do I sort an array by (anything)? + +=item How do I manipulate arrays of bits? + +=item Why does defined() return true on empty arrays and hashes? =back +=item Data: Hashes (Associative Arrays) +=over +=item How do I process an entire hash? -=head2 perlrun - how to execute the Perl interpreter +=item What happens if I add or remove keys from a hash while iterating over +it? -=item SYNOPSIS +=item How do I look up a hash element by value? +=item How can I know how many entries are in a hash? -=item DESCRIPTION +=item How do I sort a hash (optionally by value instead of key)? +=item How can I always keep my hash sorted? -=over +=item What's the difference between "delete" and "undef" with hashes? -=item Switches +=item Why don't my tied hashes make the defined/exists distinction? + +=item How do I reset an each() operation part-way through? + +=item How can I get the unique keys from two hashes? +=item How can I store a multidimensional array in a DBM file? 
-B<-0>I<digits>, B<-a>, B<-c>, B<-d>, B<-d:foo>, B<-D>I<number>, -B<-D>I<list>, B<-e> I<commandline>, B<-F>I<regexp>, B<-i>I<extension>, -B<-I>I<directory>, B<-l>I<octnum>, B<-m>I<module>, B<-M>I<module>, -B<-n>, B<-p>, B<-P>, B<-s>, B<-S>, B<-T>, B<-u>, B<-U>, B<-v>, B<-V>, -B<-V:name>, B<-w>, B<-x> I<directory> +=item How can I make my hash remember the order I put elements into it? +=item Why does passing a subroutine an undefined element in a hash create +it? +=item How can I make the Perl equivalent of a C structure/C++ class/hash or +array of hashes or arrays? + +=item How can I use a reference as a hash key? =back +=item Data: Misc +=over +=item How do I handle binary data correctly? -=head2 perlfunc - Perl builtin functions +=item How do I determine whether a scalar is a number/whole/integer/float? -=item DESCRIPTION +=item How do I keep persistent data across program calls? +=item How do I print out or copy a recursive data structure? +=item How do I define methods for every class/object? +=item How do I verify a credit card checksum? + +=back + +=item AUTHOR AND COPYRIGHT + +=head2 perlfaq5 - Files and Formats ($Revision: 1.22 $, $Date: 1997/04/24 +22:44:02 $) + +=item DESCRIPTION =over -=item Perl Functions by Category +=item How do I flush/unbuffer a filehandle? Why must I do this? +=item How do I change one line in a file/delete a line in a file/insert a +line in the middle of a file/append to the beginning of a file? 
-Functions for SCALARs or strings, Regular expressions and pattern -matching, Numeric functions, Functions for real @ARRAYs, Functions for -list data, Functions for real %HASHes, Input and output functions, -Functions for fixed length data or records, Functions for filehandles, -files, or directories, Keywords related to the control flow of your -perl program, Keywords related to scoping, Miscellaneous functions, -Functions for processes and process groups, Keywords related to perl -modules, Keywords related to classes and object-orientedness, Low-level -socket functions, System V interprocess communication functions, -Fetching user and group info, Fetching network info, Time-related -functions +=item How do I count the number of lines in a file? -=item Alphabetical Listing of Perl Functions +=item How do I make a temporary file name? +=item How can I manipulate fixed-record-length files? --X FILEHANDLE, -X EXPR, -X, abs VALUE, accept NEWSOCKET,GENERICSOCKET, -alarm SECONDS, atan2 Y,X, bind SOCKET,NAME, binmode FILEHANDLE, bless -REF,CLASSNAME, bless REF, caller EXPR, caller, chdir EXPR, chmod LIST, -chomp VARIABLE, chomp LIST, chomp, chop VARIABLE, chop LIST, chop, -chown LIST, chr NUMBER, chroot FILENAME, close FILEHANDLE, closedir -DIRHANDLE, connect SOCKET,NAME, continue BLOCK, cos EXPR, crypt -PLAINTEXT,SALT, dbmclose ASSOC_ARRAY, dbmopen ASSOC,DBNAME,MODE, -defined EXPR, delete EXPR, die LIST, do BLOCK, do SUBROUTINE(LIST), do -EXPR, dump LABEL, each ASSOC_ARRAY, eof FILEHANDLE, eof (), eof, eval -EXPR, eval BLOCK, exec LIST, exists EXPR, exit EXPR, exp EXPR, fcntl -FILEHANDLE,FUNCTION,SCALAR, fileno FILEHANDLE, flock -FILEHANDLE,OPERATION, fork, format, formline PICTURE, LIST, getc -FILEHANDLE, getc, getlogin, getpeername SOCKET, getpgrp PID, getppid, -getpriority WHICH,WHO, getpwnam NAME, getgrnam NAME, gethostbyname -NAME, getnetbyname NAME, getprotobyname NAME, getpwuid UID, getgrgid -GID, getservb +=item How can I make a filehandle local to a subroutine? 
How do I pass +filehandles between subroutines? How do I make an array of filehandles? +=item How can I set up a footer format to be used with write()? +=item How can I write() into a string? -=back +=item How can I output my numbers with commas added? +=item How can I translate tildes (~) in a filename? +=item How come when I open the file read-write it wipes it out? +=item Why do I sometimes get an "Argument list too long" when I use <*>? -=head2 perlvar - Perl predefined variables +=item Is there a leak/bug in glob()? -=item DESCRIPTION +=item How can I open a file with a leading "E<gt>" or trailing blanks? +=item How can I reliably rename a file? -=over +=item How can I lock a file? -=item Predefined Names +=item Why can't I just open(FH, ">file.lock")? +=item I still don't get locking. I just want to increment the number in +the file. How can I do this? -$ARG, $_, $<I<digit>>, $MATCH, $&, $PREMATCH, $`, $POSTMATCH, $', -$LAST_PAREN_MATCH, $+, $MULTILINE_MATCHING, $*, input_line_number -HANDLE EXPR, $INPUT_LINE_NUMBER, $NR, $, input_record_separator HANDLE -EXPR, $INPUT_RECORD_SEPARATOR, $RS, $/, autoflush HANDLE EXPR, -$OUTPUT_AUTOFLUSH, $|, output_field_separator HANDLE EXPR, -$OUTPUT_FIELD_SEPARATOR, $OFS, $,, output_record_separator HANDLE EXPR, -$OUTPUT_RECORD_SEPARATOR, $ORS, $\, $LIST_SEPARATOR, $", -$SUBSCRIPT_SEPARATOR, $SUBSEP, $;, $OFMT, $#, format_page_number HANDLE -EXPR, $FORMAT_PAGE_NUMBER, $%, format_lines_per_page HANDLE EXPR, -$FORMAT_LINES_PER_PAGE, $=, format_lines_left HANDLE EXPR, -$FORMAT_LINES_LEFT, $-, format_name HANDLE EXPR, $FORMAT_NAME, $~, -format_top_name HANDLE EXPR, $FORMAT_TOP_NAME, $^, -format_line_break_characters HANDLE EXPR, -$FORMAT_LINE_BREAK_CHARACTERS, $:, format_formfeed HANDLE EXPR, -$FORMAT_FORMFEED, $^L, $ACCUMULATOR, $^A, $CHILD_ERROR, $?, $OS_ERROR, -$ERRNO, $!, $EVAL_ERROR, $@, $PROCESS_ID, $PID, $$, $REAL_USER_ID, -$UID, $<, +=item How do I randomly update a binary file?
+=item How do I get a file's timestamp in perl? +=item How do I set a file's timestamp in perl? -=back +=item How do I print to more than one file at once? +=item How can I read in a file by paragraphs? +=item How can I read a single character from a file? From the keyboard? +=item How can I tell if there's a character waiting on a filehandle? -=head2 perlsub - Perl subroutines +=item How do I open a file without blocking? -=item SYNOPSIS +=item How do I create a file only if it doesn't exist? +=item How do I do a C<tail -f> in perl? -=item DESCRIPTION +=item How do I dup() a filehandle in Perl? +=item How do I close a file descriptor by number? -=over +=item Why can't I use "C:\temp\foo" in DOS paths? Why doesn't +`C:\temp\foo.exe` work? -=item Private Variables via my() +=item Why doesn't glob("*.*") get all the files? +=item Why does Perl let me delete read-only files? Why does C<-i> clobber +protected files? Isn't this a bug in Perl? -=item Temporary Values via local() +=item How do I select a random line from a file? +=back -=item Passing Symbol Table Entries (typeglobs) +=item AUTHOR AND COPYRIGHT +=head2 perlfaq6 - Regexps ($Revision: 1.17 $, $Date: 1997/04/24 22:44:10 $) -=item Pass by Reference +=item DESCRIPTION +=over -=item Prototypes +=item How can I hope to use regular expressions without creating illegible +and unmaintainable code? +Comments Outside the Regexp, Comments Inside the Regexp, Different +Delimiters -=item Overriding Builtin Functions +=item I'm having trouble matching over more than one line. What's wrong? +=item How can I pull out lines between two patterns that are themselves on +different lines? -=item Autoloading +=item I put a regular expression into $/ but it didn't work. What's wrong? +=item How do I substitute case insensitively on the LHS, but preserving +case on the RHS? +=item How can I make C<\w> match accented characters? +=item How can I match a locale-smart version of C</[a-zA-Z]/>?
-=back +=item How can I quote a variable to use in a regexp? -=item SEE ALSO +=item What is C</o> really for? +=item How do I use a regular expression to strip C style comments from a +file? +=item Can I use Perl regular expressions to match balanced text? +=item What does it mean that regexps are greedy? How can I get around it? +=item How do I process each word on each line? -=head2 perlmod - Perl modules (packages) +=item How can I print out a word-frequency or line-frequency summary? -=item DESCRIPTION +=item How can I do approximate matching? +=item How do I efficiently match many regular expressions at once? -=over +=item Why don't word-boundary searches with C<\b> work for me? -=item Packages +=item Why does using $&, $`, or $' slow my program down? +=item What good is C<\G> in a regular expression? -=item Symbol Tables +=item Are Perl regexps DFAs or NFAs? Are they POSIX compliant? +=item What's wrong with using grep or map in a void context? -=item Package Constructors and Destructors +=item How can I match strings with multibyte characters? +=back -=item Perl Classes +=item AUTHOR AND COPYRIGHT +=head2 perlfaq7 - Perl Language Issues ($Revision: 1.18 $, $Date: +1997/04/24 22:44:14 $) -=item Perl Modules +=item DESCRIPTION +=over +=item Can I get a BNF/yacc/RE for the Perl language? +=item What are all these $@%* punctuation signs, and how do I know when to +use them? -=back +=item Do I always/never have to quote my strings or use semicolons and +commas? -=item NOTE +=item How do I skip some return values? +=item How do I temporarily block warnings? -=item THE PERL MODULE LIBRARY +=item What's an extension? +=item Why do Perl operators have different precedence than C operators? -=over +=item How do I declare/create a structure? -=item Pragmatic Modules +=item How do I create a module? +=item How do I create a class? -diagnostics, integer, less, overload, sigtrap, strict, subs +=item How can I tell if a variable is tainted? 
-=item Standard Modules +=item What's a closure? +=item What is variable suicide and how can I prevent it? -AnyDBM_File, AutoLoader, AutoSplit, Benchmark, Carp, Config, Cwd, -DB_File, Devel::SelfStubber, DynaLoader, English, Env, Exporter, -ExtUtils::Liblist, ExtUtils::MakeMaker, ExtUtils::Manifest, -ExtUtils::Mkbootstrap, ExtUtils::Miniperl, Fcntl, File::Basename, -File::CheckTree, File::Find, FileHandle, File::Path, Getopt::Long, -Getopt::Std, I18N::Collate, IPC::Open2, IPC::Open3, Net::Ping, POSIX, -SelfLoader, Safe, Socket, Test::Harness, Text::Abbrev +=item How can I pass/return a {Function, FileHandle, Array, Hash, Method, +Regexp}? -=item Extension Modules +Passing Variables and Functions, Passing Filehandles, Passing Regexps, +Passing Methods +=item How do I create a static variable? +=item What's the difference between dynamic and lexical (static) scoping? +Between local() and my()? +=item How can I access a dynamic variable while a similarly named lexical +is in scope? -=back +=item What's the difference between deep and shallow binding? -=item CPAN +=item Why doesn't "local($foo) = <FILE>;" work right? + +=item How do I redefine a builtin function, operator, or method? + +=item What's the difference between calling a function as &foo and foo()? + +=item How do I create a switch or case statement? + +=item How can I catch accesses to undefined variables/functions/methods? + +=item Why can't a method included in this same file be found? + +=item How can I find out my current package? +=item How can I comment out a large block of perl code? 
-Language Extensions and Documentation Tools, Development Support, -Operating System Interfaces, Networking, Device Control (modems) and -InterProcess Communication, Data Types and Data Type Utilities, -Database Interfaces, User Interfaces, Interfaces to / Emulations of -Other Programming Languages, File Names, File Systems and File Locking -(see also File Handles), String Processing, Language Text Processing, -Parsing and Searching, Option, Argument, Parameter and Configuration -File Processing, Internationalization and Locale, Authentication, -Security and Encryption, World Wide Web, HTML, HTTP, CGI, MIME, Server -and Daemon Utilities, Archiving and Compression, Images, Pixmap and -Bitmap Manipulation, Drawing and Graphing, Mail and Usenet News, -Control Flow Utilities (callbacks and exceptions etc), File Handle and -Input/Output Stream Utilities, Miscellaneous Modules +=back + +=item AUTHOR AND COPYRIGHT -=item Modules: Creation, Use and Abuse +=head2 perlfaq8 - System Interaction ($Revision: 1.21 $, $Date: 1997/04/24 +22:44:19 $) +=item DESCRIPTION =over -=item Guidelines for Module Creation +=item How do I find out which operating system I'm running under? +=item How come exec() doesn't return? 
-Do similar modules already exist in some form?, Try to design the new -module to be easy to extend and reuse, Some simple style guidelines, -Select what to export, Select a name for the module, Have you got it -right?, README and other Additional Files, A description of the -module/package/extension etc, A copyright notice - see below, -Prerequisites - what else you may need to have, How to build it - -possible changes to Makefile.PL etc, How to install it, Recent changes -in this release, especially incompatibilities, Changes / enhancements -you plan to make in the future, Adding a Copyright Notice, Give the -module a version/issue/release number, How to release and distribute a -module, Take care when changing a released module +=item How do I do fancy stuff with the keyboard/screen/mouse? -=item Guidelines for Converting Perl 4 Library Scripts into Modules +Keyboard, Screen, Mouse +=item How do I ask the user for a password? -There is no requirement to convert anything, Consider the implications, -Make the most of the opportunity, The pl2pm utility will get you -started, Adds the standard Module prologue lines, Converts package -specifiers from ' to ::, Converts die(...) to croak(...), Several other -minor changes +=item How do I read and write the serial port? -=item Guidelines for Reusing Application Code +lockfiles, open mode, end of line, flushing output, non-blocking input +=item How do I decode encrypted password files? -Complete applications rarely belong in the Perl Module Library, Many -applications contain some perl code which could be reused, Break-out -the reusable code into one or more separate module files, Take the -opportunity to reconsider and redesign the interfaces, In some cases -the 'application' can then be reduced to a small +=item How do I start a process in the background? +STDIN, STDOUT and STDERR are shared, Signals, Zombies +=item How do I trap control characters/signals? 
-=back +=item How do I modify the shadow password file on a Unix system? +=item How do I set the time and date? +=item How can I sleep() or alarm() for under a second? +=item How can I measure time under a second? -=head2 perlref - Perl references and nested data structures +=item How can I do an atexit() or setjmp()/longjmp()? (Exception handling) -=item DESCRIPTION +=item Why doesn't my sockets program work under System V (Solaris)? What +does the error message "Protocol not supported" mean? +=item How can I call my system's unique C functions from Perl? -=over +=item Where do I get the include files to do ioctl() or syscall()? -=item Symbolic references +=item Why do setuid perl scripts complain about kernel problems? +=item How can I open a pipe both to and from a command? -=item Not-so-symbolic references +=item Why can't I get the output of a command with system()? +=item How can I capture STDERR from an external command? +=item Why doesn't open() return an error when a pipe open fails? +=item What's wrong with using backticks in a void context? -=back +=item How can I call backticks without shell processing? -=item WARNING +=item Why can't my script read from STDIN after I gave it EOF (^D on Unix, +^Z on MS-DOS)? +=item How can I convert my shell script to perl? -=item SEE ALSO +=item Can I use perl to run a telnet or ftp session? +=item How can I write expect in Perl? +=item Is there a way to hide perl's command line from programs such as +"ps"? +=item I {changed directory, modified my environment} in a perl script. How +come the change disappeared when I exited the script? How do I get my +changes to be visible? +Unix, VMS -=head2 perldsc - Perl Data Structures Cookbook +=item How do I close a process's filehandle without waiting for it to +complete? -=item DESCRIPTION +=item How do I fork a daemon process? +=item How do I make my program run with sh and csh? 
-arrays of arrays, hashes of arrays, arrays of hashes, hashes of hashes, -more elaborate constructs, recursive and self-referential data -structures, objects +=item How do I find out if I'm running interactively or not? -=item REFERENCES +=item How do I timeout a slow event? +=item How do I set CPU limits? -=item COMMON MISTAKES +=item How do I avoid zombies on a Unix system? +=item How do I use an SQL database? -=item CAVEAT ON PRECEDENCE +=item How do I make a system() exit on control-C? +=item How do I open a file without blocking? -=item WHY YOU SHOULD ALWAYS C<use strict> +=item How do I install a CPAN module? +=item How do I keep my own module/library directory? -=item DEBUGGING +=item How do I add the directory my program lives in to the module/library +search path? +=item How do I add a directory to my include path at runtime? -=item CODE EXAMPLES +=back +=item How do I get one key from the terminal at a time, under POSIX? -=item LISTS OF LISTS +=item AUTHOR AND COPYRIGHT +=item DESCRIPTION =over -=item Declaration of a LIST OF LISTS +=item My CGI script runs from the command line but not the browser. Can +you help me fix it? +=item How do I remove HTML from a string? -=item Generation of a LIST OF LISTS +=item How do I extract URLs? +=item How do I download a file from the user's machine? How do I open a +file on another machine? -=item Access and Printing of a LIST OF LISTS +=item How do I make a pop-up menu in HTML? +=item How do I fetch an HTML file? +=item how do I decode or create those %-encodings on the web? +=item How do I redirect to another page? -=back +=item How do I put a password on my web pages? -=item HASHES OF LISTS +=item How do I edit my .htpasswd and .htgroup files with Perl? +=item How do I make sure users can't enter values into a form that cause my +CGI script to do bad things? -=over +=item How do I parse an email header? -=item Declaration of a HASH OF LISTS +=item How do I decode a CGI form? 
+=item How do I check a valid email address? -=item Generation of a HASH OF LISTS +=item How do I decode a MIME/BASE64 string? +=item How do I return the user's email address? -=item Access and Printing of a HASH OF LISTS +=item How do I send/read mail? + +=item How do I find out my hostname/domainname/IP address? +=item How do I fetch a news article or the active newsgroups? +=item How do I fetch/put an FTP file? +=item How can I do RPC in Perl? =back -=item LISTS OF HASHES +=item AUTHOR AND COPYRIGHT + +=head2 perlfaq9 - Networking ($Revision: 1.16 $, $Date: 1997/04/23 18:12:06 +$) +=item DESCRIPTION =over -=item Declaration of a LIST OF HASHES +=item My CGI script runs from the command line but not the browser. Can +you help me fix it? +=item How do I remove HTML from a string? -=item Generation of a LIST OF HASHES +=item How do I extract URLs? +=item How do I download a file from the user's machine? How do I open a +file on another machine? -=item Access and Printing of a LIST OF HASHES +=item How do I make a pop-up menu in HTML? +=item How do I fetch an HTML file? +=item how do I decode or create those %-encodings on the web? +=item How do I redirect to another page? -=back +=item How do I put a password on my web pages? -=item HASHES OF HASHES +=item How do I edit my .htpasswd and .htgroup files with Perl? +=item How do I make sure users can't enter values into a form that cause my +CGI script to do bad things? -=over +=item How do I parse an email header? -=item Declaration of a HASH OF HASHES +=item How do I decode a CGI form? +=item How do I check a valid email address? -=item Generation of a HASH OF HASHES +=item How do I decode a MIME/BASE64 string? +=item How do I return the user's email address? -=item Access and Printing of a HASH OF HASHES +=item How do I send/read mail? +=item How do I find out my hostname/domainname/IP address? +=item How do I fetch a news article or the active newsgroups? +=item How do I fetch/put an FTP file? 
+ +=item How can I do RPC in Perl? =back -=item MORE ELABORATE RECORDS +=item AUTHOR AND COPYRIGHT + +=head2 perldelta - what's new for perl5.004 + +=item DESCRIPTION + +=item Supported Environments +=item Core Changes =over -=item Declaration of MORE ELABORATE RECORDS +=item Compilation option: Binary compatibility with 5.003 +=item $PERL5OPT environment variable -=item Declaration of a HASH OF COMPLEX RECORDS +=item Limitations on B<-M>, B<-m>, and B<-T> options +=item More precise warnings -=item Generation of a HASH OF COMPLEX RECORDS +=item Deprecated: Inherited C<AUTOLOAD> for non-methods +=item Subroutine arguments created only when they're modified +=item Group vector changeable with C<$)> +=item Fixed parsing of $$<digit>, &$<digit>, etc. -=back +=item No resetting of $. on implicit close -=item Database Ties +=item C<wantarray> may return undef +=item Changes to tainting checks -=item SEE ALSO +No glob() or <*>, No spawning if tainted $CDPATH, $ENV, $BASH_ENV, No +spawning if tainted $TERM doesn't look like a terminal name +=item New Opcode module and revised Safe module -=item AUTHOR +=item Embedding improvements +=item Internal change: FileHandle class based on IO::* classes +=item Internal change: PerlIO abstraction interface +=item New and changed syntax +$coderef->(PARAMS) -=head2 perllol, perlLoL - Manipulating Lists of Lists in Perl +=item New and changed builtin constants -=item DESCRIPTION +__PACKAGE__ +=item New and changed builtin variables -=item Declaration and Access of Lists of Lists +$^E, $^H, $^M +=item New and changed builtin functions -=item Growing Your Own +delete on slices, flock, printf and sprintf, keys as an lvalue, my() in +Control Structures, pack() and unpack(), sysseek(), use VERSION, use Module +VERSION LIST, prototype(FUNCTION), srand, $_ as Default, C<m//gc> does not +reset search position on failure, C<m//x> ignores whitespace before ?*+{}, +nested C<sub{}> closures work now, formats work right on changing lexicals +=item 
New builtin methods -=item Access and Printing +isa(CLASS), can(METHOD), VERSION( [NEED] ) +=item TIEHANDLE now supported -=item Slices +TIEHANDLE classname, LIST, PRINT this, LIST, PRINTF this, LIST, READ this +LIST, READLINE this, GETC this, DESTROY this +=item Malloc enhancements -=item SEE ALSO +-DDEBUGGING_MSTATS, -DEMERGENCY_SBRK, -DPACK_MALLOC, -DTWO_POT_OPTIMIZE +=item Miscellaneous efficiency enhancements -=item AUTHOR +=back +=item Support for More Operating Systems +=over +=item Win32 +=item Plan 9 -=head2 perlobj - Perl objects +=item QNX -=item DESCRIPTION +=item AmigaOS +=back -=over +=item Pragmata -=item An Object is Simply a Reference +use autouse MODULE => qw(sub1 sub2 sub3), use blib, use blib 'dir', use +constant NAME => VALUE, use locale, use ops, use vmsish +=item Modules -=item A Class is Simply a Package +=over +=item Required Updates -=item A Method is Simply a Subroutine +=item Installation directories +=item Module information summary -=item Method Invocation +=item Fcntl +=item IO -=item Destructors +=item Math::Complex +=item Math::Trig -=item WARNING +=item DB_File +=item Net::Ping -=item Summary +=item Object-oriented overrides for builtin operators + +=back +=item Utility Changes -=item Two-Phased Garbage Collection +=over + +=item pod2html +Sends converted HTML to standard output +=item xsubpp +C<void> XSUBs now default to returning nothing =back +=item C Language API Changes + +C<gv_fetchmethod> and C<perl_call_sv>, C<perl_eval_pv>, Extended API for +manipulating hashes + +=item Documentation Changes + +L<perldelta>, L<perlfaq>, L<perllocale>, L<perltoot>, L<perlapio>, +L<perlmodlib>, L<perldebug>, L<perlsec> + +=item New Diagnostics + +"my" variable %s masks earlier declaration in same scope, %s argument is +not a HASH element or slice, Allocation too large: %lx, Allocation too +large, Applying %s to %s will act on scalar(%s), Attempt to free +nonexistent shared string, Attempt to use reference as lvalue in substr, +Can't redefine 
active sort subroutine %s, Can't use bareword ("%s") as %s +ref while "strict refs" in use, Cannot resolve method `%s' overloading `%s' +in package `%s', Constant subroutine %s redefined, Constant subroutine %s +undefined, Copy method did not return a reference, Died, Exiting +pseudo-block via %s, Identifier too long, Illegal character %s (carriage +return), Illegal switch in PERL5OPT: %s, Integer overflow in hex number, +Integer overflow in octal number, internal error: glob failed, Invalid +conversion in %s: "%s", Invalid type in pack: '%s', Invalid type in unpack: +'%s', Name "%s::%s" used only once: possible typo, Null picture in +formline, Offset outside string, Out of memory!, Out of memory during +request for %s, panic: frexp, Possible attempt to put comments in qw() +list, Possible attempt to separate words with commas, Scalar value @%s{%s} +better written as $%s{%s}, Stub found while resolving method `%s' +overloading `%s' in package `%s', Too late for "B<-T>" option, untie +attempted while %d inner references still exist, Unrecognized character %s, +Unsupported function fork, Use of "$$<digit>" to mean "${$}<digit>" is +deprecated, Value of %s can be "0"; test with defined(), Variable "%s" may +be unavailable, Variable "%s" will not stay shared, Warning: something's +wrong, Ill-formed logical name |%s| in prime_env_iter, Got an error from +DosAllocMem, Malformed PERLLIB_PREFIX, PERL_SH_DIR too long, Process +terminated by SIG%s + +=item BUGS + =item SEE ALSO +=item HISTORY +=head2 perldata - Perl data types +=item DESCRIPTION +=over -=head2 perltie - how to hide an object class in a simple variable +=item Variable names -=item SYNOPSIS +=item Context + +=item Scalar values + +=item Scalar value constructors + +=item List value constructors + +=item Typeglobs and Filehandles + +=back +=head2 perlsyn - Perl syntax =item DESCRIPTION +=over + +=item Declarations + +=item Simple statements + +=item Compound statements + +=item Loop Control + +=item For Loops + 
+=item Foreach Loops + +=item Basic BLOCKs and Switch Statements + +=item Goto + +=item PODs: Embedded Documentation + +=item Plain Old Comments (Not!) + +=back + +=head2 perlop - Perl operators and precedence + +=item SYNOPSIS + +=item DESCRIPTION =over -=item Tying Scalars +=item Terms and List Operators (Leftward) +=item The Arrow Operator -TIESCALAR classname, LIST, FETCH this, STORE this, value, DESTROY this +=item Auto-increment and Auto-decrement -=item Tying Arrays +=item Exponentiation +=item Symbolic Unary Operators -TIEARRAY classname, LIST, FETCH this, index, STORE this, index, value, -DESTROY this +=item Binding Operators -=item Tying Hashes +=item Multiplicative Operators +=item Additive Operators -USER, HOME, CLOBBER, LIST, TIEHASH classname, LIST, FETCH this, key, -STORE this, key, value, DELETE this, key, CLEAR this, EXISTS this, key, -FIRSTKEY this, NEXTKEY this, lastkey, DESTROY this +=item Shift Operators -=item Tying FileHandles +=item Named Unary Operators +=item Relational Operators +=item Equality Operators +=item Bitwise And -=back +=item Bitwise Or and Exclusive Or -=item SEE ALSO +=item C-style Logical And +=item C-style Logical Or -=item BUGS +=item Range Operator +=item Conditional Operator -=item AUTHOR +=item Assignment Operators +=item Comma Operator +=item List Operators (Rightward) +=item Logical Not +=item Logical And -=head2 perlbot - Bag'o Object Tricks (the BOT) +=item Logical or and Exclusive Or -=item DESCRIPTION +=item C Operators Missing From Perl +unary &, unary *, (TYPE) -=item OO SCALING TIPS +=item Quote and Quote-like Operators +=item Regexp Quote-Like Operators -=item INSTANCE VARIABLES +?PATTERN?, m/PATTERN/gimosx, /PATTERN/gimosx, q/STRING/, C<'STRING'>, +qq/STRING/, "STRING", qx/STRING/, `STRING`, qw/STRING/, +s/PATTERN/REPLACEMENT/egimosx, tr/SEARCHLIST/REPLACEMENTLIST/cds, +y/SEARCHLIST/REPLACEMENTLIST/cds +=item I/O Operators -=item SCALAR INSTANCE VARIABLES +=item Constant Folding +=item Integer Arithmetic 
-=item INSTANCE VARIABLE INHERITANCE +=item Floating-point Arithmetic +=back -=item OBJECT RELATIONSHIPS +=head2 perlre - Perl regular expressions +=item DESCRIPTION -=item OVERRIDING SUPERCLASS METHODS +i, m, s, x +=over -=item USING RELATIONSHIP WITH SDBM +=item Regular Expressions +(?#text), (?:regexp), (?=regexp), (?!regexp), (?imsx) -=item THINKING OF CODE REUSE +=item Backtracking +=item Version 8 Regular Expressions -=item CLASS CONTEXT AND THE OBJECT +=item WARNING on \1 vs $1 +=item SEE ALSO -=item INHERITING A CONSTRUCTOR +=back + +=head2 perlrun - how to execute the Perl interpreter +=item SYNOPSIS -=item DELEGATION +=item DESCRIPTION +=over + +=item #! and quoting on non-Unix systems +OS/2, MS-DOS, Win95/NT, Macintosh +=item Switches +B<-0>[I<digits>], B<-a>, B<-c>, B<-d>, B<-d:>I<foo>, B<-D>I<number>, +B<-D>I<list>, B<-e> I<commandline>, B<-F>I<pattern>, B<-h>, +B<-i>[I<extension>], B<-I>I<directory>, B<-l>[I<octnum>], +B<-m>[B<->]I<module>, B<-M>[B<->]I<module>, B<-M>[B<->]I<'module ...'>, +B<-[mM]>[B<->]I<module=arg[,arg]...>, B<-n>, B<-p>, B<-P>, B<-s>, B<-S>, +B<-T>, B<-u>, B<-U>, B<-v>, B<-V>, B<-V:>I<name>, B<-w>, B<-x> I<directory> -=head2 perldebug - Perl debugging +=back + +=item ENVIRONMENT + +HOME, LOGDIR, PATH, PERL5LIB, PERL5OPT, PERLLIB, PERL5DB, +PERL_DEBUG_MSTATS, PERL_DESTRUCT_LEVEL + +=head2 perlfunc - Perl builtin functions =item DESCRIPTION + I<THERE IS NO GENERAL RULE FOR CONVERTING A LIST INTO A SCALAR!> =over -=item Debugging +=item Perl Functions by Category +Functions for SCALARs or strings, Regular expressions and pattern matching, +Numeric functions, Functions for real @ARRAYs, Functions for list data, +Functions for real %HASHes, Input and output functions, Functions for fixed +length data or records, Functions for filehandles, files, or directories, +Keywords related to the control flow of your perl program, Keywords related +to scoping, Miscellaneous functions, Functions for processes and process +groups, Keywords related 
to perl modules, Keywords related to classes and +object-orientedness, Low-level socket functions, System V interprocess +communication functions, Fetching user and group info, Fetching network +info, Time-related functions, Functions new in perl5, Functions obsoleted +in perl5 -h, T, s, n, f, c, c line, <CR>, l min+incr, l min-max, l line, l, -, w -line, l subname, /pattern/, ?pattern?, L, S, t, b line [ condition ], b -subname [ condition ], d line, D, a line command, A, < command, > -command, V package [symbols], X [symbols], ! number, ! -number, H --number, q or ^D, command, p expr +=item Alphabetical Listing of Perl Functions -=item Customization +-I<X> FILEHANDLE, -I<X> EXPR, -I<X>, abs VALUE, abs, accept +NEWSOCKET,GENERICSOCKET, alarm SECONDS, alarm, atan2 Y,X, bind SOCKET,NAME, +binmode FILEHANDLE, bless REF,CLASSNAME, bless REF, caller EXPR, caller, +chdir EXPR, chmod LIST, chomp VARIABLE, chomp LIST, chomp, chop VARIABLE, +chop LIST, chop, chown LIST, chr NUMBER, chr, chroot FILENAME, chroot, +close FILEHANDLE, closedir DIRHANDLE, connect SOCKET,NAME, continue BLOCK, +cos EXPR, crypt PLAINTEXT,SALT, dbmclose HASH, dbmopen HASH,DBNAME,MODE, +defined EXPR, defined, delete EXPR, die LIST, do BLOCK, do +SUBROUTINE(LIST), do EXPR, dump LABEL, each HASH, eof FILEHANDLE, eof (), +eof, eval EXPR, eval BLOCK, exec LIST, exists EXPR, exit EXPR, exp EXPR, +exp, fcntl FILEHANDLE,FUNCTION,SCALAR, fileno FILEHANDLE, flock +FILEHANDLE,OPERATION, fork, format, formline PICTURE,LIST, getc FILEHANDLE, +getc, getlogin, getpeername SOCKET, getpgrp PID, getppid, getpriority +WHICH,WHO, getpwnam NAME, getgrnam NAME, gethostbyname NAME, getnetbyname +NAME, getprotobyname NAME, getpwuid UID, getgrgid GID, getservbyname +NAME,PROTO, gethostbyaddr ADDR,ADDRTYPE, getnetbyaddr ADDR,ADDRTYPE, +getprotobynumber NUMBER, getservbyport PORT,PROTO, getpwent, getgrent, +gethostent, getnetent, getprotoent, getservent, setpwent, setgrent, +sethostent STAYOPEN, setnetent STAYOPEN, 
setprotoent STAYOPEN, setservent +STAYOPEN, endpwent, endgrent, endhostent, endnetent, endprotoent, +endservent, getsockname SOCKET, getsockopt SOCKET,LEVEL,OPTNAME, glob EXPR, +glob, gmtime EXPR, goto LABEL, goto EXPR, goto &NAME, grep BLOCK LIST, grep +EXPR,LIST, hex EXPR, hex, import, index STR,SUBSTR,POSITION, index +STR,SUBSTR, int EXPR, int, ioctl FILEHANDLE,FUNCTION,SCALAR, join +EXPR,LIST, keys HASH, kill LIST, last LABEL, last, lc EXPR, lc, lcfirst +EXPR, lcfirst, length EXPR, length, link OLDFILE,NEWFILE, listen +SOCKET,QUEUESIZE, local EXPR, localtime EXPR, log EXPR, log, lstat +FILEHANDLE, lstat EXPR, lstat, m//, map BLOCK LIST, map EXPR,LIST, mkdir +FILENAME,MODE, msgctl ID,CMD,ARG, msgget KEY,FLAGS, msgsnd ID,MSG,FLAGS, +msgrcv ID,VAR,SIZE,TYPE,FLAGS, my EXPR, next LABEL, next, no Module LIST, +oct EXPR, oct, open FILEHANDLE,EXPR, open FILEHANDLE, opendir +DIRHANDLE,EXPR, ord EXPR, ord, pack TEMPLATE,LIST, package NAMESPACE, pipe +READHANDLE,WRITEHANDLE, pop ARRAY, pop, pos SCALAR, pos, print FILEHANDLE +LIST, print LIST, print, printf FILEHANDLE FORMAT, LIST, printf FORMAT, +LIST, prototype FUNCTION, push ARRAY,LIST, q/STRING/, qq/STRING/, +qx/STRING/, qw/STRING/, quotemeta EXPR, quotemeta, rand EXPR, rand, read +FILEHANDLE,SCALAR,LENGTH,OFFSET, read FILEHANDLE,SCALAR,LENGTH, readdir +DIRHANDLE, readlink EXPR, readlink, recv SOCKET,SCALAR,LEN,FLAGS, redo +LABEL, redo, ref EXPR, ref, rename OLDNAME,NEWNAME, require EXPR, require, +reset EXPR, reset, return EXPR, return, reverse LIST, rewinddir DIRHANDLE, +rindex STR,SUBSTR,POSITION, rindex STR,SUBSTR, rmdir FILENAME, rmdir, s///, +scalar EXPR, seek FILEHANDLE,POSITION,WHENCE, seekdir DIRHANDLE,POS, select +FILEHANDLE, select, select RBITS,WBITS,EBITS,TIMEOUT, semctl +ID,SEMNUM,CMD,ARG, semget KEY,NSEMS,FLAGS, semop KEY,OPSTRING, send +SOCKET,MSG,FLAGS,TO, send SOCKET,MSG,FLAGS, setpgrp PID,PGRP, setpriority +WHICH,WHO,PRIORITY, setsockopt SOCKET,LEVEL,OPTNAME,OPTVAL, shift ARRAY, +shift, shmctl 
ID,CMD,ARG, shmget KEY,SIZE,FLAGS, shmread ID,VAR,POS,SIZE, +shmwrite ID,STRING,POS,SIZE, shutdown SOCKET,HOW, sin EXPR, sin, sleep +EXPR, sleep, socket SOCKET,DOMAIN,TYPE,PROTOCOL, socketpair +SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL, sort SUBNAME LIST, sort BLOCK LIST, +sort LIST, splice ARRAY,OFFSET,LENGTH,LIST, splice ARRAY,OFFSET,LENGTH, +splice ARRAY,OFFSET, split /PATTERN/,EXPR,LIMIT, split /PATTERN/,EXPR, +split /PATTERN/, split, sprintf FORMAT, LIST, sqrt EXPR, sqrt, srand EXPR, +srand, stat FILEHANDLE, stat EXPR, stat, study SCALAR, study, sub BLOCK, +sub NAME, sub NAME BLOCK, substr EXPR,OFFSET,LEN, substr EXPR,OFFSET, +symlink OLDFILE,NEWFILE, syscall LIST, sysopen FILEHANDLE,FILENAME,MODE, +sysopen FILEHANDLE,FILENAME,MODE,PERMS, sysread +FILEHANDLE,SCALAR,LENGTH,OFFSET, sysread FILEHANDLE,SCALAR,LENGTH, sysseek +FILEHANDLE,POSITION,WHENCE, system LIST, syswrite +FILEHANDLE,SCALAR,LENGTH,OFFSET, syswrite FILEHANDLE,SCALAR,LENGTH, tell +FILEHANDLE, tell, telldir DIRHANDLE, tie VARIABLE,CLASSNAME,LIST, tied +VARIABLE, time, times, tr///, truncate FILEHANDLE,LENGTH, truncate +EXPR,LENGTH, uc EXPR, uc, ucfirst EXPR, ucfirst, umask EXPR, umask, undef +EXPR, undef, unlink LIST, unlink, unpack TEMPLATE,EXPR, untie VARIABLE, +unshift ARRAY,LIST, use Module LIST, use Module, use Module VERSION LIST, +use VERSION, utime LIST, values HASH, vec EXPR,OFFSET,BITS, wait, waitpid +PID,FLAGS, wantarray, warn LIST, write FILEHANDLE, write EXPR, write, y/// +=back -=item Other resources +=head2 perlvar - Perl predefined variables +=item DESCRIPTION + +=over +=item Predefined Names +$ARG, $_, $E<lt>I<digit>E<gt>, $MATCH, $&, $PREMATCH, $`, $POSTMATCH, $', +$LAST_PAREN_MATCH, $+, $MULTILINE_MATCHING, $*, input_line_number HANDLE +EXPR, $INPUT_LINE_NUMBER, $NR, $, input_record_separator HANDLE EXPR, +$INPUT_RECORD_SEPARATOR, $RS, $/, autoflush HANDLE EXPR, $OUTPUT_AUTOFLUSH, +$|, output_field_separator HANDLE EXPR, $OUTPUT_FIELD_SEPARATOR, $OFS, $,, +output_record_separator 
HANDLE EXPR, $OUTPUT_RECORD_SEPARATOR, $ORS, $\, +$LIST_SEPARATOR, $", $SUBSCRIPT_SEPARATOR, $SUBSEP, $;, $OFMT, $#, +format_page_number HANDLE EXPR, $FORMAT_PAGE_NUMBER, $%, +format_lines_per_page HANDLE EXPR, $FORMAT_LINES_PER_PAGE, $=, +format_lines_left HANDLE EXPR, $FORMAT_LINES_LEFT, $-, format_name HANDLE +EXPR, $FORMAT_NAME, $~, format_top_name HANDLE EXPR, $FORMAT_TOP_NAME, $^, +format_line_break_characters HANDLE EXPR, $FORMAT_LINE_BREAK_CHARACTERS, +$:, format_formfeed HANDLE EXPR, $FORMAT_FORMFEED, $^L, $ACCUMULATOR, $^A, +$CHILD_ERROR, $?, $OS_ERROR, $ERRNO, $!, $EXTENDED_OS_ERROR, $^E, +$EVAL_ERROR, $@, $PROCESS_ID, $PID, $$, $REAL_USER_ID, $UID, $<, +$EFFECTIVE_USER_ID, $EUID, $>, $REAL_GROUP_ID, $GID, $(, +$EFFECTIVE_GROUP_ID, $EGID, $), $PROGRAM_NAME, $0, $[, $PERL_VERSION, $], +$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, $INPLACE_EDIT, $^I, $OSNAME, +$^O, $PERLDB, $^P, $BASETIME, $^T, $WARNING, $^W, $EXECUTABLE_NAME, $^X, +$ARGV, @ARGV, @INC, %INC, $ENV{expr}, $SIG{expr}, $^M =back -=item BUGS +=head2 perlsub - Perl subroutines +=item SYNOPSIS +=item DESCRIPTION +=over +=item Private Variables via my() -=head2 perldiag - various Perl diagnostics +=item Temporary Values via local() + +=item Passing Symbol Table Entries (typeglobs) + +=item Pass by Reference + +=item Prototypes + +=item Constant Functions + +=item Overriding Builtin Functions + +=item Autoloading + +=back + +=item SEE ALSO + +=head2 perlmod - Perl modules (packages and symbol tables) =item DESCRIPTION +=over + +=item Packages +=item Symbol Tables +=item Package Constructors and Destructors +=item Perl Classes -=head2 perlform - Perl formats +=item Perl Modules + +=back + +=item SEE ALSO + +=head2 perlmodlib - constructing new Perl modules and finding existing ones =item DESCRIPTION +=item THE PERL MODULE LIBRARY =over -=item Format Variables +=item Pragmatic Modules +use autouse MODULE => qw(sub1 sub2 sub3), blib, diagnostics, integer, less, +lib, locale, ops, overload, sigtrap, 
strict, subs, vmsish, vars +=item Standard Modules + +AnyDBM_File, AutoLoader, AutoSplit, Benchmark, CPAN, CPAN::FirstTime, +CPAN::Nox, Carp, Class::Struct, Config, Cwd, DB_File, Devel::SelfStubber, +DirHandle, DynaLoader, English, Env, Exporter, ExtUtils::Embed, +ExtUtils::Install, ExtUtils::Liblist, ExtUtils::MM_OS2, ExtUtils::MM_Unix, +ExtUtils::MM_VMS, ExtUtils::MakeMaker, ExtUtils::Manifest, +ExtUtils::Mkbootstrap, ExtUtils::Mksymlists, ExtUtils::testlib, Fcntl, +File::Basename, File::CheckTree, File::Compare, File::Copy, File::Find, +File::Path, File::stat, FileCache, FileHandle, FindBin, GDBM_File, +Getopt::Long, Getopt::Std, I18N::Collate, IO, IO::File, IO::Handle, +IO::Pipe, IO::Seekable, IO::Select, IO::Socket, IPC::Open2, IPC::Open3, +Math::BigFloat, Math::BigInt, Math::Complex, Math::Trig, NDBM_File, +Net::Ping, Net::hostent, Net::netent, Net::protoent, Net::servent, Opcode, +Pod::Text, POSIX, SDBM_File, Safe, Search::Dict, SelectSaver, SelfLoader, +Shell, Socket, Symbol, Sys::Hostname, Sys::Syslog, Term::Cap, +Term::Complete, Term::ReadLine, Test::Harness, Text::Abbrev, +Text::ParseWords, Text::Soundex, Text::Tabs, Text::Wrap, Tie::Hash, +Tie::RefHash, Tie::Scalar, Tie::SubstrHash, Time::Local, Time::gmtime, +Time::localtime, Time::tm, UNIVERSAL, User::grent, User::pwent +=item Extension Modules =back -=item NOTES +=item CPAN +Language Extensions and Documentation Tools, Development Support, Operating +System Interfaces, Networking, Device Control (modems) and InterProcess +Communication, Data Types and Data Type Utilities, Database Interfaces, +User Interfaces, Interfaces to / Emulations of Other Programming Languages, +File Names, File Systems and File Locking (see also File Handles), String +Processing, Language Text Processing, Parsing, and Searching, Option, +Argument, Parameter, and Configuration File Processing, +Internationalization and Locale, Authentication, Security, and Encryption, +World Wide Web, HTML, HTTP, CGI, MIME, Server and Daemon 
Utilities, +Archiving and Compression, Images, Pixmap and Bitmap Manipulation, Drawing, +and Graphing, Mail and Usenet News, Control Flow Utilities (callbacks and +exceptions etc), File Handle and Input/Output Stream Utilities, +Miscellaneous Modules, Africa, Asia, Australasia, Europe, North America, +South America + +=item Modules: Creation, Use, and Abuse =over -=item Footers +=item Guidelines for Module Creation +Do similar modules already exist in some form?, Try to design the new +module to be easy to extend and reuse, Some simple style guidelines, Select +what to export, Select a name for the module, Have you got it right?, +README and other Additional Files, A description of the +module/package/extension etc, A copyright notice - see below, Prerequisites +- what else you may need to have, How to build it - possible changes to +Makefile.PL etc, How to install it, Recent changes in this release, +especially incompatibilities, Changes / enhancements you plan to make in +the future, Adding a Copyright Notice, Give the module a +version/issue/release number, How to release and distribute a module, Take +care when changing a released module -=item Accessing Formatting Internals +=item Guidelines for Converting Perl 4 Library Scripts into Modules +There is no requirement to convert anything, Consider the implications, +Make the most of the opportunity, The pl2pm utility will get you started, +Adds the standard Module prologue lines, Converts package specifiers from ' +to ::, Converts die(...) 
to croak(...), Several other minor changes +=item Guidelines for Reusing Application Code +Complete applications rarely belong in the Perl Module Library, Many +applications contain some perl code which could be reused, Break-out the +reusable code into one or more separate module files, Take the opportunity +to reconsider and redesign the interfaces, In some cases the 'application' +can then be reduced to a small =back -=item WARNING +=item NOTE + +=head2 perlform - Perl formats + +=item DESCRIPTION + +=over + +=item Format Variables + +=back + +=item NOTES + +=over +=item Footers +=item Accessing Formatting Internals +=back +=item WARNINGS -=head2 perlipc - Perl interprocess communication (signals, fifos, -pipes, safe subprocceses, sockets, and semaphores) +=head2 perllocale - Perl locale handling (internationalization and +localization) =item DESCRIPTION +=item PREPARING TO USE LOCALES -=item Signals +=item USING LOCALES +=over -=item Named Pipes +=item The use locale pragma +=item The setlocale function -=item Using open() for IPC +=item The localeconv function + +=back +=item LOCALE CATEGORIES =over -=item Safe Pipe Opens +=item Category LC_COLLATE: Collation +=item Category LC_CTYPE: Character Types -=item Bidirectional Communication +=item Category LC_NUMERIC: Numeric Formatting +=item Category LC_MONETARY: Formatting of monetary amounts +=item LC_TIME +=item Other categories =back -=item Sockets: Client/Server Communication +=item SECURITY + +B<Comparison operators> (C<lt>, C<le>, C<ge>, C<gt> and C<cmp>):, +B<Case-mapping interpolation> (with C<\l>, C<\L>, C<\u> or C<\U>), +B<Matching operator> (C<m//>):, B<Substitution operator> (C<s///>):, +B<In-memory formatting function> (sprintf()):, B<Output formatting +functions> (printf() and write()):, B<Case-mapping functions> (lc(), +lcfirst(), uc(), ucfirst()):, B<POSIX locale-dependent functions> +(localeconv(), strcoll(),strftime(), strxfrm()):, B<POSIX character class +tests> (isalnum(), isalpha(),
isdigit(),isgraph(), islower(), isprint(), +ispunct(), isspace(), isupper(), +isxdigit()): + +=item ENVIRONMENT +PERL_BADLANG, LC_ALL, LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, +LC_TIME, LANG + +=item NOTES =over -=item Internet TCP Clients and Servers +=item Backward compatibility +=item I18N::Collate obsolete -=item Unix-Domain TCP Clients and Servers +=item Sort speed and memory use impacts +=item write() and LC_NUMERIC -=item UDP: Message Passing +=item Freely available locale definitions + +=item I18n and l10n + +=item An imperfect standard + +=back +=item BUGS +=over +=item Broken systems =back -=item SysV IPC +=item SEE ALSO + +=item HISTORY + +=head2 perlref - Perl references and nested data structures + +=item DESCRIPTION + +=over + +=item Symbolic references + +=item Not-so-symbolic references +=back =item WARNING +=item SEE ALSO -=item NOTES +=head2 perldsc - Perl Data Structures Cookbook +=item DESCRIPTION -=item BUGS +arrays of arrays, hashes of arrays, arrays of hashes, hashes of hashes, +more elaborate constructs +=item REFERENCES -=item AUTHOR +=item COMMON MISTAKES +=item CAVEAT ON PRECEDENCE -=item SEE ALSO +=item WHY YOU SHOULD ALWAYS C<use strict> +=item DEBUGGING +=item CODE EXAMPLES +=item LISTS OF LISTS +=over -=head2 perlsec - Perl security +=item Declaration of a LIST OF LISTS -=item DESCRIPTION +=item Generation of a LIST OF LISTS +=item Access and Printing of a LIST OF LISTS +=back +=item HASHES OF LISTS +=over -=head2 perltrap - Perl traps for the unwary +=item Declaration of a HASH OF LISTS -=item DESCRIPTION +=item Generation of a HASH OF LISTS + +=item Access and Printing of a HASH OF LISTS +=back + +=item LISTS OF HASHES =over -=item Awk Traps +=item Declaration of a LIST OF HASHES +=item Generation of a LIST OF HASHES -=item C Traps +=item Access and Printing of a LIST OF HASHES +=back -=item Sed Traps +=item HASHES OF HASHES + +=over +=item Declaration of a HASH OF HASHES -=item Shell Traps +=item Generation of a HASH OF HASHES
+=item Access and Printing of a HASH OF HASHES -=item Perl Traps +=back +=item MORE ELABORATE RECORDS -=item Perl4 Traps +=over +=item Declaration of MORE ELABORATE RECORDS +=item Declaration of a HASH OF COMPLEX RECORDS +=item Generation of a HASH OF COMPLEX RECORDS =back +=item Database Ties +=item SEE ALSO +=item AUTHOR -=head2 perlstyle - Perl style guide +=head2 perllol, perlLoL - Manipulating Lists of Lists in Perl =item DESCRIPTION +=item Declaration and Access of Lists of Lists + +=item Growing Your Own +=item Access and Printing +=item Slices +=item SEE ALSO -=head2 perlxs - XS language reference manual +=item AUTHOR + +=head2 perltoot - Tom's object-oriented tutorial for perl =item DESCRIPTION +=item Creating a Class =over -=item Introduction +=item Object Representation +=item Class Interface -=item On The Road +=item Constructors and Instance Methods +=item Planning for the Future: Better Constructors -=item The Anatomy of an XSUB +=item Destructors +=item Other Object Methods -=item The Argument Stack +=back +=item Class Data -=item The RETVAL Variable +=over +=item Accessing Class Data -=item The MODULE Keyword +=item Debugging Methods +=item Class Destructors -=item The PACKAGE Keyword +=item Documenting the Interface +=back -=item The PREFIX Keyword +=item Aggregation +=item Inheritance -=item The OUTPUT: Keyword +=over +=item Overridden Methods -=item The CODE: Keyword +=item Multiple Inheritance +=item UNIVERSAL: The Root of All Objects -=item The INIT: Keyword +=back +=item Alternate Object Representations -=item The NO_INIT Keyword +=over +=item Arrays as Objects -=item Initializing Function Parameters +=item Closures as Objects +=back -=item Default Parameter Values +=item AUTOLOAD: Proxy Methods +=over -=item The PREINIT: Keyword +=item Autoloaded Data Methods +=item Inherited Autoloaded Data Methods -=item The INPUT: Keyword +=back +=item Metaclassical Tools -=item Variable-length Parameter Lists +=over +=item Class::Struct -=item The PPCODE: 
Keyword +=item Data Members as Variables +=item NOTES -=item Returning Undef And Empty Lists +=item Object Terminology +=back -=item The REQUIRE: Keyword +=item SEE ALSO +=item COPYRIGHT -=item The CLEANUP: Keyword +=over +=item Acknowledgments -=item The BOOT: Keyword +=back +=head2 perlobj - Perl objects -=item The VERSIONCHECK: Keyword +=item DESCRIPTION +=over -=item The PROTOTYPES: Keyword +=item An Object is Simply a Reference +=item A Class is Simply a Package -=item The PROTOTYPE: Keyword +=item A Method is Simply a Subroutine +=item Method Invocation -=item The ALIAS: Keyword +=item Default UNIVERSAL methods +isa(CLASS), can(METHOD), VERSION( [NEED] ) -=item The INCLUDE: Keyword +=item Destructors +=item WARNING -=item The CASE: Keyword +=item Summary +=item Two-Phased Garbage Collection -=item The & Unary Operator +=back +=item SEE ALSO -=item Inserting Comments and C Preprocessor Directives +=head2 perltie - how to hide an object class in a simple variable +=item SYNOPSIS -=item Using XS With C++ +=item DESCRIPTION +=over -=item Interface Strategy +=item Tying Scalars +TIESCALAR classname, LIST, FETCH this, STORE this, value, DESTROY this -=item Perl Objects And C Structures +=item Tying Arrays + +TIEARRAY classname, LIST, FETCH this, index, STORE this, index, value, +DESTROY this +=item Tying Hashes -=item The Typemap +USER, HOME, CLOBBER, LIST, TIEHASH classname, LIST, FETCH this, key, STORE +this, key, value, DELETE this, key, CLEAR this, EXISTS this, key, FIRSTKEY +this, NEXTKEY this, lastkey, DESTROY this +=item Tying FileHandles +TIEHANDLE classname, LIST, PRINT this, LIST, PRINTF this, LIST, READ this +LIST, READLINE this, GETC this, DESTROY this +=item The C<untie> Gotcha =back -=item EXAMPLES +=item SEE ALSO + +=item BUGS +=item AUTHOR -=item XS VERSION +=head2 perlbot - Bag'o Object Tricks (the BOT) +=item DESCRIPTION -=item AUTHOR +=item OO SCALING TIPS + +=item INSTANCE VARIABLES + +=item SCALAR INSTANCE VARIABLES + +=item INSTANCE VARIABLE 
INHERITANCE + +=item OBJECT RELATIONSHIPS + +=item OVERRIDING SUPERCLASS METHODS + +=item USING RELATIONSHIP WITH SDBM +=item THINKING OF CODE REUSE +=item CLASS CONTEXT AND THE OBJECT +=item INHERITING A CONSTRUCTOR +=item DELEGATION -=head2 perlxstut, perlXStut - Tutorial for XSUB's +=head2 perlipc - Perl interprocess communication (signals, fifos, pipes, +safe subprocesses, sockets, and semaphores) =item DESCRIPTION +=item Signals + +=item Named Pipes + +=item Using open() for IPC =over -=item VERSION CAVEAT +=item Filehandles +=item Background Processes -=item DYNAMIC VERSUS STATIC +=item Complete Dissociation of Child from Parent +=item Safe Pipe Opens -=item EXAMPLE 1 +=item Bidirectional Communication with Another Process +=back -=item EXAMPLE 2 +=item Sockets: Client/Server Communication +=over -=item WHAT HAS GONE ON? +=item Internet TCP Clients and Servers +=item Unix-Domain TCP Clients and Servers -=item EXAMPLE 3 +=back +=item TCP Clients with IO::Socket -=item WHAT'S NEW HERE? 
+=over +=item A Simple Client -=item INPUT AND OUTPUT PARAMETERS +C<Proto>, C<PeerAddr>, C<PeerPort> +=item A Webget Client -=item THE XSUBPP COMPILER +=item Interactive Client with IO::Socket +=back -=item THE TYPEMAP FILE +=item TCP Servers with IO::Socket +Proto, LocalPort, Listen, Reuse -=item WARNING +=item UDP: Message Passing +=item SysV IPC -=item SPECIFYING ARGUMENTS TO XSUBPP +=item NOTES +=item BUGS -=item THE ARGUMENT STACK +=item AUTHOR + +=item SEE ALSO +=head2 perldebug - Perl debugging -=item EXTENDING YOUR EXTENSION +=item DESCRIPTION +=item The Perl Debugger -=item DOCUMENTING YOUR EXTENSION +=over +=item Debugger Commands -=item INSTALLING YOUR EXTENSION +h [command], p expr, x expr, V [pkg [vars]], X [vars], T, s [expr], n +[expr], E<lt>CRE<gt>, c [line|sub], l, l min+incr, l min-max, l line, l +subname, -, w [line], f filename, /pattern/, ?pattern?, L, S [[!]pattern], +t, t expr, b [line] [condition], b subname [condition], b postpone subname +[condition], b load filename, b compile subname, d [line], D, a [line] +command, A, O [opt[=val]] [opt"val"] [opt?].., C<recallCommand>, +C<ShellBang>, C<pager>, C<tkRunning>, C<signalLevel>, C<warnLevel>, +C<dieLevel>, C<AutoTrace>, C<LineInfo>, C<inhibit_exit>, C<PrintRet>, +C<ornaments>, C<frame>, C<maxTraceLen>, C<arrayDepth>, C<hashDepth>, +C<compactDump>, C<veryCompact>, C<globPrint>, C<DumpDBFiles>, +C<DumpPackages>, C<quote>, C<HighBit>, C<undefPrint>, C<UsageOnly>, C<TTY>, +C<noTTY>, C<ReadLine>, C<NonStop>, E<lt> [ command ], E<lt>E<lt> command, +E<gt> command, E<gt>E<gt> command, { [ command ], {{ command, ! number, ! +-number, ! pattern, !! 
cmd, H -number, q or ^D, R, |dbcmd, ||dbcmd, = +[alias value], command, m expr, m package +=item Debugger input/output -=item SEE ALSO +Prompt, Multiline commands, Stack backtrace, Listing, Frame listing +=item Debugging compile-time statements -=item Author +=item Debugger Customization +=item Readline Support -=item Last Changed +=item Editor Support for Debugging +=item The Perl Profiler +=item Debugger support in perl +=item Debugger Internals + +=item Other resources =back +=item BUGS +=head2 perldiag - various Perl diagnostics +=item DESCRIPTION -=head2 perlguts - Perl's Internal Functions +=head2 perlsec - Perl security =item DESCRIPTION +=over -=item Datatypes +=item Laundering and Detecting Tainted Data +=item Switches On the "#!" Line -=over +=item Cleaning Up Your Path -=item What is an "IV"? +=item Security Bugs +=item Protecting Your Programs -=item Working with SV's +=back +=head2 perltrap - Perl traps for the unwary -=item What's Really Stored in an SV? +=item DESCRIPTION +=over -=item Working with AV's +=item Awk Traps +=item C Traps -=item Working with HV's +=item Sed Traps +=item Shell Traps -=item References +=item Perl Traps +=item Perl4 to Perl5 Traps -=item Blessed References and Class Objects +Discontinuance, Deprecation, and BugFix traps, Parsing Traps, Numerical +Traps, General data type traps, Context Traps - scalar, list contexts, +Precedence Traps, General Regular Expression Traps using s///, etc, +Subroutine, Signal, Sorting Traps, OS Traps, DBM Traps, Unclassified Traps +=item Discontinuance, Deprecation, and BugFix traps +Discontinuance, Deprecation, BugFix, Discontinuance, Discontinuance, +Discontinuance, BugFix, Discontinuance, Discontinuance, BugFix, +Discontinuance, Discontinuance, Deprecation, Discontinuance +=item Parsing Traps -=back +Parsing, Parsing, Parsing -=item Creating New Variables +=item Numerical Traps +Numerical, Numerical, Numerical -=item XSUB's and the Argument Stack +=item General data type traps +(Arrays), 
(Arrays), (Hashes), (Globs), (Scalar String), (Constants), +(Scalars), (Variable Suicide) -=item Mortality +=item Context Traps - scalar, list contexts +(list context), (scalar context), (scalar context), (list, builtin) -=item Stashes +=item Precedence Traps +Precedence, Precedence, Precedence, Precedence, Precedence, Precedence, +Precedence -=item Magic +=item General Regular Expression Traps using s///, etc. +Regular Expression, Regular Expression, Regular Expression, Regular +Expression, Regular Expression, Regular Expression, Regular Expression, +Regular Expression, Regular Expression -=over +=item Subroutine, Signal, Sorting Traps -=item Assigning Magic +(Signals), (Sort Subroutine), warn() won't let you specify a filehandle +=item OS Traps -=item Magic Virtual Tables +(SysV), (SysV) +=item Interpolation Traps -=item Finding Magic +Interpolation, Interpolation, Interpolation, Interpolation, Interpolation, +Interpolation, Interpolation, Interpolation, Interpolation + +=item DBM Traps +DBM, DBM +=item Unclassified Traps +Unclassified =back -=item Double-Typed SV's +=head2 perlstyle - Perl style guide +=item DESCRIPTION -=item Calling Perl Routines from within C Programs +=head2 perlpod - plain old documentation +=item DESCRIPTION -=item Memory Allocation +=item Embedding Pods in Perl Modules +=item Common Pod Pitfalls -=item API LISTING +=item SEE ALSO +=item AUTHOR -AvFILL, av_clear, av_extend, av_fetch, av_len, av_make, av_pop, -av_push, av_shift, av_store, av_undef, av_unshift, CLASS, Copy, croak, -CvSTASH, DBsingle, DBsub, dMARK, dORIGMARK, dSP, dXSARGS, ENTER, -EXTEND, FREETMPS, G_ARRAY, G_DISCARD, G_EVAL, GIMME, G_NOARGS, -G_SCALAR, gv_stashpv, gv_stashsv, GvSV, he_free, hv_clear, hv_delete, -hv_exists, hv_fetch, hv_iterinit, hv_iterkey, hv_iternext, -hv_iternextsv, hv_iterval, hv_magic, HvNAME, hv_store, hv_undef, -isALNUM, isALPHA, isDIGIT, isLOWER, isSPACE, isUPPER, items, LEAVE, -MARK, mg_clear, mg_copy, mg_find, mg_free, mg_get, mg_len, mg_magical, 
-mg_set, Move, na, New, Newc, Newz, newAV, newHV, newRV, newSV, newSViv, -newSVnv, newSVpv, newSVrv, newSVsv, newXS, newXSproto, Nullav, Nullch, -Nullcv, Nullhv, Nullsv, ORIGMARK, perl_alloc, perl_call_argv, -perl_call_method, perl_call_pv, perl_call_sv, perl_construct, -perl_destruct, perl_eval_sv, perl_free, perl_get_av, perl_get_cv, -perl_get_hv, perl_get_sv, perl_parse, perl_require_pv, perl_run, POPi, -POPl, POPp, POPn, POPs, +=head2 perlbook - Perl book information -=item AUTHOR +=item DESCRIPTION +=head2 perlembed - how to embed perl in your C program -=item DATE +=item DESCRIPTION +=over + +=item PREAMBLE +B<Use C from Perl?>, B<Use a Unix program from Perl?>, B<Use Perl from +Perl?>, B<Use C from C?>, B<Use Perl from C?> +=item ROADMAP +=item Compiling your C program -=head2 perlcall - Perl calling conventions from C +=item Adding a Perl interpreter to your C program + +=item Calling a Perl subroutine from your C program + +=item Evaluating a Perl statement from your C program + +=item Performing Perl pattern matches and substitutions from your C program + +=item Fiddling with the Perl stack from your C program + +=item Maintaining a persistent interpreter + +=item Maintaining multiple interpreter instances + +=item Using Perl modules, which themselves use C libraries, from your C +program + +=back + +=item MORAL + +=item AUTHOR + +=item COPYRIGHT + +=head2 perlapio - perl's IO abstraction interface. 
+ +=item SYNOPSIS =item DESCRIPTION +B<PerlIO *>, B<PerlIO_stdin()>, B<PerlIO_stdout()>, B<PerlIO_stderr()>, +B<PerlIO_open(path, mode)>, B<PerlIO_fdopen(fd,mode)>, +B<PerlIO_printf(f,fmt,...)>, B<PerlIO_vprintf(f,fmt,a)>, +B<PerlIO_stdoutf(fmt,...)>, B<PerlIO_read(f,buf,count)>, +B<PerlIO_write(f,buf,count)>, B<PerlIO_close(f)>, B<PerlIO_puts(s,f)>, +B<PerlIO_putc(c,f)>, B<PerlIO_ungetc(c,f)>, B<PerlIO_getc(f)>, +B<PerlIO_eof(f)>, B<PerlIO_error(f)>, B<PerlIO_fileno(f)>, +B<PerlIO_clearerr(f)>, B<PerlIO_flush(f)>, B<PerlIO_tell(f)>, +B<PerlIO_seek(f,o,w)>, B<PerlIO_getpos(f,p)>, B<PerlIO_setpos(f,p)>, +B<PerlIO_rewind(f)>, B<PerlIO_tmpfile()> -An Error Handler, An Event Driven Program +=over -=item THE PERL_CALL FUNCTIONS +=item Co-existence with stdio +B<PerlIO_importFILE(f,flags)>, B<PerlIO_exportFILE(f,flags)>, +B<PerlIO_findFILE(f)>, B<PerlIO_releaseFILE(p,f)>, B<PerlIO_setlinebuf(f)>, +B<PerlIO_has_cntptr(f)>, B<PerlIO_get_ptr(f)>, B<PerlIO_get_cnt(f)>, +B<PerlIO_canset_cnt(f)>, B<PerlIO_fast_gets(f)>, +B<PerlIO_set_ptrcnt(f,p,c)>, B<PerlIO_set_cnt(f,c)>, B<PerlIO_has_base(f)>, +B<PerlIO_get_base(f)>, B<PerlIO_get_bufsiz(f)> -B<perl_call_sv>, B<perl_call_pv>, B<perl_call_method>, -B<perl_call_argv> +=back -=item FLAG VALUES +=head2 perlxs - XS language reference manual +=item DESCRIPTION =over -=item G_SCALAR +=item Introduction +=item On The Road -=item G_ARRAY +=item The Anatomy of an XSUB +=item The Argument Stack -=item G_DISCARD +=item The RETVAL Variable +=item The MODULE Keyword -=item G_NOARGS +=item The PACKAGE Keyword +=item The PREFIX Keyword -=item G_EVAL +=item The OUTPUT: Keyword +=item The CODE: Keyword -=item G_KEEPERR +=item The INIT: Keyword +=item The NO_INIT Keyword -=item Determining the Context +=item Initializing Function Parameters +=item Default Parameter Values +=item The PREINIT: Keyword +=item The SCOPE: Keyword -=back +=item The INPUT: Keyword -=item KNOWN PROBLEMS +=item Variable-length Parameter Lists +=item The PPCODE: Keyword 
-=item EXAMPLES +=item Returning Undef And Empty Lists +=item The REQUIRE: Keyword -=over +=item The CLEANUP: Keyword -=item No Parameters, Nothing returned +=item The BOOT: Keyword +=item The VERSIONCHECK: Keyword -=item Passing Parameters +=item The PROTOTYPES: Keyword +=item The PROTOTYPE: Keyword -=item Returning a Scalar +=item The ALIAS: Keyword +=item The INCLUDE: Keyword -=item Returning a list of values +=item The CASE: Keyword +=item The & Unary Operator -=item Returning a list in a scalar context +=item Inserting Comments and C Preprocessor Directives +=item Using XS With C++ -=item Returning Data from Perl via the parameter list +=item Interface Strategy +=item Perl Objects And C Structures -=item Using G_EVAL +=item The Typemap +=back -=item Using G_KEEPERR +=item EXAMPLES +=item XS VERSION -=item Using perl_call_sv +=item AUTHOR +=head2 perlxstut, perlXStut - Tutorial for XSUBs -=item Using perl_call_argv +=item DESCRIPTION +=over -=item Using perl_call_method +=item VERSION CAVEAT +=item DYNAMIC VERSUS STATIC -=item Using GIMME +=item EXAMPLE 1 +=item EXAMPLE 2 -=item Using Perl to dispose of temporaries +=item WHAT HAS GONE ON? +=item WRITING GOOD TEST SCRIPTS -=item Strategies for storing Callback Context Information +=item EXAMPLE 3 +=item WHAT'S NEW HERE? -1. Ignore the problem - Allow only 1 callback, 2. Create a sequence of -callbacks - hard wired limit, 3. Use a parameter to map to the Perl -callback +=item INPUT AND OUTPUT PARAMETERS -=item Alternate Stack Manipulation +=item THE XSUBPP COMPILER +=item THE TYPEMAP FILE +=item WARNING +=item EXAMPLE 4 -=back +=item WHAT HAS HAPPENED HERE? 
-=item SEE ALSO +=item SPECIFYING ARGUMENTS TO XSUBPP +=item THE ARGUMENT STACK -=item AUTHOR +=item EXTENDING YOUR EXTENSION +=item DOCUMENTING YOUR EXTENSION -=item DATE +=item INSTALLING YOUR EXTENSION +=item SEE ALSO +=item Author +=item Last Changed +=back -=head2 perlembed - how to embed perl in your C program +=head2 perlguts - Perl's Internal Functions =item DESCRIPTION +=item Variables =over -=item PREAMBLE +=item Datatypes +=item What is an "IV"? -B<Use C from Perl?>, B<Use a UNIX program from Perl?>, B<Use Perl from -Perl?>, B<Use C from C?>, B<Use Perl from C?> +=item Working with SVs -=item ROADMAP +=item What's Really Stored in an SV? +=item Working with AVs -=item Compiling your C program +=item Working with HVs +=item Hash API Extensions -=item Adding a Perl interpreter to your C program +=item References +=item Blessed References and Class Objects -=item Calling a Perl subroutine from your C program +=item Creating New Variables +=item Reference Counts and Mortality -=item Evaluating a Perl statement from your C program +=item Stashes and Globs +=item Double-Typed SVs -=item Performing Perl pattern matches and substitutions from your C -program +=item Magic Variables +=item Assigning Magic +=item Magic Virtual Tables +=item Finding Magic =back -=item MORAL +=item Subroutines +=over -=item AUTHOR +=item XSUBs and the Argument Stack +=item Calling Perl Routines from within C Programs +=item Memory Allocation +=item PerlIO +=item Putting a C value on Perl stack -=head2 perlpod - plain old documentation +=item Scratchpads -=item DESCRIPTION +=item Scratchpads and recursion +=back -=item Embedding Pods in Perl Modules +=item Compiled code +=over -=item SEE ALSO +=item Code tree +=item Examining the tree -=item AUTHOR +=item Compile pass 1: check routines +=item Compile pass 1a: constant folding +=item Compile pass 2: context propagation +=item Compile pass 3: peephole optimization +=back -=head2 perlbook - Perl book information +=item API LISTING + 
+AvFILL, av_clear, av_extend, av_fetch, av_len, av_make, av_pop, av_push, +av_shift, av_store, av_undef, av_unshift, CLASS, Copy, croak, CvSTASH, +DBsingle, DBsub, DBtrace, dMARK, dORIGMARK, dowarn, dSP, dXSARGS, dXSI32, +dXSI32, ENTER, EXTEND, FREETMPS, G_ARRAY, G_DISCARD, G_EVAL, GIMME, +GIMME_V, G_NOARGS, G_SCALAR, G_VOID, gv_fetchmeth, gv_fetchmethod, +gv_fetchmethod_autoload, gv_stashpv, gv_stashsv, GvSV, HEf_SVKEY, HeHASH, +HeKEY, HeKLEN, HePV, HeSVKEY, HeSVKEY_force, HeSVKEY_set, HeVAL, hv_clear, +hv_delayfree_ent, hv_delete, hv_delete_ent, hv_exists, hv_exists_ent, +hv_fetch, hv_fetch_ent, hv_free_ent, hv_iterinit, hv_iterkey, hv_iterkeysv, +hv_iternext, hv_iternextsv, hv_iterval, hv_magic, HvNAME, hv_store, +hv_store_ent, hv_undef, isALNUM, isALPHA, isDIGIT, isLOWER, isSPACE, +isUPPER, items, ix, LEAVE, MARK, mg_clear, mg_copy, mg_find, mg_free, +mg_get, mg_len, mg_magical, mg_set, Move, na, New, Newc, Newz, newAV, +newHV, newRV_inc, newRV_noinc, newSV, newSViv, newSVnv, newSVpv, newSVrv, +newSVsv, newXS, newXSproto, Nullav, Nullch, Nullcv, Nullhv, Nullsv, +ORIGMARK, perl_alloc, perl_call_argv, perl_call_method, perl_call_pv, +perl_call_sv, perl_construct, perl_destruct, perl_eval_sv, perl_eval_pv, +perl_free, perl_get_av, perl_get_cv, perl_get_hv, perl_get_sv, perl_parse, +perl_require_pv, perl_run, POPi, POPl, POPp, POPn, POPs, PUSHMARK, PUSHi, +PUSHn, PUSHp, PUSHs, PUTBACK, Renew, Renewc, RETVAL, safefree, safemalloc, +saferealloc, savepv, savepvn, SAVETMPS, SP, SPAGAIN, ST, strEQ, strGE, +strGT, strLE, strLT, strNE, strnEQ, strnNE, sv_2mortal, sv_bless, sv_catpv, +sv_catpvn, sv_catpvf, sv_catsv, sv_cmp, sv_cmp, SvCUR, SvCUR_set, sv_dec, +sv_dec, SvEND, sv_eq, SvGROW, sv_grow, sv_inc, SvIOK, SvIOK_off, SvIOK_on, +SvIOK_only, SvIOK_only, SvIOKp, sv_isa, SvIV, sv_isobject, SvIVX, SvLEN, +sv_len, sv_len, sv_magic, sv_mortalcopy, SvOK, sv_newmortal, sv_no, SvNIOK, +SvNIOK_off, SvNIOKp, SvNOK, SvNOK_off, SvNOK_on, SvNOK_only, SvNOK_only, +SvNOKp, SvNV, 
SvNVX, SvPOK, SvPOK_off, SvPOK_on, SvPOK_only, SvPOK_only, +SvPOKp, SvPV, SvPVX, SvREFCNT, SvREFCNT_dec, SvREFCNT_inc, SvROK, +SvROK_off, SvROK_on, SvRV, sv_setiv, sv_setnv, sv_setpv, sv_setpvn, +sv_setpvf, sv_setref_iv, sv_setref_nv, sv_setref_pv, sv_setref_pvn, +sv_setsv, SvSTASH, SVt_IV, SVt_PV, SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, +SVt_NV, SvTRUE, SvTYPE, svtype, SvUPGRADE, sv_upgrade, sv_undef, sv_unref, +sv_usepvn, sv_yes, THIS, toLOWER, toUPPER, warn, XPUSHi, XPUSHn, XPUSHp, +XPUSHs, XS, XSRETURN, XSRETURN_EMPTY, XSRETURN_IV, XSRETURN_NO, +XSRETURN_NV, XSRETURN_PV, XSRETURN_UNDEF, XSRETURN_YES, XST_mIV, XST_mNV, +XST_mNO, XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, XS_VERSION_BOOTCHECK, +Zero + +=item EDITOR + +=item DATE + +=head2 perlcall - Perl calling conventions from C =item DESCRIPTION +An Error Handler, An Event Driven Program + +=item THE PERL_CALL FUNCTIONS +B<perl_call_sv>, B<perl_call_pv>, B<perl_call_method>, B<perl_call_argv> +=item FLAG VALUES +=over -=head1 PRAGMA DOCUMENTATION +=item G_VOID +=item G_SCALAR +=item G_ARRAY +=item G_DISCARD -=head2 diagnostics - Perl compiler pragma to force verbose warning -diagnostics +=item G_NOARGS -=item SYNOPSIS +=item G_EVAL +=item G_KEEPERR -=item DESCRIPTION +=item Determining the Context + +=back + +=item KNOWN PROBLEMS +=item EXAMPLES =over -=item The C<diagnostics> Pragma +=item No Parameters, Nothing returned +=item Passing Parameters -=item The I<splain> Program +=item Returning a Scalar +=item Returning a list of values +=item Returning a list in a scalar context +=item Returning Data from Perl via the parameter list -=back +=item Using G_EVAL -=item EXAMPLES +=item Using G_KEEPERR +=item Using perl_call_sv -=item INTERNALS +=item Using perl_call_argv +=item Using perl_call_method -=item BUGS +=item Using GIMME_V +=item Using Perl to dispose of temporaries -=item AUTHOR +=item Strategies for storing Callback Context Information +1. Ignore the problem - Allow only 1 callback, 2. 
Create a sequence of +callbacks - hard wired limit, 3. Use a parameter to map to the Perl +callback +=item Alternate Stack Manipulation +=item Creating and calling an anonymous subroutine in C +=back -=head2 integer - Perl pragma to compute arithmetic in integer instead -of double +=item SEE ALSO -=item SYNOPSIS +=item AUTHOR +=item DATE + +=head1 PRAGMA DOCUMENTATION + +=head2 autouse - postpone load of modules until a function is used + +=item SYNOPSIS =item DESCRIPTION +=item WARNING +=item BUGS +=item AUTHOR +=item SEE ALSO -=head2 less - perl pragma to request less of something from the -compiler +=head2 blib - Use MakeMaker's uninstalled version of a package =item SYNOPSIS +=item DESCRIPTION + +=item BUGS + +=item AUTHOR + +=head2 constant - Perl pragma to declare constants + +=item SYNOPSIS =item DESCRIPTION +=item NOTES + +=item TECHNICAL NOTE +=item BUGS +=item AUTHOR +=item COPYRIGHT -=head2 lib - manipulate @INC at compile time +=head2 diagnostics - Perl compiler pragma to force verbose warning +diagnostics =item SYNOPSIS +=item DESCRIPTION + +=over + +=item The C<diagnostics> Pragma + +=item The I<splain> Program + +=back + +=item EXAMPLES + +=item INTERNALS + +=item BUGS + +=item AUTHOR + +=head2 integer - Perl pragma to compute arithmetic in integer instead of +double + +=item SYNOPSIS =item DESCRIPTION +=head2 less - perl pragma to request less of something from the compiler -=over +=item SYNOPSIS -=item ADDING DIRECTORIES TO @INC +=item DESCRIPTION +=head2 lib - manipulate @INC at compile time -=item DELETING DIRECTORIES FROM @INC +=item SYNOPSIS +=item DESCRIPTION -=item RESTORING ORIGINAL @INC +=over +=item ADDING DIRECTORIES TO @INC +=item DELETING DIRECTORIES FROM @INC +=item RESTORING ORIGINAL @INC =back =item SEE ALSO - =item AUTHOR +=head2 locale - Perl pragma to use and avoid POSIX locales for built-in +operations + +=item SYNOPSIS + +=item DESCRIPTION + +=head2 ops - Perl pragma to restrict unsafe operations when compiling +=item SYNOPSIS 
+=item DESCRIPTION +=item SEE ALSO =head2 overload - Package for overloading perl operations =item SYNOPSIS - =item CAVEAT SCRIPTOR - =item DESCRIPTION - =over =item Declaration of overloaded functions - =item Calling Conventions for Binary Operations - FALSE, TRUE, C<undef> =item Calling Conventions for Unary Operations - =item Overloadable Operations - I<Arithmetic operations>, I<Comparison operations>, I<Bit operations>, -I<Increment and decrement>, I<Transcendental functions>, I<Boolean, -string and numeric conversion>, I<Special> +I<Increment and decrement>, I<Transcendental functions>, I<Boolean, string +and numeric conversion>, I<Special> +=item Inheritance and overloading +Strings as values of C<use overload> directive, Overloading of an operation +is inherited by derived classes =back =item SPECIAL SYMBOLS FOR C<use overload> - =over -=item Last Resort - - -=item Fallback +=item Last Resort +=item Fallback C<undef>, TRUE, defined, but FALSE =item Copy Constructor - B<Example> - - =back =item MAGIC AUTOGENERATION - I<Assignment forms of arithmetic operations>, I<Conversion operations>, -I<Increment and decrement>, C<abs($a)>, I<Unary minus>, +I<Increment and decrement>, C<abs($a)>, I<Unary minus>, I<Negation>, I<Concatenation>, I<Comparison operations>, I<Copy operator> =item WARNING - =item Run-time Overloading - =item Public functions - -overload::StrVal(arg), overload::Overloaded(arg), -overload::Method(obj,op) +overload::StrVal(arg), overload::Overloaded(arg), overload::Method(obj,op) =item IMPLEMENTATION - =item AUTHOR - =item DIAGNOSTICS - =item BUGS +=head2 sigtrap - Perl pragma to enable simple signal handling + +=item SYNOPSIS +=item DESCRIPTION +=item OPTIONS +=over -=head2 sigtrap - Perl pragma to enable stack backtrace on unexpected -signals +=item SIGNAL HANDLERS -=item SYNOPSIS +B<stack-trace>, B<die>, B<handler> I<your-handler> +=item SIGNAL LISTS -=item DESCRIPTION +B<normal-signals>, B<error-signals>, B<old-interface-signals> +=item OTHER 
+B<untrapped>, B<any>, I<signal>, I<number> +=back +=item EXAMPLES =head2 strict - Perl pragma to restrict unsafe constructs =item SYNOPSIS - =item DESCRIPTION - C<strict refs>, C<strict vars>, C<strict subs> +=head2 subs - Perl pragma to predeclare sub names +=item SYNOPSIS +=item DESCRIPTION -=head2 subs - Perl pragma to predeclare sub names +=head2 vars - Perl pragma to predeclare global variable names =item SYNOPSIS +=item DESCRIPTION + +=head1 MODULE DOCUMENTATION + +=head2 AnyDBM_File - provide framework for multiple DBMs + +=item SYNOPSIS =item DESCRIPTION +=over + +=item DBM Comparisons +[0], [1], [2], [3] +=back +=item SEE ALSO -=head2 vars - Perl pragma to predeclare global variable names +=head2 AutoLoader - load subroutines only on demand =item SYNOPSIS - =item DESCRIPTION +=over +=item Subroutine Stubs +=item Using B<AutoLoader>'s AUTOLOAD Subroutine +=item Overriding B<AutoLoader>'s AUTOLOAD Subroutine -=head1 MODULE DOCUMENTATION +=item Package Lexicals +=item B<AutoLoader> vs. 
B<SelfLoader> + +=back +=item CAVEATS +=item SEE ALSO -=head2 AnyDBM_File - provide framework for multiple DBMs +=head2 AutoSplit - split a package for autoloading =item SYNOPSIS - =item DESCRIPTION +=item CAVEATS + +=item DIAGNOSTICS + +=head2 Benchmark - benchmark running times of code + +=item SYNOPSIS + +=item DESCRIPTION =over -=item DBM Comparisons +=item Methods +new, debug -[0], [1], [2], [3] +=item Standard Exports + +timeit(COUNT, CODE), timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] ), +timethese ( COUNT, CODEHASHREF, [ STYLE ] ), timediff ( T1, T2 ), timestr ( +TIMEDIFF, [ STYLE, [ FORMAT ]] ) +=item Optional Exports +clearcache ( COUNT ), clearallcache ( ), disablecache ( ), enablecache ( ) =back -=item SEE ALSO +=item NOTES +=item INHERITANCE +=item CAVEATS +=item AUTHORS +=item MODIFICATION HISTORY -=head2 AutoLoader - load functions only on demand +=head2 Bundle::CPAN - A bundle to play with all the other modules on CPAN =item SYNOPSIS +=item CONTENTS =item DESCRIPTION +=item AUTHOR - - - -=head2 AutoSplit - split a package for autoloading +=head2 CGI - Simple Common Gateway Interface Class =item SYNOPSIS +=item ABSTRACT + +=item INSTALLATION =item DESCRIPTION +=over +=item CREATING A NEW QUERY OBJECT: +=item CREATING A NEW QUERY OBJECT FROM AN INPUT FILE +=item FETCHING A LIST OF KEYWORDS FROM THE QUERY: -=head2 Benchmark - benchmark running times of code +=item FETCHING THE NAMES OF ALL THE PARAMETERS PASSED TO YOUR SCRIPT: -=item SYNOPSIS +=item FETCHING THE VALUE OR VALUES OF A SINGLE NAMED PARAMETER: +=item SETTING THE VALUE(S) OF A NAMED PARAMETER: -=item DESCRIPTION +=item APPENDING ADDITIONAL VALUES TO A NAMED PARAMETER: +=item IMPORTING ALL PARAMETERS INTO A NAMESPACE: -=over +=item DELETING A PARAMETER COMPLETELY: -=item Methods +=item DELETING ALL PARAMETERS: +=item SAVING THE STATE OF THE FORM TO A FILE: -new, debug +=item CREATING A SELF-REFERENCING URL THAT PRESERVES STATE INFORMATION: -=item Standard Exports +=item COMPATIBILITY WITH 
CGI-LIB.PL +=item CALLING CGI FUNCTIONS THAT TAKE MULTIPLE ARGUMENTS -timeit(COUNT, CODE), timethis, timethese, timediff, timestr +=item CREATING THE HTTP HEADER: -=item Optional Exports +=item GENERATING A REDIRECTION INSTRUCTION +=item CREATING THE HTML HEADER: +B<Parameters:>, 4, 5, 6.. +=item ENDING THE HTML DOCUMENT: =back -=item NOTES +=item CREATING FORMS +=over -=item INHERITANCE +=item CREATING AN ISINDEX TAG +=item STARTING AND ENDING A FORM -=item CAVEATS +B<application/x-www-form-urlencoded>, B<multipart/form-data> +=item CREATING A TEXT FIELD -=item AUTHORS +B<Parameters> +=item CREATING A BIG TEXT FIELD -=item MODIFICATION HISTORY +=item CREATING A PASSWORD FIELD +=item CREATING A FILE UPLOAD FIELD +B<Parameters> +=item CREATING A POPUP MENU +=item CREATING A SCROLLING LIST -=head2 Carp, carp - warn of errors (from perspective of caller) +B<Parameters:> -=item SYNOPSIS +=item CREATING A GROUP OF RELATED CHECKBOXES +B<Parameters:> -=item DESCRIPTION +=item CREATING A STANDALONE CHECKBOX +B<Parameters:> +=item CREATING A RADIO BUTTON GROUP +B<Parameters:> +=item CREATING A SUBMIT BUTTON -=head2 Cwd, getcwd - get pathname of current working directory +B<Parameters:> -=item SYNOPSIS +=item CREATING A RESET BUTTON +=item CREATING A DEFAULT BUTTON -=item DESCRIPTION +=item CREATING A HIDDEN FIELD +B<Parameters:> +=item CREATING A CLICKABLE IMAGE BUTTON +B<Parameters:>, 3.The third option (-align, optional) is an alignment type, +and may be +TOP, BOTTOM or MIDDLE +=item CREATING A JAVASCRIPT ACTION BUTTON -=head2 DB_File - Perl5 access to Berkeley DB +=back -=item SYNOPSIS +=item NETSCAPE COOKIES +1. an expiration time, 2. a domain, 3. a path, 4. a "secure" flag, +B<-name>, B<-value>, B<-path>, B<-domain>, B<-expires>, B<-secure> -=item DESCRIPTION +=item WORKING WITH NETSCAPE FRAMES +1. Create a <Frameset> document, 2. Specify the destination for the +document in the HTTP header, 3. 
Specify the destination for the document in +the <FORM> tag -DB_HASH, DB_BTREE, DB_RECNO +=item LIMITED SUPPORT FOR CASCADING STYLE SHEETS + +=item DEBUGGING =over -=item How does DB_File interface to Berkeley DB? +=item DUMPING OUT ALL THE NAME/VALUE PAIRS +=back -=item Differences with Berkeley DB +=item FETCHING ENVIRONMENT VARIABLES +B<accept()>, B<raw_cookie()>, B<user_agent()>, B<path_info()>, +B<path_translated()>, B<remote_host()>, B<script_name()>Return the script +name as a partial URL, for self-refering +scripts, B<referer()>, B<auth_type ()>, B<server_name ()>, B<virtual_host +()>, B<server_software ()>, B<remote_user ()>, B<user_name ()>, +B<request_method()> -=item RECNO +=item CREATING HTML ELEMENTS +=over -=item In Memory Databases +=item PROVIDING ARGUMENTS TO HTML SHORTCUTS +=item Generating new HTML tags -=item Using the Berkeley DB Interface Directly +=back +=item IMPORTING CGI METHOD CALLS INTO YOUR NAME SPACE -get, put, del, fd, seq, sync +B<cgi>, B<form>, B<html2>, B<html3>, B<netscape>, B<shortcuts>, +B<standard>, B<all> +=item USING NPH SCRIPTS +In the B<use> statementSimply add ":nph" to the list of symbols to be +imported into your script:, By calling the B<nph()> method:, By using +B<-nph> parameters in the B<header()> and B<redirect()> statements: -=back +=item AUTHOR INFORMATION -=item EXAMPLES +=item CREDITS +Matt Heffron (heffron@falstaff.css.beckman.com), James Taylor +(james.taylor@srs.gov), Scott Anguish <sanguish@digifix.com>, Mike Jewell +(mlj3u@virginia.edu), Timothy Shimmin (tes@kbs.citri.edu.au), Joergen Haegg +(jh@axis.se), Laurent Delfosse (delfosse@csgrad1.cs.wvu.edu), Richard +Resnick (applepi1@aol.com), Craig Bishop (csb@barwonwater.vic.gov.au), Tony +Curtis (tc@vcpc.univie.ac.at), Tim Bunce (Tim.Bunce@ig.co.uk), Tom +Christiansen (tchrist@convex.com), Andreas Koenig +(k@franz.ww.TU-Berlin.DE), Tim MacKenzie (Tim.MacKenzie@fulcrum.com.au), +Kevin B. 
Hendricks (kbhend@dogwood.tyler.wm.edu), Stephen Dahmen +(joyfire@inxpress.net), Ed Jordan (ed@fidalgo.net), David Alan Pisoni +(david@cnation.com), ...and many many more.. -=over +=item A COMPLETE EXAMPLE OF A SIMPLE FORM-BASED SCRIPT -=item Using HASH +=item BUGS +=item SEE ALSO -=item Using BTREE +=head2 CGI::Apache - Make things work with CGI.pm against Perl-Apache API +=item SYNOPSIS -=item Using RECNO +=item DESCRIPTION +=item NOTE 1 -=item Locking Databases +=item NOTE 2 +=item SEE ALSO +=item AUTHOR +=head2 CGI::Carp, B<CGI::Carp> - CGI routines for writing to the HTTPD (or +other) error log -=back +=item SYNOPSIS -=item HISTORY +=item DESCRIPTION +=item REDIRECTING ERROR MESSAGES -=item WARNINGS +=item MAKING PERL ERRORS APPEAR IN THE BROWSER WINDOW +=item CHANGE LOG -=item BUGS +=item AUTHORS + +=item SEE ALSO +=head2 CGI::Fast - CGI Interface for Fast CGI -=item AVAILABILITY +=item SYNOPSIS +=item DESCRIPTION -=item SEE ALSO +=item OTHER PIECES OF THE PUZZLE +=item WRITING FASTCGI PERL SCRIPTS -=item AUTHOR +=item INSTALLING FASTCGI SCRIPTS +=item USING FASTCGI SCRIPTS AS CGI SCRIPTS +=item CAVEATS +=item AUTHOR INFORMATION +=item BUGS -=head2 Devel::SelfStubber - generate stubs for a SelfLoading module +=item SEE ALSO -=item SYNOPSIS +=head2 CGI::Push - Simple Interface to Server Push +=item SYNOPSIS =item DESCRIPTION +=item USING CGI::Push +-last_page, -type, -delay, -cookie, -target, -expires +=item INSTALLING CGI::Push SCRIPTS +=item CAVEATS -=head2 DirHandle - supply object methods for directory handles +=item AUTHOR INFORMATION + +=item BUGS + +=item SEE ALSO + +=head2 CGI::Switch - Try more than one constructors and return the first +object available =item SYNOPSIS +=item DESCRIPTION + +=item SEE ALSO + +=item AUTHOR + +=head2 CPAN - query, download and build perl modules from CPAN sites + +=item SYNOPSIS =item DESCRIPTION +=over +=item Interactive Mode +Searching for authors, bundles, distribution files and modules, make, test, +install, clean 
modules or distributions, readme, look module or +distribution +=item CPAN::Shell -=head2 DynaLoader - Dynamically load C libraries into Perl code +=item autobundle -=item SYNOPSIS +=item recompile +=item The 4 Classes: Authors, Bundles, Modules, Distributions -=item DESCRIPTION +=item ProgrammerE<39>s interface +expand($type,@things), Programming Examples -@dl_library_path, @dl_resolve_using, @dl_require_symbols, dl_error(), -$dl_debug, dl_findfile(), dl_expandspec(), dl_load_file(), -dl_find_symbol(), dl_undef_symbols(), dl_install_xsub(), boostrap() +=item Cache Manager -=item AUTHOR +=item Bundles +=item Prerequisites + +=item Debugging +=item Floppy, Zip, and all that Jazz +=back +=item CONFIGURATION -=head2 English - use nice English (or awk) names for ugly punctuation -variables +o conf E<lt>scalar optionE<gt>, o conf E<lt>scalar optionE<gt> +E<lt>valueE<gt>, o conf E<lt>list optionE<gt>, o conf E<lt>list optionE<gt> +[shift|pop], o conf E<lt>list optionE<gt> [unshift|push|splice] +E<lt>listE<gt> -=item SYNOPSIS +=item SECURITY +=item EXPORT + +=item BUGS + +=item AUTHOR + +=item SEE ALSO + +=head2 CPAN::FirstTime - Utility for CPAN::Config file Initialization + +=item SYNOPSIS =item DESCRIPTION +=head2 CPANox, CPAN::Nox - Wrapper around CPAN.pm without using any XS +module +=item SYNOPSIS +=item DESCRIPTION +=item SEE ALSO -=head2 Env - perl module that imports environment variables +=head2 Carp, carp - warn of errors (from perspective of caller) =item SYNOPSIS +=item DESCRIPTION + +=head2 Class::Struct - declare struct-like datatypes as Perl classes + +=item SYNOPSIS =item DESCRIPTION +=over -=item AUTHOR +=item The C<struct()> function +=item Element Types and Accessor Methods +Scalar (C<'$'> or C<'*$'>), Array (C<'@'> or C<'*@'>), Hash (C<'%'> or +C<'*%'>), Class (C<'Class_Name'> or C<'*Class_Name'>) +=back +=item EXAMPLES -=head2 Exporter - Implements default import method for modules +Example 1, Example 2 -=item SYNOPSIS +=item Author and Modification 
History +=head2 Config - access Perl configuration information + +=item SYNOPSIS =item DESCRIPTION +myconfig(), config_sh(), config_vars(@names) -=over +=item EXAMPLE -=item Selecting What To Export +=item WARNING +=item NOTE -=item Specialised Import Lists +=head2 Cwd, getcwd - get pathname of current working directory +=item SYNOPSIS -=item Module Version Checking +=item DESCRIPTION +=head2 DB_File - Perl5 access to Berkeley DB -=item Managing Unknown Symbols +=item SYNOPSIS + +=item DESCRIPTION +B<DB_HASH>, B<DB_BTREE>, B<DB_RECNO> -=item Tag Handling Utility Functions +=over + +=item Interface to Berkeley DB +=item Opening a Berkeley DB Database File +=item Default Parameters +=item In Memory Databases =back +=item DB_HASH +=over +=item A Simple Example -=head2 ExtUtils::Install - install files from here to there +=back -=item SYNOPSIS +=item DB_BTREE +=over -=item DESCRIPTION +=item Changing the BTREE sort order +=item Handling Duplicate Keys +=item The get_dup() Method +=item Matching Partial Keys +=back -=head2 ExtUtils::Liblist - determine libraries to use and how to use -them +=item DB_RECNO -=item SYNOPSIS +=over +=item The 'bval' Option -=item DESCRIPTION +=item A Simple Example +=item Extra Methods -For static extensions, For dynamic extensions, For dynamic extensions +B<$X-E<gt>push(list) ;>, B<$value = $X-E<gt>pop ;>, B<$X-E<gt>shift>, +B<$X-E<gt>unshift(list) ;>, B<$X-E<gt>length> + +=item Another Example + +=back + +=item THE API INTERFACE + +B<$status = $X-E<gt>get($key, $value [, $flags]) ;>, B<$status = +$X-E<gt>put($key, $value [, $flags]) ;>, B<$status = $X-E<gt>del($key [, +$flags]) ;>, B<$status = $X-E<gt>fd ;>, B<$status = $X-E<gt>seq($key, +$value, $flags) ;>, B<$status = $X-E<gt>sync([$flags]) ;> + +=item HINTS AND TIPS =over -=item EXTRALIBS +=item Locking Databases +=item Sharing Databases With C Applications -=item LDLOADLIBS and LD_RUN_PATH +=item The untie() Gotcha + +=back +=item COMMON QUESTIONS -=item BSLOADLIBS +=over + +=item Why 
is there Perl source in my database? +=item How do I store complex data structures with DB_File? +=item What does "Invalid Argument" mean? +=item What does "Bareword 'DB_File' not allowed" mean? =back -=item PORTABILITY +=item HISTORY +=item BUGS + +=item AVAILABILITY =item SEE ALSO +=item AUTHOR +=head2 Devel::SelfStubber - generate stubs for a SelfLoading module +=item SYNOPSIS +=item DESCRIPTION -=head2 ExtUtils::MM_OS2 - methods to override UN*X behaviour in -ExtUtils::MakeMaker +=head2 DirHandle - supply object methods for directory handles + +=item SYNOPSIS =item DESCRIPTION +=head2 DynaLoader - Dynamically load C libraries into Perl code +=item SYNOPSIS +=item DESCRIPTION +@dl_library_path, @dl_resolve_using, @dl_require_symbols, @dl_librefs, +@dl_modules, dl_error(), $dl_debug, dl_findfile(), dl_expandspec(), +dl_load_file(), dl_loadflags(), dl_find_symbol(), +dl_find_symbol_anywhere(), dl_undef_symbols(), dl_install_xsub(), +bootstrap() -=head2 ExtUtils::MM_Unix - methods used by ExtUtils::MakeMaker +=item AUTHOR + +=head2 English - use nice English (or awk) names for ugly punctuation +variables =item SYNOPSIS +=item DESCRIPTION + +=head2 Env - perl module that imports environment variables + +=item SYNOPSIS =item DESCRIPTION +=item AUTHOR -=item METHODS +=head2 Exporter - Implements default import method for modules +=item SYNOPSIS + +=item DESCRIPTION =over -=item Preloaded methods +=item Selecting What To Export +=item Specialised Import Lists -catdir, catfile, nicetext, libscan, exescan, lsdir, path, -replace_manpage_separator, file_name_is_absolute, prefixify, -maybe_command_in_dirs, maybe_command, perl_script +=item Module Version Checking -=item SelfLoaded methods +=item Managing Unknown Symbols +=item Tag Handling Utility Functions -guess_name, init_main, init_dirscan, init_others, find_perl +=back -=item Methods to actually produce chunks of text for the Makefile +=head2 ExtUtils::Command - utilities to replace common UNIX commands in +Makefiles 
etc. +=item SYNOPSIS -post_initialize, const_config, constants, const_loadlibs, const_cccmd, -tool_autosplit, tool_xsubpp, tools_other, dist, macro, depend, -post_constants, pasthru, c_o, xs_c, xs_o, top_targets, linkext, dlsyms, -dynamic, dynamic_bs, dynamic_lib, static, static_lib, installpm, -installpm_x, manifypods, processPL, installbin, subdirs, subdir_x, -clean, realclean, dist_basics, dist_core, dist_dir, dist_test, dist_ci, -install, force, perldepend, makefile, staticmake, test, -test_via_harness, test_via_script, postamble, makeaperl, extliblist, -dir_target, needs_linking, has_link_code, writedoc +=item DESCRIPTION +cat, eqtime src dst, rm_f files..., rm_f files..., touch files .., mv +source... destination, cp source... destination, chmod mode files.., mkpath +directory.., test_f file +=item BUGS -=back +=item SEE ALSO + +=item AUTHOR + +=head2 ExtUtils::Embed - Utilities for embedding Perl in C/C++ applications + +=item SYNOPSIS + +=item DESCRIPTION + +=item @EXPORT + +=item FUNCTIONS + +xsinit(), Examples, ldopts(), Examples, perl_inc(), ccflags(), ccdlflags(), +ccopts(), xsi_header(), xsi_protos(@modules), xsi_body(@modules) + +=item EXAMPLES =item SEE ALSO +=item AUTHOR + +=head2 ExtUtils::Install - install files from here to there +=item SYNOPSIS +=item DESCRIPTION +=head2 ExtUtils::Liblist - determine libraries to use and how to use them -=head2 ExtUtils::MM_VMS - methods to override UN*X behaviour in -ExtUtils::MakeMaker +=item SYNOPSIS =item DESCRIPTION +For static extensions, For dynamic extensions, For dynamic extensions + +=over +=item EXTRALIBS +=item LDLOADLIBS and LD_RUN_PATH +=item BSLOADLIBS -=head2 ExtUtils::MakeMaker - create an extension Makefile +=back + +=item PORTABILITY + +=over + +=item VMS implementation + +=back + +=item SEE ALSO + +=head2 ExtUtils::MM_OS2 - methods to override UN*X behaviour in +ExtUtils::MakeMaker =item SYNOPSIS +=item DESCRIPTION + +=head2 ExtUtils::MM_Unix - methods used by ExtUtils::MakeMaker + +=item 
SYNOPSIS =item DESCRIPTION +=item METHODS =over -=item Hintsfile support +=item Preloaded methods +canonpath, catdir, catfile, curdir, rootdir, updir -=item What's new in version 5 of MakeMaker +=item SelfLoaded methods +c_o (o), cflags (o), clean (o), const_cccmd (o), const_config (o), +const_loadlibs (o), constants (o), depend (o), dir_target (o), dist (o), +dist_basics (o), dist_ci (o), dist_core (o), dist_dir (o), dist_test (o), +dlsyms (o), dynamic (o), dynamic_bs (o), dynamic_lib (o), exescan, +extliblist, file_name_is_absolute, find_perl -=item Incompatibilities between MakeMaker 5.00 and 4.23 +=item Methods to actually produce chunks of text for the Makefile +force (o), guess_name, has_link_code, init_dirscan, init_main, init_others, +install (o), installbin (o), libscan (o), linkext (o), lsdir, macro (o), +makeaperl (o), makefile (o), manifypods (o), maybe_command, +maybe_command_in_dirs, needs_linking (o), nicetext, parse_version, pasthru +(o), path, perl_script, perldepend (o), pm_to_blib, post_constants (o), +post_initialize (o), postamble (o), prefixify, processPL (o), realclean +(o), replace_manpage_separator, static (o), static_lib (o), staticmake (o), +subdir_x (o), subdirs (o), test (o), test_via_harness (o), test_via_script +(o), tool_autosplit (o), tools_other (o), tool_xsubpp (o), top_targets (o), +writedoc, xs_c (o), xs_o (o), perl_archive, export_list -=item Default Makefile Behaviour +=back +=item SEE ALSO -=item make test +=head2 ExtUtils::MM_VMS - methods to override UN*X behaviour in +ExtUtils::MakeMaker +=item SYNOPSIS -=item make install +=item DESCRIPTION +=over -=item PREFIX attribute +=item Methods always loaded +eliminate_macros, fixpath, catdir, catfile, wraplist, curdir (override), +rootdir (override), updir (override) -=item AFS users +=item SelfLoaded methods +guess_name (override), find_perl (override), path (override), maybe_command +(override), maybe_command_in_dirs (override), perl_script (override), +file_name_is_absolute 
(override), replace_manpage_separator, init_others +(override), constants (override), cflags (override), const_cccmd +(override), pm_to_blib (override), tool_autosplit (override), tool_sxubpp +(override), xsubpp_version (override), tools_other (override), dist +(override), c_o (override), xs_c (override), xs_o (override), top_targets +(override), dlsyms (override), dynamic_lib (override), dynamic_bs +(override), static_lib (override), manifypods (override), processPL +(override), installbin (override), subdir_x (override), clean (override), +realclean (override), dist_basics (override), dist_core (override), +dist_dir (override), dist_test (override), install (override), perldepend +(override), makefile (override), test (override), test_via_harness +(override), test_via_script (override), makeaperl (override), nicetext +(override) -=item Static Linking of a new Perl Binary +=back +=head2 ExtUtils::MM_Win32 - methods to override UN*X behaviour in +ExtUtils::MakeMaker -=item Determination of Perl Library and Installation Locations +=item SYNOPSIS +=item DESCRIPTION -=item Useful Default Makefile Macros +catfile, static_lib (o), dynamic_lib (o), canonpath, perl_script, +pm_to_blib, test_via_harness (o), tool_autosplit (override), tools_other +(o), manifypods (o), dist_ci (o), dist_core (o), pasthru (o) +=head2 ExtUtils::MakeMaker - create an extension Makefile -=item Using Attributes and Parameters +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item How To Write A Makefile.PL + +=item Default Makefile Behaviour + +=item make test + +=item make testdb + +=item make install + +=item PREFIX and LIB attribute + +=item AFS users + +=item Static Linking of a new Perl Binary + +=item Determination of Perl Library and Installation Locations + +=item Which architecture dependent directory? 
+=item Using Attributes and Parameters C, CONFIG, CONFIGURE, DEFINE, DIR, DISTNAME, DL_FUNCS, DL_VARS, -EXE_FILES, FIRST_MAKEFILE, FULLPERL, H, INC, INSTALLARCHLIB, -INSTALLBIN, INSTALLDIRS, INSTALLMAN1DIR, INSTALLMAN3DIR, -INSTALLPRIVLIB, INSTALLSITELIB, INSTALLSITEARCH, INST_ARCHLIB, -INST_EXE, INST_LIB, INST_MAN1DIR, INST_MAN3DIR, LDFROM, LIBPERL_A, -LIBS, LINKTYPE, MAKEAPERL, MAKEFILE, MAN1PODS, MAN3PODS, MAP_TARGET, -MYEXTLIB, NAME, NEEDS_LINKING, NOECHO, NORECURS, OBJECT, PERL, -PERLMAINCC, PERL_ARCHLIB, PERL_LIB, PERL_SRC, PL_FILES, PM, PMLIBDIRS, -PREFIX, PREREQ, SKIP, TYPEMAPS, VERSION, VERSION_FROM, XS, XSOPT, -XSPROTOARG, XS_VERSION +EXCLUDE_EXT, EXE_FILES, NO_VC, FIRST_MAKEFILE, FULLPERL, H, INC, +INCLUDE_EXT, INSTALLARCHLIB, INSTALLBIN, INSTALLDIRS, INSTALLMAN1DIR, +INSTALLMAN3DIR, INSTALLPRIVLIB, INSTALLSCRIPT, INSTALLSITELIB, +INSTALLSITEARCH, INST_ARCHLIB, INST_BIN, INST_EXE, INST_LIB, INST_MAN1DIR, +INST_MAN3DIR, INST_SCRIPT, LDFROM, LIBPERL_A, LIB, LIBS, LINKTYPE, +MAKEAPERL, MAKEFILE, MAN1PODS, MAN3PODS, MAP_TARGET, MYEXTLIB, NAME, +NEEDS_LINKING, NOECHO, NORECURS, OBJECT, OPTIMIZE, PERL, PERLMAINCC, +PERL_ARCHLIB, PERL_LIB, PERL_SRC, PL_FILES, PM, PMLIBDIRS, PREFIX, +PREREQ_PM, SKIP, TYPEMAPS, VERSION, VERSION_FROM, XS, XSOPT, XSPROTOARG, +XS_VERSION =item Additional lowercase attributes - clean, depend, dist, dynamic_lib, installpm, linkext, macro, realclean, tool_autosplit =item Overriding MakeMaker Methods +=item Hintsfile support =item Distribution Support + make distcheck, make skipcheck, make distclean, make manifest, + make distdir, make tardist, make dist, make uutardist, make +shdist, make zipdist, make ci - make distcheck, make skipcheck, make distclean, make - manifest, make distdir, make tardist, make dist, make - uutardist, make shdist, make ci +=back +=item SEE ALSO +=item AUTHORS -=back +=head2 ExtUtils::Manifest - utilities to write and check a MANIFEST file -=item AUTHORS +=item SYNOPSIS +=item DESCRIPTION -=item MODIFICATION 
HISTORY +=item MANIFEST.SKIP +=item EXPORT_OK -=item TODO +=item GLOBAL VARIABLES +=item DIAGNOSTICS +C<Not in MANIFEST:> I<file>, C<No such file:> I<file>, C<MANIFEST:> I<$!>, +C<Added to MANIFEST:> I<file> +=item SEE ALSO +=item AUTHOR -=head2 ExtUtils::Manifest - utilities to write and check a MANIFEST -file +=head2 ExtUtils::Miniperl, writemain - write the C code for perlmain.c =item SYNOPSIS - =item DESCRIPTION +=item SEE ALSO -=item MANIFEST.SKIP +=head2 ExtUtils::Mkbootstrap - make a bootstrap file for use by DynaLoader +=item SYNOPSIS -=item EXPORT_OK +=item DESCRIPTION +=head2 ExtUtils::Mksymlists - write linker options files for dynamic +extension -=item GLOBAL VARIABLES +=item SYNOPSIS +=item DESCRIPTION -=item DIAGNOSTICS +NAME, DL_FUNCS, DL_VARS, FILE, FUNCLIST, DLBASE + +=item AUTHOR +=item REVISION -C<Not in MANIFEST:> I<file>, C<No such file:> I<file>, C<MANIFEST:> -I<$!>, C<Added to MANIFEST:> I<file> +=head2 ExtUtils::testlib - add blib/* directories to @INC -=item SEE ALSO +=item SYNOPSIS +=item DESCRIPTION -=item AUTHOR +=head2 Fcntl - load the C Fcntl.h defines +=item SYNOPSIS +=item DESCRIPTION +=item NOTE +=item EXPORTED SYMBOLS -=head2 ExtUtils::Mkbootstrap - make a bootstrap file for use by -DynaLoader +=head2 File::Basename, fileparse - split a pathname into pieces =item SYNOPSIS - =item DESCRIPTION +fileparse_set_fstype, fileparse +=item EXAMPLES +C<basename>, C<dirname> +=head2 File::CheckTree, validate - run many filetest checks on a tree -=head2 ExtUtils::Mksymlists - write linker options files for dynamic -extension +=item SYNOPSIS + +=item DESCRIPTION + +=head2 File::Compare - Compare files or filehandles =item SYNOPSIS +=item DESCRIPTION + +=item RETURN + +=item AUTHOR + +=head2 File::Copy - Copy files or filehandles + +=item SYNOPSIS =item DESCRIPTION +=over -NAME, DL_FUNCS, DL_VARS, FILE, FUNCLIST, DLBASE +=item Special behavior if C<syscopy> is defined (VMS and OS/2) + +rmscopy($from,$to[,$date_flag]) + +=back + +=item RETURN 
=item AUTHOR +=head2 File::Find, find - traverse a file tree + +=item SYNOPSIS + +=item DESCRIPTION + +=head2 File::Path - create or remove a series of directories + +=item SYNOPSIS + +=item DESCRIPTION + +=item AUTHORS =item REVISION +=head2 File::stat - by-name interface to Perl's built-in stat() functions + +=item SYNOPSIS +=item DESCRIPTION +=item NOTE +=item AUTHOR -=head2 Fcntl - load the C Fcntl.h defines +=head2 FileCache - keep more files open than the system permits =item SYNOPSIS +=item DESCRIPTION + +=item BUGS + +=head2 FileHandle - supply object methods for filehandles + +=item SYNOPSIS =item DESCRIPTION +$fh->print, $fh->printf, $fh->getline, $fh->getlines -=item NOTE +=item SEE ALSO +=head2 FindBin - Locate directory of original perl script +=item SYNOPSIS +=item DESCRIPTION + +=item EXPORTABLE VARIABLES + +=item KNOWN BUGS + +=item AUTHORS + +=item COPYRIGHT +=item REVISION -=head2 File::Basename, Basename - parse file specifications +=head2 GDBM_File - Perl5 access to the gdbm library. 
=item SYNOPSIS +=item DESCRIPTION + +=item AVAILABILITY + +=item BUGS + +=item SEE ALSO + +=head2 Getopt::Long, GetOptions - extended processing of command line +options + +=item SYNOPSIS =item DESCRIPTION +E<lt>noneE<gt>, !, =s, :s, =i, :i, =f, :f -fileparse_set_fstype, fileparse +=over + +=item Linkage specification + +=item Aliases and abbreviations + +=item Non-option call-back routine + +=item Option starters + +=item Return value + +=back + +=item COMPATIBILITY =item EXAMPLES +=item CONFIGURATION OPTIONS -C<basename>, C<dirname> +default, auto_abbrev, getopt_compat, require_order, permute, bundling +(default: reset), bundling_override (default: reset), ignore_case +(default: set), ignore_case_always (default: reset), pass_through (default: +reset), debug (default: reset) +=item OTHER USEFUL VARIABLES +$Getopt::Long::VERSION, $Getopt::Long::error +=head2 Getopt::Std, getopt - Process single-character switches with switch +clustering -=head2 File::CheckTree, validate - run many filetest checks on a tree +=item SYNOPSIS + +=item DESCRIPTION + +=head2 I18N::Collate - compare 8-bit scalar data according to the current +locale + +=item SYNOPSIS + +=item DESCRIPTION + +=head2 IO - load various IO modules =item SYNOPSIS +=item DESCRIPTION + +=head2 IO::File - supply object methods for filehandles + +=item SYNOPSIS =item DESCRIPTION +=item CONSTRUCTOR +new ([ ARGS ] ), new_tmpfile +=item METHODS +open( FILENAME [,MODE [,PERMS]] ) -=head2 File::Find, find - traverse a file tree +=item SEE ALSO -=item SYNOPSIS +=item HISTORY +=head2 IO::Handle - supply object methods for I/O handles + +=item SYNOPSIS =item DESCRIPTION +=item CONSTRUCTOR +new (), new_from_fd ( FD, MODE ) +=item METHODS +$fh->fdopen ( FD, MODE ), $fh->opened, $fh->getline, $fh->getlines, +$fh->ungetc ( ORD ), $fh->write ( BUF, LEN [, OFFSET }\] ), $fh->flush, +$fh->error, $fh->clearerr, $fh->untaint -=head2 File::Path - create or remove a series of directories +=item NOTE -=item SYNOPSIS +=item SEE ALSO + 
+=item BUGS + +=item HISTORY +=head2 IO::Pipe, IO::pipe - supply object methods for pipes -=item DESCRIPTION +=item SYNOPSIS +=item DESCRIPTION -=item AUTHORS +=item CONSTRCUTOR +new ( [READER, WRITER] ) -=item REVISION +=item METHODS +reader ([ARGS]), writer ([ARGS]), handles () +=item SEE ALSO +=item AUTHOR +=item COPYRIGHT -=head2 FileCache - keep more files open than the system permits +=head2 IO::Seekable - supply seek based methods for I/O objects =item SYNOPSIS +=item DESCRIPTION + +=item SEE ALSO + +=item HISTORY + +=head2 IO::Select - OO interface to the select system call + +=item SYNOPSIS =item DESCRIPTION +=item CONSTRUCTOR -=item BUGS +new ( [ HANDLES ] ) + +=item METHODS +add ( HANDLES ), remove ( HANDLES ), exists ( HANDLE ), handles, can_read ( +[ TIMEOUT ] ), can_write ( [ TIMEOUT ] ), has_error ( [ TIMEOUT ] ), count +(), bits(), bits(), select ( READ, WRITE, ERROR [, TIMEOUT ] ) +=item EXAMPLE +=item AUTHOR +=item COPYRIGHT -=head2 FileHandle - supply object methods for filehandles +=head2 IO::Socket - Object interface to socket communications =item SYNOPSIS - =item DESCRIPTION +=item CONSTRUCTOR - $fh->print, $fh->printf, $fh->getline, $fh->getlines +new ( [ARGS] ) -=item SEE ALSO +=item METHODS +accept([PKG]), timeout([VAL]), sockopt(OPT [, VAL]), sockdomain, socktype, +protocol -=item BUGS +=item SUB-CLASSES + +=over +=item IO::Socket::INET +=item METHODS +sockaddr (), sockport (), sockhost (), peeraddr (), peerport (), peerhost +() +=item IO::Socket::UNIX -=head2 GDBM_File - Perl5 access to the gdbm library. 
+=item METHODS + +hostpath(), peerpath() + +=back + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=head2 IO::lib::IO::File, IO::File - supply object methods for filehandles =item SYNOPSIS +=item DESCRIPTION + +=item CONSTRUCTOR + +new ([ ARGS ] ), new_tmpfile + +=item METHODS + +open( FILENAME [,MODE [,PERMS]] ) + +=item SEE ALSO + +=item HISTORY + +=head2 IO::lib::IO::Handle, IO::Handle - supply object methods for I/O +handles + +=item SYNOPSIS =item DESCRIPTION +=item CONSTRUCTOR -=item AVAILABILITY +new (), new_from_fd ( FD, MODE ) +=item METHODS -=item BUGS +$fh->fdopen ( FD, MODE ), $fh->opened, $fh->getline, $fh->getlines, +$fh->ungetc ( ORD ), $fh->write ( BUF, LEN [, OFFSET }\] ), $fh->flush, +$fh->error, $fh->clearerr, $fh->untaint +=item NOTE =item SEE ALSO +=item BUGS + +=item HISTORY + +=head2 IO::lib::IO::Pipe, IO::pipe - supply object methods for pipes +=item SYNOPSIS +=item DESCRIPTION +=item CONSTRCUTOR -=head2 Getopt::Long, GetOptions - extended processing of command line -options +new ( [READER, WRITER] ) + +=item METHODS + +reader ([ARGS]), writer ([ARGS]), handles () + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=head2 IO::lib::IO::Seekable, IO::Seekable - supply seek based methods for +I/O objects =item SYNOPSIS +=item DESCRIPTION + +=item SEE ALSO + +=item HISTORY + +=head2 IO::lib::IO::Select, IO::Select - OO interface to the select system +call + +=item SYNOPSIS =item DESCRIPTION +=item CONSTRUCTOR -<none>, !, =s, :s, =i, :i, =f, :f +new ( [ HANDLES ] ) -=over +=item METHODS -=item Linkage specification +add ( HANDLES ), remove ( HANDLES ), exists ( HANDLE ), handles, can_read ( +[ TIMEOUT ] ), can_write ( [ TIMEOUT ] ), has_error ( [ TIMEOUT ] ), count +(), bits(), bits(), select ( READ, WRITE, ERROR [, TIMEOUT ] ) +=item EXAMPLE -=item Aliases and abbreviations +=item AUTHOR +=item COPYRIGHT -=item Non-option call-back routine +=head2 IO::lib::IO::Socket, IO::Socket - Object interface to socket +communications +=item 
SYNOPSIS -=item Option starters +=item DESCRIPTION +=item CONSTRUCTOR -=item Return value +new ( [ARGS] ) + +=item METHODS + +accept([PKG]), timeout([VAL]), sockopt(OPT [, VAL]), sockdomain, socktype, +protocol + +=item SUB-CLASSES + +=over + +=item IO::Socket::INET +=item METHODS + +sockaddr (), sockport (), sockhost (), peeraddr (), peerport (), peerhost +() +=item IO::Socket::UNIX + +=item METHODS +hostpath(), peerpath() =back -=item COMPATIBILITY +=item SEE ALSO +=item AUTHOR -=item EXAMPLES +=item COPYRIGHT +=head2 IPC::Open2, open2 - open a process for both reading and writing + +=item SYNOPSIS + +=item DESCRIPTION -=item CONFIGURATION VARIABLES +=item WARNING +=item SEE ALSO -$Getopt::Long::autoabbrev, $Getopt::Long::getopt_compat, -$Getopt::Long::order, $Getopt::Long::ignorecase, -$Getopt::Long::VERSION, $Getopt::Long::error, $Getopt::Long::debug +=head2 IPC::Open3, open3 - open a process for reading, writing, and error +handling +=item SYNOPSIS +=item DESCRIPTION +=item WARNING -=head2 Getopt::Std, getopt - Process single-character switches with -switch clustering +=head2 Math::BigFloat - Arbitrary length float math package =item SYNOPSIS +=item DESCRIPTION + +number format, Error returns 'NaN', Division is computed to + +=item BUGS + +=item AUTHOR + +=head2 Math::BigInt - Arbitrary size integer math package + +=item SYNOPSIS =item DESCRIPTION +Canonical notation, Input, Output +=item EXAMPLES +=item BUGS +=item AUTHOR -=head2 I18N::Collate - compare 8-bit scalar data according to the -current locale +=head2 Math::Complex - complex numbers and associated mathematical +functions =item SYNOPSIS +=item DESCRIPTION + +=item OPERATIONS + +=item CREATION + +=item STRINGIFICATION + +=item USAGE + +=item ERRORS DUE TO DIVISION BY ZERO + +=item BUGS + +=item AUTHORS + +=head2 Math::Trig - trigonometric functions + +=item SYNOPSIS =item DESCRIPTION +=item TRIGONOMETRIC FUNCTIONS +=over +=item ERRORS DUE TO DIVISION BY ZERO +=item SIMPLE (REAL) ARGUMENTS, COMPLEX 
RESULTS -=head2 IPC::Open2, open2 - open a process for both reading and writing +=back + +=item ANGLE CONVERSIONS + +=item BUGS + +=item AUTHORS + +=head2 NDBM_File - Tied access to ndbm files =item SYNOPSIS +=item DESCRIPTION + +=head2 Net::Ping - check a remote host for reachability + +=item SYNOPSIS =item DESCRIPTION +=over + +=item Functions + +Net::Ping->new([$proto [, $def_timeout [, $bytes]]]);, $p->ping($host [, +$timeout]);, $p->close();, pingecho($host [, $timeout]); + +=back =item WARNING +=item NOTES -=item SEE ALSO +=head2 Net::hostent - by-name interface to Perl's built-in gethost*() +functions + +=item SYNOPSIS +=item DESCRIPTION +=item EXAMPLES +=item NOTE +=item AUTHOR -=head2 IPC::Open3, open3 - open a process for reading, writing, and -error handling +=head2 Net::netent - by-name interface to Perl's built-in getnet*() +functions =item SYNOPSIS +=item DESCRIPTION + +=item EXAMPLES + +=item NOTE + +=item AUTHOR + +=head2 Net::protoent - by-name interface to Perl's built-in getproto*() +functions + +=item SYNOPSIS =item DESCRIPTION +=item NOTE +=item AUTHOR +=head2 Net::servent - by-name interface to Perl's built-in getserv*() +functions +=item SYNOPSIS -=head2 Net::Ping, pingecho - check a host for upness +=item DESCRIPTION + +=item EXAMPLES + +=item NOTE + +=item AUTHOR + +=head2 ODBM_File - Tied access to odbm files =item SYNOPSIS +=item DESCRIPTION + +=head2 Opcode - Disable named opcodes when compiling perl code + +=item SYNOPSIS =item DESCRIPTION +=item NOTE + +=item WARNING -=over +=item Operator Names and Operator Lists -=item Parameters +an operator name (opname), an operator tag name (optag), a negated opname +or optag, an operator set (opset) +=item Opcode Functions -hostname, timeout +opcodes, opset (OP, ...), opset_to_ops (OPSET), opset_to_hex (OPSET), +full_opset, empty_opset, invert_opset (OPSET), verify_opset (OPSET, ...), +define_optag (OPTAG, OPSET), opmask_add (OPSET), opmask, opdesc (OP, ...), +opdump (PAT) +=item Manipulating 
Opsets +=item TO DO (maybe) -=back +=item Predefined Opcode Tags + +:base_core, :base_mem, :base_loop, :base_io, :base_orig, :base_math, +:default, :filesys_read, :sys_db, :browse, :filesys_open, :filesys_write, +:subprocess, :ownprocess, :others, :still_to_be_decided, :dangerous + +=item SEE ALSO + +=item AUTHORS + +=head2 Opcode::Safe, Safe - Compile and execute code in restricted +compartments + +=item SYNOPSIS + +=item DESCRIPTION + +a new namespace, an operator mask =item WARNING +=over +=item RECENT CHANGES +=item Methods in class Safe +permit (OP, ...), permit_only (OP, ...), deny (OP, ...), deny_only (OP, +...), trap (OP, ...), untrap (OP, ...), share (NAME, ...), share_from +(PACKAGE, ARRAYREF), varglob (VARNAME), reval (STRING), rdo (FILENAME), +root (NAMESPACE), mask (MASK) -=head2 POSIX - Perl interface to IEEE Std 1003.1 +=item Some Safety Issues -=item SYNOPSIS +Memory, CPU, Snooping, Signals, State Changes +=item AUTHOR + +=back + +=head2 Opcode::ops, ops - Perl pragma to restrict unsafe operations when +compiling + +=item SYNOPSIS =item DESCRIPTION +=item SEE ALSO -=item NOTE +=head2 POSIX - Perl interface to IEEE Std 1003.1 +=item SYNOPSIS -=item CAVEATS +=item DESCRIPTION + +=item NOTE +=item CAVEATS =item FUNCTIONS - -_exit, abort, abs, access, acos, alarm, asctime, asin, assert, atan, -atan2, atexit, atof, atoi, atol, bsearch, calloc, ceil, chdir, chmod, -chown, clearerr, clock, close, closedir, cos, cosh, creat, ctermid, -ctime, cuserid, difftime, div, dup, dup2, errno, execl, execle, execlp, -execv, execve, execvp, exit, exp, fabs, fclose, fcntl, fdopen, feof, -ferror, fflush, fgetc, fgetpos, fgets, fileno, floor, fmod, fopen, -fork, fpathconf, fprintf, fputc, fputs, fread, free, freopen, frexp, -fscanf, fseek, fsetpos, fstat, ftell, fwrite, getc, getchar, getcwd, -getegid, getenv, geteuid, getgid, getgrgid, getgrnam, getgroups, -getlogin, getpgrp, getpid, getppid, getpwnam, getpwuid, gets, getuid, -gmtime, isalnum, isalpha, isatty, iscntrl, 
isdigit, isgraph, islower, -isprint, ispunct, isspace, isupper, isxdigit, kill, labs, ldexp, ldiv, -link, localeconv, localtime, log, log10, longjmp, lseek, malloc, mblen, -mbstowcs, mbtowc, memchr, memcmp, memcpy, memmove, memset, mkdir, -mkfifo, mktime, modf, nice, offsetof, open, opendir, pat +_exit, abort, abs, access, acos, alarm, asctime, asin, assert, atan, atan2, +atexit, atof, atoi, atol, bsearch, calloc, ceil, chdir, chmod, chown, +clearerr, clock, close, closedir, cos, cosh, creat, ctermid, ctime, +cuserid, difftime, div, dup, dup2, errno, execl, execle, execlp, execv, +execve, execvp, exit, exp, fabs, fclose, fcntl, fdopen, feof, ferror, +fflush, fgetc, fgetpos, fgets, fileno, floor, fmod, fopen, fork, fpathconf, +fprintf, fputc, fputs, fread, free, freopen, frexp, fscanf, fseek, fsetpos, +fstat, ftell, fwrite, getc, getchar, getcwd, getegid, getenv, geteuid, +getgid, getgrgid, getgrnam, getgroups, getlogin, getpgrp, getpid, getppid, +getpwnam, getpwuid, gets, getuid, gmtime, isalnum, isalpha, isatty, +iscntrl, isdigit, isgraph, islower, isprint, ispunct, isspace, isupper, +isxdigit, kill, labs, ldexp, ldiv, link, localeconv, localtime, log, log10, +longjmp, lseek, malloc, mblen, mbstowcs, mbtowc, memchr, memcmp, memcpy, +memmove, memset, mkdir, mkfifo, mktime, modf, nice, offsetof, open, +opendir, pathconf, pause, perror, pipe, pow, printf, putc, putchar, puts, +qsort, raise, rand, read, readdir, realloc, remove, rename, rewind, +rewinddir, rmdir, scanf, setgid, setjmp, setlocale, setpgid, setsid, +setuid, sigaction, siglongjmp, sigpending, sigprocmask, sigsetjmp, +sigsuspend, sin, sinh, sleep, sprintf, sqrt, srand, sscanf, stat, strcat, +strchr, strcmp, strcoll, strcpy, strcspn, strerror, strftime, strlen, +strncat, strncmp, strncpy, stroul, strpbrk, strrchr, strspn, strstr, +strtod, strtok, strtol, strtoul, strxfrm, sysconf, system, tan, tanh, +tcdrain, tcflow, tcflush, tcgetpgrp, tcsendbreak, tcsetpgrp, time, times, +tmpfile, tmpnam, tolower, 
toupper, ttyname, tzname, tzset, umask, uname, +ungetc, unlink, utime, vfprintf, vprintf, vsprintf, wait, waitpid, +wcstombs, wctomb, write =item CLASSES - =over =item POSIX::SigAction - new =item POSIX::SigSet - new, addset, delset, emptyset, fillset, ismember =item POSIX::Termios - new, getattr, getcc, getcflag, getiflag, getispeed, getlflag, getoflag, getospeed, setattr, setcc, setcflag, setiflag, setispeed, setlflag, setoflag, setospeed, Baud rate values, Terminal interface values, c_cc field values, c_cflag field values, c_iflag field values, c_lflag field values, c_oflag field values - - =back =item PATHNAME CONSTANTS - Constants =item POSIX CONSTANTS - Constants =item SYSTEM CONFIGURATION - Constants =item ERRNO - Constants =item FCNTL - Constants =item FLOAT - Constants =item LIMITS - Constants =item LOCALE - Constants =item MATH - Constants =item SIGNAL - Constants =item STAT - Constants, Macros =item STDLIB - Constants =item STDIO - Constants =item TIME - Constants =item UNISTD - Constants =item WAIT - Constants, Macros =item CREATION +=head2 Pod::Html, Pod::HTML - module to convert pod files to HTML + +=item SYNOPSIS + +=item DESCRIPTION + +=item ARGUMENTS + +help, htmlroot, infile, outfile, podroot, podpath, libpods, netscape, +nonetscape, index, noindex, recurse, norecurse, title, verbose + +=item EXAMPLE + +=item AUTHOR +=item BUGS +=item SEE ALSO +=item COPYRIGHT =head2 Pod::Text - convert POD data to formatted ASCII text =item SYNOPSIS - =item DESCRIPTION - =item AUTHOR - =item TODO +=head2 SDBM_File - Tied access to sdbm files +=item SYNOPSIS +=item DESCRIPTION +=head2 Safe - Compile and execute code in restricted compartments -=head2 Safe - Safe extension module for Perl +=item SYNOPSIS =item DESCRIPTION - a new namespace, an operator mask -=over +=item WARNING -=item Operator masks +=over +=item RECENT CHANGES =item Methods in class Safe +permit (OP, ...), permit_only (OP, ...), deny (OP, ...), deny_only (OP, +...), trap (OP, ...), untrap (OP, 
...), share (NAME, ...), share_from +(PACKAGE, ARRAYREF), varglob (VARNAME), reval (STRING), rdo (FILENAME), +root (NAMESPACE), mask (MASK) -NAMESPACE, MASK, root (NAMESPACE), mask (MASK), trap (OP, ...), untrap -(OP, ...), share (VARNAME, ...), varglob (VARNAME), reval (STRING), rdo -(FILENAME) - -=item Subroutines in package Safe - +=item Some Safety Issues -ops_to_mask (OP, ...), mask_to_ops (MASK), opcode (OP, ...), opname -(OP, ...), fullmask, emptymask, MAXO, op_mask +Memory, CPU, Snooping, Signals, State Changes =item AUTHOR - - - =back - - - =head2 Search::Dict, look - search for key in dictionary file =item SYNOPSIS - =item DESCRIPTION - - - - =head2 SelectSaver - save and restore selected file handle =item SYNOPSIS - =item DESCRIPTION - - - - =head2 SelfLoader - load functions only on demand =item SYNOPSIS - =item DESCRIPTION - =over =item The __DATA__ token - =item SelfLoader autoloading - =item Autoloading and package lexicals - =item SelfLoader and AutoLoader - =item __DATA__, __END__, and the FOOBAR::DATA filehandle. - =item Classes and inherited methods. 
- - - =back =item Multiple packages and fully qualified subroutine names - - - - -=head2 Socket, sockaddr_in, sockaddr_un, inet_aton, inet_ntoa - load -the C socket.h defines and structure manipulators +=head2 Shell - run shell commands transparently within perl =item SYNOPSIS - =item DESCRIPTION +=item AUTHOR -inet_aton HOSTNAME, inet_ntoa IP_ADDRESS, INADDR_ANY, INADDR_LOOPBACK, -INADDR_NONE, sockaddr_in PORT, ADDRESS, sockaddr_in SOCKADDR_IN, -pack_sockaddr_in PORT, IP_ADDRESS, unpack_sockaddr_in SOCKADDR_IN, -sockaddr_un PATHNAME, sockaddr_un SOCKADDR_UN, pack_sockaddr_un PATH, -unpack_sockaddr_un SOCKADDR_UN +=head2 Socket, sockaddr_in, sockaddr_un, inet_aton, inet_ntoa - load the C +socket.h defines and structure manipulators +=item SYNOPSIS +=item DESCRIPTION +inet_aton HOSTNAME, inet_ntoa IP_ADDRESS, INADDR_ANY, INADDR_BROADCAST, +INADDR_LOOPBACK, INADDR_NONE, sockaddr_in PORT, ADDRESS, sockaddr_in +SOCKADDR_IN, pack_sockaddr_in PORT, IP_ADDRESS, unpack_sockaddr_in +SOCKADDR_IN, sockaddr_un PATHNAME, sockaddr_un SOCKADDR_UN, +pack_sockaddr_un PATH, unpack_sockaddr_un SOCKADDR_UN =head2 Symbol - manipulate Perl symbols and their names =item SYNOPSIS - =item DESCRIPTION - - - - =head2 Sys::Hostname - Try every conceivable way to get hostname =item SYNOPSIS - =item DESCRIPTION - =item AUTHOR - - - - -=head2 Syslog, Sys::Syslog, openlog, closelog, setlogmask, syslog - -Perl interface to the UNIX syslog(3) calls +=head2 Syslog, Sys::Syslog, openlog, closelog, setlogmask, syslog - Perl +interface to the UNIX syslog(3) calls =item SYNOPSIS - =item DESCRIPTION - -openlog $ident, $logopt, $facility, syslog $priority, $mask, $format, -@args, setlogmask $mask_priority, closelog +openlog $ident, $logopt, $facility, syslog $priority, $format, @args, +setlogmask $mask_priority, closelog =item EXAMPLES - =item DEPENDENCIES - =item SEE ALSO - =item AUTHOR - - - - =head2 Term::Cap - Perl termcap interface =item SYNOPSIS - =item DESCRIPTION - =item EXAMPLES - - - - =head2 
Term::Complete - Perl word completion module =item SYNOPSIS - =item DESCRIPTION - -<tab>Attempts word completion. Cannot be changed, ^D, ^U, <del>, <bs> +E<lt>tabE<gt>, ^D, ^U, E<lt>delE<gt>, E<lt>bsE<gt> =item DIAGNOSTICS - =item BUGS - =item AUTHOR - - - - -=head2 Term::ReadLine - Perl interface to various C<readline> packages. -If no real package is found, substitutes stubs instead of basic -functions. +=head2 Term::ReadLine - Perl interface to various C<readline> packages. If +no real package is found, substitutes stubs instead of basic functions. =item SYNOPSIS - =item DESCRIPTION - =item Minimal set of supported functions - C<ReadLine>, C<new>, C<readline>, C<addhistory>, C<IN>, $C<OUT>, -C<MinLine>, C<findConsole>, C<Features> - -=item EXPORTS - +C<MinLine>, C<findConsole>, Attribs, C<Features> +=item Additional supported functions +=item EXPORTS +=item ENVIRONMENT =head2 Test::Harness - run perl standard test scripts with statistics =item SYNOPSIS - =item DESCRIPTION - =over =item The test script output - - - =back =item EXPORT - =item DIAGNOSTICS - C<All tests successful.\nFiles=%d, Tests=%d, %s>, C<FAILED tests -%s\n\tFailed %d/%d tests, %.2f%% okay.>, C<Test returned status %d -(wstat %d)>, C<Failed 1 test, %.2f%% okay. %s>, C<Failed %d/%d tests, -%.2f%% okay. %s> +%s\n\tFailed %d/%d tests, %.2f%% okay.>, C<Test returned status %d (wstat +%d)>, C<Failed 1 test, %.2f%% okay. %s>, C<Failed %d/%d tests, %.2f%% okay. 
+%s> =item SEE ALSO - =item AUTHORS - =item BUGS - - - - =head2 Text::Abbrev, abbrev - create an abbreviation table from a list =item SYNOPSIS - =item DESCRIPTION - =item EXAMPLE +=head2 Text::ParseWords - parse text into an array of tokens +=item SYNOPSIS +=item DESCRIPTION +=item AUTHORS -=head2 Text::Soundex - Implementation of the Soundex Algorithm as -Described by Knuth +=head2 Text::Soundex - Implementation of the Soundex Algorithm as Described +by Knuth =item SYNOPSIS - =item DESCRIPTION - =item EXAMPLES - =item LIMITATIONS - =item AUTHOR +=head2 Text::Tabs -- expand and unexpand tabs per the unix expand(1) and +unexpand(1) +=item SYNOPSIS +=item DESCRIPTION +=item BUGS -=head2 Text::Tabs -- expand and unexpand tabs +=item AUTHOR -=item SYNOPSIS +=head2 Text::Wrap - line wrapping to form simple paragraphs +=item SYNOPSIS =item DESCRIPTION +=item EXAMPLE + +=item BUGS =item AUTHOR +=head2 Tie::Hash, Tie::StdHash - base class definitions for tied hashes + +=item SYNOPSIS + +=item DESCRIPTION +TIEHASH classname, LIST, STORE this, key, value, FETCH this, key, FIRSTKEY +this, NEXTKEY this, lastkey, EXISTS this, key, DELETE this, key, CLEAR this +=item CAVEATS +=item MORE INFORMATION -=head2 Text::Wrap -- wrap text into a paragraph +=head2 Tie::RefHash - use references as hash keys =item SYNOPSIS - =item DESCRIPTION +=item EXAMPLE =item AUTHOR +=item VERSION +=item SEE ALSO - - -=head2 Tie::Hash, Tie::StdHash - base class definitions for tied hashes +=head2 Tie::Scalar, Tie::StdScalar - base class definitions for tied +scalars =item SYNOPSIS - =item DESCRIPTION +TIESCALAR classname, LIST, FETCH this, STORE this, value, DESTROY this -TIEHASH classname, LIST, STORE this, key, value, FETCH this, key, -FIRSTKEY this, NEXTKEY this, lastkey, EXISTS this, key, DELETE this, -key, CLEAR this +=item MORE INFORMATION -=item CAVEATS +=head2 Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing +=item SYNOPSIS -=item MORE INFORMATION +=item DESCRIPTION +=item CAVEATS 
+=head2 Time::Local - efficiently compute time from local and GMT time +=item SYNOPSIS +=item DESCRIPTION -=head2 Tie::Scalar, Tie::StdScalar - base class definitions for tied -scalars +=head2 Time::gmtime - by-name interface to Perl's built-in gmtime() +function =item SYNOPSIS - =item DESCRIPTION +=item NOTE -TIESCALAR classname, LIST, FETCH this, STORE this, value, DESTROY this +=item AUTHOR -=item MORE INFORMATION +=head2 Time::localtime - by-name interface to Perl's built-in localtime() +function +=item SYNOPSIS +=item DESCRIPTION +=item NOTE +=item AUTHOR -=head2 Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing +=head2 Time::tm - internal object used by Time::gmtime and Time::localtime =item SYNOPSIS +=item DESCRIPTION + +=item AUTHOR + +=head2 UNIVERSAL - base class for ALL classes (blessed references) + +=item SYNOPSIS =item DESCRIPTION +isa ( TYPE ), can ( METHOD ), VERSION ( [ REQUIRE ] ), isa ( VAL, TYPE ), +can ( VAL, METHOD ) -=item CAVEATS +=head2 User::grent - by-name interface to Perl's built-in getgr*() +functions +=item SYNOPSIS +=item DESCRIPTION +=item NOTE +=item AUTHOR -=head2 Time::Local - efficiently compute tome from local and GMT time +=head2 User::pwent - by-name interface to Perl's built-in getpw*() +functions =item SYNOPSIS - =item DESCRIPTION +=item NOTE - +=item AUTHOR =head1 AUXILIARY DOCUMENTATION -Here should be listed all the extra program's docs, but they don't all -have man pages yet: +Here should be listed all the extra programs' documentation, but they +don't all have manual pages yet: =item a2p @@ -3144,10 +4969,8 @@ have man pages yet: =item wrapsuid - =head1 AUTHOR -Larry Wall E<lt><F<lwall@sems.com>E<gt>, with the help of oodles of -other folks. - +Larry Wall <F<larry@wall.org>>, with the help of oodles +of other folks. 
diff --git a/pod/perltoot.pod b/pod/perltoot.pod new file mode 100644 index 0000000000..2f5634c58c --- /dev/null +++ b/pod/perltoot.pod @@ -0,0 +1,1789 @@ +=head1 NAME + +perltoot - Tom's object-oriented tutorial for perl + +=head1 DESCRIPTION + +Object-oriented programming is a big seller these days. Some managers +would rather have objects than sliced bread. Why is that? What's so +special about an object? Just what I<is> an object anyway? + +An object is nothing but a way of tucking away complex behaviours into +a neat little easy-to-use bundle. (This is what professors call +abstraction.) Smart people who have nothing to do but sit around for +weeks on end figuring out really hard problems make these nifty +objects that even regular people can use. (This is what professors call +software reuse.) Users (well, programmers) can play with this little +bundle all they want, but they aren't to open it up and mess with the +insides. Just like an expensive piece of hardware, the contract says +that you void the warranty if you muck with the cover. So don't do that. + +The heart of objects is the class, a protected little private namespace +full of data and functions. A class is a set of related routines that +addresses some problem area. You can think of it as a user-defined type. +The Perl package mechanism, also used for more traditional modules, +is used for class modules as well. Objects "live" in a class, meaning +that they belong to some package. + +More often than not, the class provides the user with little bundles. +These bundles are objects. They know whose class they belong to, +and how to behave. Users ask the class to do something, like "give +me an object." Or they can ask one of these objects to do something. +Asking a class to do something for you is calling a I<class method>. +Asking an object to do something for you is calling an I<object method>. 
+Asking either a class (usually) or an object (sometimes) to give you +back an object is calling a I<constructor>, which is just a +kind of method. + +That's all well and good, but how is an object different from any other +Perl data type? Just what is an object I<really>; that is, what's its +fundamental type? The answer to the first question is easy. An object +is different from any other data type in Perl in one and only one way: +you may dereference it using not merely string or numeric subscripts +as with simple arrays and hashes, but with named subroutine calls. +In a word, with I<methods>. + +The answer to the second question is that it's a reference, and not just +any reference, mind you, but one whose referent has been I<bless>()ed +into a particular class (read: package). What kind of reference? Well, +the answer to that one is a bit less concrete. That's because in Perl +the designer of the class can employ any sort of reference they'd like +as the underlying intrinsic data type. It could be a scalar, an array, +or a hash reference. It could even be a code reference. But because +of its inherent flexibility, an object is usually a hash reference. + +=head1 Creating a Class + +Before you create a class, you need to decide what to name it. That's +because the class (package) name governs the name of the file used to +house it, just as with regular modules. Then, that class (package) +should provide one or more ways to generate objects. Finally, it should +provide mechanisms to allow users of its objects to indirectly manipulate +these objects from a distance. + +For example, let's make a simple Person class module. It gets stored in +the file Person.pm. If it were called a Happy::Person class, it would +be stored in the file Happy/Person.pm, and its package would become +Happy::Person instead of just Person. 
(On a personal computer not +running Unix or Plan 9, but something like MacOS or VMS, the directory +separator may be different, but the principle is the same.) Do not assume +any formal relationship between modules based on their directory names. +This is merely a grouping convenience, and has no effect on inheritance, +variable accessibility, or anything else. + +For this module we aren't going to use Exporter, because we're +a well-behaved class module that doesn't export anything at all. +In order to manufacture objects, a class needs to have a I<constructor +method>. A constructor gives you back not just a regular data type, +but a brand-new object in that class. This magic is taken care of by +the bless() function, whose sole purpose is to enable its referent to +be used as an object. Remember: being an object really means nothing +more than that methods may now be called against it. + +While a constructor may be named anything you'd like, most Perl +programmers seem to like to call theirs new(). However, new() is not +a reserved word, and a class is under no obligation to supply such. +Some programmers have also been known to use a function with +the same name as the class as the constructor. + +=head2 Object Representation + +By far the most common mechanism used in Perl to represent a Pascal +record, a C struct, or a C++ class is an anonymous hash. That's because a +hash has an arbitrary number of data fields, each conveniently accessed by +an arbitrary name of your own devising. 
+ +If you were just doing a simple +struct-like emulation, you would likely go about it something like this: + + $rec = { + name => "Jason", + age => 23, + peers => [ "Norbert", "Rhys", "Phineas"], + }; + +If you felt like it, you could add a bit of visual distinction +by up-casing the hash keys: + + $rec = { + NAME => "Jason", + AGE => 23, + PEERS => [ "Norbert", "Rhys", "Phineas"], + }; + +And so you could get at C<$rec-E<gt>{NAME}> to find "Jason", or +C<@{ $rec-E<gt>{PEERS} }> to get at "Norbert", "Rhys", and "Phineas". +(Have you ever noticed how many 23-year-old programmers seem to +be named "Jason" these days? :-) + +This same model is often used for classes, although it is not considered +the pinnacle of programming propriety for folks from outside the +class to come waltzing into an object, brazenly accessing its data +members directly. Generally speaking, an object should be considered +an opaque cookie that you use I<object methods> to access. Visually, +methods look like you're dereffing a reference using a function name +instead of brackets or braces. + +=head2 Class Interface + +Some languages provide a formal syntactic interface to a class's methods, +but Perl does not. It relies on you to read the documentation of each +class. If you try to call an undefined method on an object, Perl won't +complain, but the program will trigger an exception while it's running. +Likewise, if you call a method expecting a prime number as its argument +with a non-prime one instead, you can't expect the compiler to catch this. +(Well, you can expect it all you like, but it's not going to happen.) + +Let's suppose you have a well-educated user of your Person class, +someone who has read the docs that explain the prescribed +interface. 
Here's how they might use the Person class: + + use Person; + + $him = Person->new(); + $him->name("Jason"); + $him->age(23); + $him->peers( "Norbert", "Rhys", "Phineas" ); + + push @All_Recs, $him; # save object in array for later + + printf "%s is %d years old.\n", $him->name, $him->age; + print "His peers are: ", join(", ", $him->peers), "\n"; + + printf "Last rec's name is %s\n", $All_Recs[-1]->name; + +As you can see, the user of the class doesn't know (or at least, has no +business paying attention to the fact) that the object has one particular +implementation or another. The interface to the class and its objects +is exclusively via methods, and that's all the user of the class should +ever play with. + +=head2 Constructors and Instance Methods + +Still, I<someone> has to know what's in the object. And that someone is +the class. It implements methods that the programmer uses to access +the object. Here's how to implement the Person class using the standard +hash-ref-as-an-object idiom. We'll make a class method called new() to +act as the constructor, and three object methods called name(), age(), and +peers() to get at per-object data hidden away in our anonymous hash. + + package Person; + use strict; + + ################################################## + ## the object constructor (simplistic version) ## + ################################################## + sub new { + my $self = {}; + $self->{NAME} = undef; + $self->{AGE} = undef; + $self->{PEERS} = []; + bless($self); # but see below + return $self; + } + + ############################################## + ## methods to access per-object data ## + ## ## + ## With args, they set the value. Without ## + ## any, they only retrieve it/them. 
## + ############################################## + + sub name { + my $self = shift; + if (@_) { $self->{NAME} = shift } + return $self->{NAME}; + } + + sub age { + my $self = shift; + if (@_) { $self->{AGE} = shift } + return $self->{AGE}; + } + + sub peers { + my $self = shift; + if (@_) { @{ $self->{PEERS} } = @_ } + return @{ $self->{PEERS} }; + } + + 1; # so the require or use succeeds + +We've created three methods to access an object's data, name(), age(), +and peers(). These are all substantially similar. If called with an +argument, they set the appropriate field; otherwise they return the +value held by that field, meaning the value of that hash key. + +=head2 Planning for the Future: Better Constructors + +Even though at this point you may not even know what it means, someday +you're going to worry about inheritance. (You can safely ignore this +for now and worry about it later if you'd like.) To ensure that this +all works out smoothly, you must use the double-argument form of bless(). +The second argument is the class into which the referent will be blessed. +By not assuming our own class as the default second argument and instead +using the class passed into us, we make our constructor inheritable. + +While we're at it, let's make our constructor a bit more flexible. +Rather than being uniquely a class method, we'll set it up so that +it can be called as either a class method I<or> an object +method. That way you can say: + + $me = Person->new(); + $him = $me->new(); + +To do this, all we have to do is check whether what was passed in +was a reference or not. If so, we were invoked as an object method, +and we need to extract the package (class) using the ref() function. +If not, we just use the string passed in as the package name +for blessing our referent. 
+ + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + $self->{NAME} = undef; + $self->{AGE} = undef; + $self->{PEERS} = []; + bless ($self, $class); + return $self; + } + +That's about all there is for constructors. These methods bring objects +to life, returning neat little opaque bundles to the user to be used in +subsequent method calls. + +=head2 Destructors + +Every story has a beginning and an end. The beginning of the object's +story is its constructor, explicitly called when the object comes into +existence. But the ending of its story is the I<destructor>, a method +implicitly called when an object leaves this life. Any per-object +clean-up code is placed in the destructor, which must (in Perl) be called +DESTROY. + +If constructors can have arbitrary names, then why not destructors? +Because while a constructor is explicitly called, a destructor is not. +Destruction happens automatically via Perl's garbage collection (GC) +system, which is a quick but somewhat lazy reference-based GC system. +To know what to call, Perl insists that the destructor be named DESTROY. +Perl's notion of the right time to call a destructor is not well-defined +currently, which is why your destructors should not rely on when they are +called. + +Why is DESTROY in all caps? Perl on occasion uses purely uppercase +function names as a convention to indicate that the function will +be automatically called by Perl in some way. Others that are called +implicitly include BEGIN, END, AUTOLOAD, plus all methods used by +tied objects, described in L<perltie>. + +In really good object-oriented programming languages, the user doesn't +care when the destructor is called. It just happens when it's supposed +to. 
In low-level languages without any GC at all, there's no way to +depend on this happening at the right time, so the programmer must +explicitly call the destructor to clean up memory and state, crossing +their fingers that it's the right time to do so. Unlike C++, an +object destructor is nearly never needed in Perl, and even when it is, +explicit invocation is uncalled for. In the case of our Person class, +we don't need a destructor because Perl takes care of simple matters +like memory deallocation. + +The only situation where Perl's reference-based GC won't work is +when there's a circularity in the data structure, such as: + + $this->{WHATEVER} = $this; + +In that case, you must delete the self-reference manually if you expect +your program not to leak memory. While admittedly error-prone, this is +the best we can do right now. Nonetheless, rest assured that when your +program is finished, its objects' destructors are all duly called. +So you are guaranteed that an object I<eventually> gets properly +destroyed, except in the unique case of a program that never exits. +(If you're running Perl embedded in another application, this full GC +pass happens a bit more frequently--whenever a thread shuts down.) + +=head2 Other Object Methods + +The methods we've talked about so far have either been constructors or +else simple "data methods", interfaces to data stored in the object. +These are a bit like an object's data members in the C++ world, except +that strangers don't access them as data. Instead, they should only +access the object's data indirectly via its methods. This is an +important rule: in Perl, access to an object's data should I<only> +be made through methods. + +Perl doesn't impose restrictions on who gets to use which methods. +The public-versus-private distinction is by convention, not syntax. +(Well, unless you use the Alias module described below in +L</"Data Members as Variables">.) 
Occasionally you'll see method names beginning or ending +with an underscore or two. This marking is a convention indicating +that the methods are private to that class alone and sometimes to its +closest acquaintances, its immediate subclasses. But this distinction +is not enforced by Perl itself. It's up to the programmer to behave. + +There's no reason to limit methods to those that simply access data. +Methods can do anything at all. The key point is that they're invoked +against an object or a class. Let's say we'd like object methods that +do more than fetch or set one particular field. + + sub exclaim { + my $self = shift; + return sprintf "Hi, I'm %s, age %d, working with %s", + $self->{NAME}, $self->{AGE}, join(", ", $self->{PEERS}); + } + +Or maybe even one like this: + + sub happy_birthday { + my $self = shift; + return ++$self->{AGE}; + } + +Some might argue that one should go at these this way: + + sub exclaim { + my $self = shift; + return sprintf "Hi, I'm %s, age %d, working with %s", + $self->name, $self->age, join(", ", $self->peers); + } + + sub happy_birthday { + my $self = shift; + return $self->age( $self->age() + 1 ); + } + +But since these methods are all executing in the class itself, this +may not be critical. There are tradeoffs to be made. Using direct +hash access is faster (about an order of magnitude faster, in fact), and +it's more convenient when you want to interpolate in strings. But using +methods (the external interface) internally shields not just the users of +your class but even you yourself from changes in your data representation. + +=head1 Class Data + +What about "class data", data items common to each object in a class? +What would you want that for? Well, in your Person class, you might +like to keep track of the total people alive. How do you implement that? + +You I<could> make it a global variable called $Person::Census. 
But about the +only reason you'd do that would be if you I<wanted> people to be able to +get at your class data directly. They could just say $Person::Census +and play around with it. Maybe this is ok in your design scheme. +You might even conceivably want to make it an exported variable. To be +exportable, a variable must be a (package) global. If this were a +traditional module rather than an object-oriented one, you might do that. + +While this approach is expected in most traditional modules, it's +generally considered rather poor form in most object modules. In an +object module, you should set up a protective veil to separate interface +from implementation. So provide a class method to access class data +just as you provide object methods to access object data. + +So, you I<could> still keep $Census as a package global and rely upon +others to honor the contract of the module and therefore not play around +with its implementation. You could even be supertricky and make $Census a +tied object as described in L<perltie>, thereby intercepting all accesses. + +But more often than not, you just want to make your class data a +file-scoped lexical. To do so, simply put this at the top of the file: + + my $Census = 0; + +Even though the scope of a my() normally expires when the block in which +it was declared is done (in this case the whole file being required or +used), Perl's deep binding of lexical variables guarantees that the +variable will not be deallocated, remaining accessible to functions +declared within that scope. This doesn't work with global variables +given temporary values via local(), though. 
+ +Irrespective of whether you leave $Census a package global or make +it instead a file-scoped lexical, you should make these +changes to your Person::new() constructor: + + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + $Census++; + $self->{NAME} = undef; + $self->{AGE} = undef; + $self->{PEERS} = []; + bless ($self, $class); + return $self; + } + + sub population { + return $Census; + } + +Now that we've done this, we certainly do need a destructor so that +when a Person is destroyed, the $Census goes down. Here's how +this could be done: + + sub DESTROY { --$Census } + +Notice how there's no memory to deallocate in the destructor? That's +something that Perl takes care of for you all by itself. + +=head2 Accessing Class Data + +It turns out that this is not really a good way to go about handling +class data. A good scalable rule is that I<you must never reference class +data directly from an object method>. Otherwise you aren't building a +scalable, inheritable class. The object must be the rendezvous point +for all operations, especially from an object method. The globals +(class data) would in some sense be in the "wrong" package in your +derived classes. In Perl, methods execute in the context of the class +they were defined in, I<not> that of the object that triggered them. +Therefore, namespace visibility of package globals in methods is unrelated +to inheritance. + +Got that? Maybe not. Ok, let's say that some other class "borrowed" +(well, inherited) the DESTROY method as it was defined above. When those +objects are destroyed, the original $Census variable will be altered, +not the one in the new class's package namespace. Perhaps this is what +you want, but probably it isn't. + +Here's how to fix this. We'll store a reference to the data in the +value accessed by the hash key "_CENSUS". Why the underscore? Well, +mostly because an initial underscore already conveys strong feelings +of magicalness to a C programmer. 
It's really just a mnemonic device +to remind ourselves that this field is special and not to be used as +a public data member in the same way that NAME, AGE, and PEERS are. +(Because we've been developing this code under the strict pragma, prior +to perl version 5.004 we'll have to quote the field name.) + + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + $self->{NAME} = undef; + $self->{AGE} = undef; + $self->{PEERS} = []; + # "private" data + $self->{"_CENSUS"} = \$Census; + bless ($self, $class); + ++ ${ $self->{"_CENSUS"} }; + return $self; + } + + sub population { + my $self = shift; + if (ref $self) { + return ${ $self->{"_CENSUS"} }; + } else { + return $Census; + } + } + + sub DESTROY { + my $self = shift; + -- ${ $self->{"_CENSUS"} }; + } + +=head2 Debugging Methods + +It's common for a class to have a debugging mechanism. For example, +you might want to see when objects are created or destroyed. To do that, +add a debugging variable as a file-scoped lexical. For this, we'll pull +in the standard Carp module to emit our warnings and fatal messages. +That way messages will come out with the caller's filename and +line number instead of our own; if we wanted them to be from our own +perspective, we'd just use die() and warn() directly instead of croak() +and carp() respectively. + + use Carp; + my $Debugging = 0; + +Now add a new class method to access the variable. + + sub debug { + my $class = shift; + if (ref $class) { confess "Class method called as object method" } + unless (@_ == 1) { confess "usage: CLASSNAME->debug(level)" } + $Debugging = shift; + } + +Now fix up DESTROY to murmur a bit as the moribund object expires: + + sub DESTROY { + my $self = shift; + if ($Debugging) { carp "Destroying $self " . $self->name } + -- ${ $self->{"_CENSUS"} }; + } + +One could conceivably make a per-object debug state. 
That +way you could call both of these: + + Person->debug(1); # entire class + $him->debug(1); # just this object + +To do so, we need our debugging method to be a "bimodal" one, one that +works on both classes I<and> objects. Therefore, adjust the debug() +and DESTROY methods as follows: + + sub debug { + my $self = shift; + confess "usage: thing->debug(level)" unless @_ == 1; + my $level = shift; + if (ref($self)) { + $self->{"_DEBUG"} = $level; # just myself + } else { + $Debugging = $level; # whole class + } + } + + sub DESTROY { + my $self = shift; + if ($Debugging || $self->{"_DEBUG"}) { + carp "Destroying $self " . $self->name; + } + -- ${ $self->{"_CENSUS"} }; + } + +What happens if a derived class (which we'll call Employee) inherits +methods from this Person base class? Then C<Employee-E<gt>debug()>, when called +as a class method, manipulates $Person::Debugging not $Employee::Debugging. + +=head2 Class Destructors + +The object destructor handles the death of each distinct object. But sometimes +you want a bit of cleanup when the entire class is shut down, which +currently only happens when the program exits. To make such a +I<class destructor>, create a function in that class's package named +END. This works just like the END function in traditional modules, +meaning that it gets called whenever your program exits unless it execs +or dies of an uncaught signal. For example, + + sub END { + if ($Debugging) { + print "All persons are going away now.\n"; + } + } + +When the program exits, all the class destructors (END functions) are +called in the opposite order that they were loaded in (LIFO order). + +=head2 Documenting the Interface + +And there you have it: we've just shown you the I<implementation> of this +Person class. Its I<interface> would be its documentation. Usually this +means putting it in pod ("plain old documentation") format right there +in the same file. 
In our Person example, we would place the following +docs anywhere in the Person.pm file. Even though it looks mostly like +code, it's not. It's embedded documentation such as would be used by +the pod2man, pod2html, or pod2text programs. The Perl compiler ignores +pods entirely, just as the translators ignore code. Here's an example of +some pods describing the informal interface: + + =head1 NAME + + Person - class to implement people + + =head1 SYNOPSIS + + use Person; + + ################# + # class methods # + ################# + $ob = Person->new; + $count = Person->population; + + ####################### + # object data methods # + ####################### + + ### get versions ### + $who = $ob->name; + $years = $ob->age; + @pals = $ob->peers; + + ### set versions ### + $ob->name("Jason"); + $ob->age(23); + $ob->peers( "Norbert", "Rhys", "Phineas" ); + + ######################## + # other object methods # + ######################## + + $phrase = $ob->exclaim; + $ob->happy_birthday; + + =head1 DESCRIPTION + + The Person class implements dah dee dah dee dah.... + +That's all there is to the matter of interface versus implementation. +A programmer who opens up the module and plays around with all the private +little shiny bits that were safely locked up behind the interface contract +has voided the warranty, and you shouldn't worry about their fate. + +=head1 Aggregation + +Suppose you later want to change the class to implement better names. +Perhaps you'd like to support both given names (called Christian names, +irrespective of one's religion) and family names (called surnames), plus +nicknames and titles. If users of your Person class have been properly +accessing it through its documented interface, then you can easily change +the underlying implementation. If they haven't, then they lose and +it's their fault for breaking the contract and voiding their warranty. + +To do this, we'll make another class, this one called Fullname. 
What's +the Fullname class look like? To answer that question, you have to +first figure out how you want to use it. How about we use it this way: + + $him = Person->new(); + $him->fullname->title("St"); + $him->fullname->christian("Thomas"); + $him->fullname->surname("Aquinas"); + $him->fullname->nickname("Tommy"); + printf "His normal name is %s\n", $him->name; + printf "But his real name is %s\n", $him->fullname->as_string; + +Ok. To do this, we'll change Person::new() so that it supports +a full name field this way: + + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + $self->{FULLNAME} = Fullname->new(); + $self->{AGE} = undef; + $self->{PEERS} = []; + $self->{"_CENSUS"} = \$Census; + bless ($self, $class); + ++ ${ $self->{"_CENSUS"} }; + return $self; + } + + sub fullname { + my $self = shift; + return $self->{FULLNAME}; + } + +Then to support old code, define Person::name() this way: + + sub name { + my $self = shift; + return $self->{FULLNAME}->nickname(@_) + || $self->{FULLNAME}->christian(@_); + } + +Here's the Fullname class. 
We'll use the same technique +of using a hash reference to hold data fields, and methods +by the appropriate name to access them: + + package Fullname; + use strict; + + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = { + TITLE => undef, + CHRISTIAN => undef, + SURNAME => undef, + NICK => undef, + }; + bless ($self, $class); + return $self; + } + + sub christian { + my $self = shift; + if (@_) { $self->{CHRISTIAN} = shift } + return $self->{CHRISTIAN}; + } + + sub surname { + my $self = shift; + if (@_) { $self->{SURNAME} = shift } + return $self->{SURNAME}; + } + + sub nickname { + my $self = shift; + if (@_) { $self->{NICK} = shift } + return $self->{NICK}; + } + + sub title { + my $self = shift; + if (@_) { $self->{TITLE} = shift } + return $self->{TITLE}; + } + + sub as_string { + my $self = shift; + my $name = join(" ", @$self{'CHRISTIAN', 'SURNAME'}); + if ($self->{TITLE}) { + $name = $self->{TITLE} . " " . $name; + } + return $name; + } + + 1; + +Finally, here's the test program: + + #!/usr/bin/perl -w + use strict; + use Person; + sub END { show_census() } + + sub show_census () { + printf "Current population: %d\n", Person->population; + } + + Person->debug(1); + + show_census(); + + my $him = Person->new(); + + $him->fullname->christian("Thomas"); + $him->fullname->surname("Aquinas"); + $him->fullname->nickname("Tommy"); + $him->fullname->title("St"); + $him->age(1); + + printf "%s is really %s.\n", $him->name, $him->fullname; + printf "%s's age: %d.\n", $him->name, $him->age; + $him->happy_birthday; + printf "%s's age: %d.\n", $him->name, $him->age; + + show_census(); + +=head1 Inheritance + +Object-oriented programming systems all support some notion of +inheritance. Inheritance means allowing one class to piggy-back on +top of another one so you don't have to write the same code again and +again. It's about software reuse, and therefore related to Laziness, +the principal virtue of a programmer. 
(The import/export mechanisms in +traditional modules are also a form of code reuse, but a simpler one than +the true inheritance that you find in object modules.) + +Sometimes the syntax of inheritance is built into the core of the +language, and sometimes it's not. Perl has no special syntax for +specifying the class (or classes) to inherit from. Instead, it's all +strictly in the semantics. Each package can have a variable called @ISA, +which governs (method) inheritance. If you try to call a method on an +object or class, and that method is not found in that object's package, +Perl then looks to @ISA for other packages to go looking through in +search of the missing method. + +Like the special per-package variables recognized by Exporter (such as +@EXPORT, @EXPORT_OK, @EXPORT_FAIL, %EXPORT_TAGS, and $VERSION), the @ISA +array I<must> be a package-scoped global and not a file-scoped lexical +created via my(). Most classes have just one item in their @ISA array. +In this case, we have what's called "single inheritance", or SI for short. + +Consider this class: + + package Employee; + use Person; + @ISA = ("Person"); + 1; + +Not a lot to it, eh? All it's doing so far is loading in another +class and stating that this one will inherit methods from that +other class if need be. We have given it none of its own methods. +We rely upon an Employee to behave just like a Person. + +Setting up an empty class like this is called the "empty subclass test"; +that is, making a derived class that does nothing but inherit from a +base class. If the original base class has been designed properly, +then the new derived class can be used as a drop-in replacement for the +old one. 
This means you should be able to write a program like this: + + use Employee; + my $empl = Employee->new(); + $empl->name("Jason"); + $empl->age(23); + printf "%s is age %d.\n", $empl->name, $empl->age; + +By proper design, we mean always using the two-argument form of bless(), +avoiding direct access of global data, and not exporting anything. If you +look back at the Person::new() function we defined above, we were careful +to do that. There's a bit of package data used in the constructor, +but the reference to this is stored on the object itself and all other +methods access package data via that reference, so we should be ok. + +What do we mean by the Person::new() function -- isn't that actually +a method? Well, in principle, yes. A method is just a function that +expects as its first argument a class name (package) or object +(blessed reference). Person::new() is the function that both the +C<Person-E<gt>new()> method and the C<Employee-E<gt>new()> method end +up calling. Understand that while a method call looks a lot like a +function call, they aren't really quite the same, and if you treat them +as the same, you'll very soon be left with nothing but broken programs. +First, the actual underlying calling conventions are different: method +calls get an extra argument. Second, function calls don't do inheritance, +but methods do. + + Method Call Resulting Function Call + ----------- ------------------------ + Person->new() Person::new("Person") + Employee->new() Person::new("Employee") + +So don't use function calls when you mean to call a method. + +If an employee is just a Person, that's not all too very interesting. +So let's add some other methods. We'll give our employee +data fields to access their salary, their employee ID, and their +start date. + +If you're getting a little tired of creating all these nearly identical +methods just to get at the object's data, do not despair. 
Later, +we'll describe several different convenience mechanisms for shortening +this up. Meanwhile, here's the straight-forward way: + + sub salary { + my $self = shift; + if (@_) { $self->{SALARY} = shift } + return $self->{SALARY}; + } + + sub id_number { + my $self = shift; + if (@_) { $self->{ID} = shift } + return $self->{ID}; + } + + sub start_date { + my $self = shift; + if (@_) { $self->{START_DATE} = shift } + return $self->{START_DATE}; + } + +=head2 Overridden Methods + +What happens when both a derived class and its base class have the same +method defined? Well, then you get the derived class's version of that +method. For example, let's say that we want the peers() method called on +an employee to act a bit differently. Instead of just returning the list +of peer names, let's return slightly different strings. So doing this: + + $empl->peers("Peter", "Paul", "Mary"); + printf "His peers are: %s\n", join(", ", $empl->peers); + +will produce: + + His peers are: PEON=PETER, PEON=PAUL, PEON=MARY + +To do this, merely add this definition into the Employee.pm file: + + sub peers { + my $self = shift; + if (@_) { @{ $self->{PEERS} } = @_ } + return map { "PEON=\U$_" } @{ $self->{PEERS} }; + } + +There, we've just demonstrated the high-falutin' concept known in certain +circles as I<polymorphism>. We've taken on the form and behaviour of +an existing object, and then we've altered it to suit our own purposes. +This is a form of Laziness. (Getting polymorphed is also what happens +when the wizard decides you'd look better as a frog.) + +Every now and then you'll want to have a method call trigger both its +derived class (also known as "subclass") version as well as its base class +(also known as "superclass") version. In practice, constructors and +destructors are likely to want to do this, and it probably also makes +sense in the debug() method we showed previously. 
+ +To do this, add this to Employee.pm: + + use Carp; + my $Debugging = 0; + + sub debug { + my $self = shift; + confess "usage: thing->debug(level)" unless @_ == 1; + my $level = shift; + if (ref($self)) { + $self->{"_DEBUG"} = $level; + } else { + $Debugging = $level; # whole class + } + Person::debug($self, $Debugging); # don't really do this + } + +As you see, we turn around and call the Person package's debug() function. +But this is far too fragile for good design. What if Person doesn't +have a debug() function, but is inheriting I<its> debug() method +from elsewhere? It would have been slightly better to say + + Person->debug($Debugging); + +But even that's got too much hard-coded. It's somewhat better to say + + $self->Person::debug($Debugging); + +Which is a funny way to say to start looking for a debug() method up +in Person. This strategy is more often seen on overridden object methods +than on overridden class methods. + +There is still something a bit off here. We've hard-coded our +superclass's name. This in particular is bad if you change which classes +you inherit from, or add others. Fortunately, the pseudoclass SUPER +comes to the rescue here. + + $self->SUPER::debug($Debugging); + +This way it starts looking in my class's @ISA. This only makes sense +from I<within> a method call, though. Don't try to access anything +in SUPER:: from anywhere else, because it doesn't exist outside +an overridden method call. + +Things are getting a bit complicated here. Have we done anything +we shouldn't? As before, one way to test whether we're designing +a decent class is via the empty subclass test. Since we already have +an Employee class that we're trying to check, we'd better get a new +empty subclass that can derive from Employee. 
Here's one: + + package Boss; + use Employee; # :-) + @ISA = qw(Employee); + +And here's the test program: + + #!/usr/bin/perl -w + use strict; + use Boss; + Boss->debug(1); + + my $boss = Boss->new(); + + $boss->fullname->title("Don"); + $boss->fullname->surname("Pichon Alvarez"); + $boss->fullname->christian("Federico Jesus"); + $boss->fullname->nickname("Fred"); + + $boss->age(47); + $boss->peers("Frank", "Felipe", "Faust"); + + printf "%s is age %d.\n", $boss->fullname, $boss->age; + printf "His peers are: %s\n", join(", ", $boss->peers); + +Running it, we see that we're still ok. If you'd like to dump out your +object in a nice format, somewhat like the way the 'x' command works in +the debugger, you could use the Data::Dumper module from CPAN this way: + + use Data::Dumper; + print "Here's the boss:\n"; + print Dumper($boss); + +Which shows us something like this: + + Here's the boss: + $VAR1 = bless( { + _CENSUS => \1, + FULLNAME => bless( { + TITLE => 'Don', + SURNAME => 'Pichon Alvarez', + NICK => 'Fred', + CHRISTIAN => 'Federico Jesus' + }, 'Fullname' ), + AGE => 47, + PEERS => [ + 'Frank', + 'Felipe', + 'Faust' + ] + }, 'Boss' ); + +Hm.... something's missing there. What about the salary, start date, +and ID fields? Well, we never set them to anything, even undef, so they +don't show up in the hash's keys. The Employee class has no new() method +of its own, and the new() method in Person doesn't know about Employees. +(Nor should it: proper OO design dictates that a subclass be allowed to +know about its immediate superclass, but never vice-versa.) So let's +fix up Employee::new() this way: + + sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + $self->{SALARY} = undef; + $self->{ID} = undef; + $self->{START_DATE} = undef; + bless ($self, $class); # reconsecrate + return $self; + } + +Now if you dump out an Employee or Boss object, you'll find +that new fields show up there now. 
+ +=head2 Multiple Inheritance + +Ok, at the risk of confusing beginners and annoying OO gurus, it's +time to confess that Perl's object system includes that controversial +notion known as multiple inheritance, or MI for short. All this means +is that rather than having just one parent class who in turn might +itself have a parent class, etc., that you can directly inherit from +two or more parents. It's true that some uses of MI can get you into +trouble, although hopefully not quite so much trouble with Perl as with +dubiously-OO languages like C++. + +The way it works is actually pretty simple: just put more than one package +name in your @ISA array. When it comes time for Perl to go finding +methods for your object, it looks at each of these packages in order. +Well, kinda. It's actually a fully recursive, depth-first order. +Consider a bunch of @ISA arrays like this: + + @First::ISA = qw( Alpha ); + @Second::ISA = qw( Beta ); + @Third::ISA = qw( First Second ); + +If you have an object of class Third: + + my $ob = Third->new(); + $ob->spin(); + +How do we find a spin() method (or a new() method for that matter)? +Because the search is depth-first, classes will be looked up +in the following order: Third, First, Alpha, Second, and Beta. + +In practice, few class modules have been seen that actually +make use of MI. One nearly always chooses simple containership of +one class within another over MI. That's why our Person +object I<contained> a Fullname object. That doesn't mean +it I<was> one. + +However, there is one particular area where MI in Perl is rampant: +borrowing another class's class methods. This is rather common, +especially with some bundled "objectless" classes, +like Exporter, DynaLoader, AutoLoader, and SelfLoader. These classes +do not provide constructors; they exist only so you may inherit their +class methods. (It's not entirely clear why inheritance was done +here rather than traditional module importation.) 
+ +For example, here is the POSIX module's @ISA: + + package POSIX; + @ISA = qw(Exporter DynaLoader); + +The POSIX module isn't really an object module, but then, +neither are Exporter or DynaLoader. They're just lending their +classes' behaviours to POSIX. + +Why don't people use MI for object methods much? One reason is that +it can have complicated side-effects. For one thing, your inheritance +graph (no longer a tree) might converge back to the same base class. +Although Perl guards against recursive inheritance, merely having parents +who are related to each other via a common ancestor, incestuous though +it sounds, is not forbidden. What if in our Third class shown above we +wanted its new() method to also call both overridden constructors in its +two parent classes? The SUPER notation would only find the first one. +Also, what about if the Alpha and Beta classes both had a common ancestor, +like Nought? If you kept climbing up the inheritance tree calling +overridden methods, you'd end up calling Nought::new() twice, +which might well be a bad idea. + +=head2 UNIVERSAL: The Root of All Objects + +Wouldn't it be convenient if all objects were rooted at some ultimate +base class? That way you could give every object common methods without +having to go and add it to each and every @ISA. Well, it turns out that +you can. You don't see it, but Perl tacitly and irrevocably assumes +that there's an extra element at the end of @ISA: the class UNIVERSAL. +In version 5.003, there were no predefined methods there, but you could put +whatever you felt like into it. + +However, as of version 5.004 (or some subversive releases, like 5.003_08), +UNIVERSAL has some methods in it already. These are builtin to your Perl +binary, so they don't take any extra time to load. Predefined methods +include isa(), can(), and VERSION(). 
isa() tells you whether an object or +class "is" another one without having to traverse the hierarchy yourself: + + $has_io = $fd->isa("IO::Handle"); + $itza_handle = IO::Socket->isa("IO::Handle"); + +The can() method, called against that object or class, reports back +whether its string argument is a callable method name in that class. +In fact, it gives you back a function reference to that method: + + $his_print_method = $obj->can('as_string'); + +Finally, the VERSION method checks whether the class (or the object's +class) has a package global called $VERSION that's high enough, as in: + + Some_Module->VERSION(3.0); + $his_vers = $ob->VERSION(); + +However, we don't usually call VERSION ourselves. (Remember that an all +uppercase function name is a Perl convention that indicates that the +function will be automatically used by Perl in some way.) In this case, +it happens when you say + + use Some_Module 3.0; + +If you wanted to add version checking to your Person class explained +above, just add this to Person.pm: + + use vars qw($VERSION); + $VERSION = '1.1'; + +and then in Employee.pm you can say + + use Person 1.1; + +And it would make sure that you have at least that version number or +higher available. This is not the same as loading in that exact version +number. No mechanism currently exists for concurrent installation of +multiple versions of a module. Lamentably. + +=head1 Alternate Object Representations + +Nothing requires objects to be implemented as hash references. An object +can be any sort of reference so long as its referent has been suitably +blessed. That means scalar, array, and code references are also fair +game. + +A scalar would work if the object has only one datum to hold. An array +would work for most cases, but makes inheritance a bit dodgy because +you have to invent new indices for the derived classes. 
+ +=head2 Arrays as Objects + +If the user of your class honors the contract and sticks to the advertised +interface, then you can change its underlying implementation if you feel +like it. Here's another implementation that conforms to the same +interface specification. This time we'll use an array reference +instead of a hash reference to represent the object. + + package Person; + use strict; + + my($NAME, $AGE, $PEERS) = ( 0 .. 2 ); + + ############################################ + ## the object constructor (array version) ## + ############################################ + sub new { + my $self = []; + $self->[$NAME] = undef; # this is unnecessary + $self->[$AGE] = undef; # as is this + $self->[$PEERS] = []; # but this isn't, really + bless($self); + return $self; + } + + sub name { + my $self = shift; + if (@_) { $self->[$NAME] = shift } + return $self->[$NAME]; + } + + sub age { + my $self = shift; + if (@_) { $self->[$AGE] = shift } + return $self->[$AGE]; + } + + sub peers { + my $self = shift; + if (@_) { @{ $self->[$PEERS] } = @_ } + return @{ $self->[$PEERS] }; + } + + 1; # so the require or use succeeds + +You might guess that the array access would be a lot faster than the +hash access, but they're actually comparable. The array is a I<little> +bit faster, but not more than ten or fifteen percent, even when you +replace the variables above like $AGE with literal numbers, like 1. +A bigger difference between the two approaches can be found in memory use. +A hash representation takes up more memory than an array representation +because you have to allocate memory for the keys as well as for the values. +However, it really isn't that bad, especially since as of version 5.004, +memory is only allocated once for a given hash key, no matter how many +hashes have that key. It's expected that sometime in the future, even +these differences will fade into obscurity as more efficient underlying +representations are devised. 
+ +Still, the tiny edge in speed (and somewhat larger one in memory) +is enough to make some programmers choose an array representation +for simple classes. There's still a little problem with +scalability, though, because later in life when you feel +like creating subclasses, you'll find that hashes just work +out better. + +=head2 Closures as Objects + +Using a code reference to represent an object offers some fascinating +possibilities. We can create a new anonymous function (closure) who +alone in all the world can see the object's data. This is because we +put the data into an anonymous hash that's lexically visible only to +the closure we create, bless, and return as the object. This object's +methods turn around and call the closure as a regular subroutine call, +passing it the field we want to affect. (Yes, +the double-function call is slow, but if you wanted fast, you wouldn't +be using objects at all, eh? :-) + +Use would be similar to before: + + use Person; + $him = Person->new(); + $him->name("Jason"); + $him->age(23); + $him->peers( [ "Norbert", "Rhys", "Phineas" ] ); + printf "%s is %d years old.\n", $him->name, $him->age; + print "His peers are: ", join(", ", @{$him->peers}), "\n"; + +but the implementation would be radically, perhaps even sublimely +different: + + package Person; + + sub new { + my $that = shift; + my $class = ref($that) || $that; + my $self = { + NAME => undef, + AGE => undef, + PEERS => [], + }; + my $closure = sub { + my $field = shift; + if (@_) { $self->{$field} = shift } + return $self->{$field}; + }; + bless($closure, $class); + return $closure; + } + + sub name { &{ $_[0] }("NAME", @_[ 1 .. $#_ ] ) } + sub age { &{ $_[0] }("AGE", @_[ 1 .. $#_ ] ) } + sub peers { &{ $_[0] }("PEERS", @_[ 1 .. 
$#_ ] ) } + + 1; + +Because this object is hidden behind a code reference, it's probably a bit +mysterious to those whose background is more firmly rooted in standard +procedural or object-based programming languages than in functional +programming languages whence closures derive. The object +created and returned by the new() method is itself not a data reference +as we've seen before. It's an anonymous code reference that has within +it access to a specific version (lexical binding and instantiation) +of the object's data, which are stored in the private variable $self. +Although this is the same function each time, it contains a different +version of $self. + +When a method like C<$him-E<gt>name("Jason")> is called, its implicit +zeroth argument is the invoking object--just as it is with all method +calls. But in this case, it's our code reference (something like a +function pointer in C++, but with deep binding of lexical variables). +There's not a lot to be done with a code reference beyond calling it, so +that's just what we do when we say C<&{$_[0]}>. This is just a regular +function call, not a method call. The initial argument is the string +"NAME", and any remaining arguments are whatever had been passed to the +method itself. + +Once we're executing inside the closure that had been created in new(), +the $self hash reference suddenly becomes visible. The closure grabs +its first argument ("NAME" in this case because that's what the name() +method passed it), and uses that string to subscript into the private +hash hidden in its unique version of $self. + +Nothing under the sun will allow anyone outside the executing method to +be able to get at this hidden data. Well, nearly nothing. You I<could> +single step through the program using the debugger and find out the +pieces while you're in the method, but everyone else is out of luck. + +There, if that doesn't excite the Scheme folks, then I just don't know +what will. 
Translation of this technique into C++, Java, or any other +braindead-static language is left as a futile exercise for aficionados +of those camps. + +You could even add a bit of nosiness via the caller() function and +make the closure refuse to operate unless called via its own package. +This would no doubt satisfy certain fastidious concerns of programming +police and related puritans. + +If you were wondering when Hubris, the third principal virtue of a +programmer, would come into play, here you have it. (More seriously, +Hubris is just the pride in craftsmanship that comes from having written +a sound bit of well-designed code.) + +=head1 AUTOLOAD: Proxy Methods + +Autoloading is a way to intercept calls to undefined methods. An autoload +routine may choose to create a new function on the fly, either loaded +from disk or perhaps just eval()ed right there. This define-on-the-fly +strategy is why it's called autoloading. + +But that's only one possible approach. Another one is to just +have the autoloaded method itself directly provide the +requested service. When used in this way, you may think +of autoloaded methods as "proxy" methods. + +When Perl tries to call an undefined function in a particular package +and that function is not defined, it looks for a function in +that same package called AUTOLOAD. If one exists, it's called +with the same arguments as the original function would have had. +The fully-qualified name of the function is stored in that package's +global variable $AUTOLOAD. Once called, the function can do anything +it would like, including defining a new function by the right name, and +then doing a really fancy kind of C<goto> right to it, erasing itself +from the call stack. + +What does this have to do with objects? After all, we keep talking about +functions, not methods. Well, since a method is just a function with +an extra argument and some fancier semantics about where it's found, +we can use autoloading for methods, too. 
Perl doesn't start looking +for an AUTOLOAD method until it has exhausted the recursive hunt up +through @ISA, though. Some programmers have even been known to define +a UNIVERSAL::AUTOLOAD method to trap unresolved method calls to any +kind of object. + +=head2 Autoloaded Data Methods + +You probably began to get a little suspicious about the duplicated +code way back earlier when we first showed you the Person class, and +then later the Employee class. Each method used to access the +hash fields looked virtually identical. This should have tickled +that great programming virtue, Impatience, but for the time, +we let Laziness win out, and so did nothing. Proxy methods can cure +this. + +Instead of writing a new function every time we want a new data field, +we'll use the autoload mechanism to generate (actually, mimic) methods on +the fly. To verify that we're accessing a valid member, we will check +against an C<_permitted> (pronounced "under-permitted") field, which +is a reference to a file-scoped lexical (like a C file static) hash of permitted fields in this record +called %fields. Why the underscore? For the same reason as the _CENSUS +field we once used: as a marker that means "for internal use only". + +Here's what the module initialization code and class +constructor will look like when taking this approach: + + package Person; + use Carp; + use vars qw($AUTOLOAD); # it's a package global + + my %fields = ( + name => undef, + age => undef, + peers => undef, + ); + + sub new { + my $that = shift; + my $class = ref($that) || $that; + my $self = { + _permitted => \%fields, + %fields, + }; + bless $self, $class; + return $self; + } + +If we wanted our record to have default values, we could fill those in +where currently we have C<undef> in the %fields hash. + +Notice how we saved a reference to our class data on the object itself? 
+Remember that it's important to access class data through the object +itself instead of having any method reference %fields directly, or else +you won't have a decent inheritance. + +The real magic, though, is going to reside in our proxy method, which +will handle all calls to undefined methods for objects of class Person +(or subclasses of Person). It has to be called AUTOLOAD. Again, it's +all caps because it's called for us implicitly by Perl itself, not by +a user directly. + + sub AUTOLOAD { + my $self = shift; + my $type = ref($self) + or croak "$self is not an object"; + + my $name = $AUTOLOAD; + $name =~ s/.*://; # strip fully-qualified portion + + unless (exists $self->{_permitted}->{$name} ) { + croak "Can't access `$name' field in class $type"; + } + + if (@_) { + return $self->{$name} = shift; + } else { + return $self->{$name}; + } + } + +Pretty nifty, eh? All we have to do to add new data fields +is modify %fields. No new functions need be written. + +I could have avoided the C<_permitted> field entirely, but I +wanted to demonstrate how to store a reference to class data on the +object so you wouldn't have to access that class data +directly from an object method. + +=head2 Inherited Autoloaded Data Methods + +But what about inheritance? Can we define our Employee +class similarly? Yes, so long as we're careful enough. 
+ +Here's how to be careful: + + package Employee; + use Person; + use strict; + use vars qw(@ISA); + @ISA = qw(Person); + + my %fields = ( + id => undef, + salary => undef, + ); + + sub new { + my $that = shift; + my $class = ref($that) || $that; + my $self = bless $that->SUPER::new(), $class; + my($element); + foreach $element (keys %fields) { + $self->{_permitted}->{$element} = $fields{$element}; + } + @{$self}{keys %fields} = values %fields; + return $self; + } + +Once we've done this, we don't even need to have an +AUTOLOAD function in the Employee package, because +we'll grab Person's version of that via inheritance, +and it will all work out just fine. + +=head1 Metaclassical Tools + +Even though proxy methods can provide a more convenient approach to making +more struct-like classes than tediously coding up data methods as +functions, it still leaves a bit to be desired. For one thing, it means +you have to handle bogus calls that you don't mean to trap via your proxy. +It also means you have to be quite careful when dealing with inheritance, +as detailed above. + +Perl programmers have responded to this by creating several different +class construction classes. These metaclasses are classes +that create other classes. A couple worth looking at are +Class::Struct and Alias. These and other related metaclasses can be +found in the modules directory on CPAN. + +=head2 Class::Struct + +One of the older ones is Class::Struct. In fact, its syntax and +interface were sketched out long before perl5 even solidified into a +real thing. What it does is provide you a way to "declare" a class +as having objects whose fields are of a specific type. The function +that does this is called, not surprisingly enough, struct(). Because +structures or records are not base types in Perl, each time you want to +create a class to provide a record-like data object, you yourself have +to define a new() method, plus separate data-access methods for each of +that record's fields. 
You'll quickly become bored with this process. +The Class::Struct::struct() function alleviates this tedium. + +Here's a simple example of using it: + + use Class::Struct qw(struct); + use Jobbie; # user-defined; see below + + struct 'Fred' => { + one => '$', + many => '@', + profession => Jobbie, # calls Jobbie->new() + }; + + $ob = Fred->new; + $ob->one("hmmmm"); + + $ob->many(0, "here"); + $ob->many(1, "you"); + $ob->many(2, "go"); + print "Just set: ", $ob->many(2), "\n"; + + $ob->profession->salary(10_000); + +You can declare types in the struct to be basic Perl types, or +user-defined types (classes). User types will be initialized by calling +that class's new() method. + +Here's a real-world example of using struct generation. Let's say you +wanted to override Perl's idea of gethostbyname() and gethostbyaddr() so +that they would return objects that acted like C structures. We don't +care about high-falutin' OO gunk. All we want is for these objects to +act like structs in the C sense. + + use Socket; + use Net::hostent; + $h = gethostbyname("perl.com"); # object return + printf "perl.com's real name is %s, address %s\n", + $h->name, inet_ntoa($h->addr); + +Here's how to do this using the Class::Struct module. +The crux is going to be this call: + + struct 'Net::hostent' => [ # note bracket + name => '$', + aliases => '@', + addrtype => '$', + 'length' => '$', + addr_list => '@', + ]; + +Which creates object methods of those names and types. +It even creates a new() method for us. + +We could also have implemented our object this way: + + struct 'Net::hostent' => { # note brace + name => '$', + aliases => '@', + addrtype => '$', + 'length' => '$', + addr_list => '@', + }; + +and then Class::Struct would have used an anonymous hash as the object +type, instead of an anonymous array. The array is faster and smaller, +but the hash works out better if you eventually want to do inheritance. 
+Since for this struct-like object we aren't planning on inheritance, +this time we'll opt for better speed and size over better flexibility. + +Here's the whole implementation: + + package Net::hostent; + use strict; + + BEGIN { + use Exporter (); + use vars qw(@EXPORT @EXPORT_OK %EXPORT_TAGS); + @EXPORT = qw(gethostbyname gethostbyaddr gethost); + @EXPORT_OK = qw( + $h_name @h_aliases + $h_addrtype $h_length + @h_addr_list $h_addr + ); + %EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] ); + } + use vars @EXPORT_OK; + + # Class::Struct forbids use of @ISA + sub import { goto &Exporter::import } + + use Class::Struct qw(struct); + struct 'Net::hostent' => [ + name => '$', + aliases => '@', + addrtype => '$', + 'length' => '$', + addr_list => '@', + ]; + + sub addr { shift->addr_list->[0] } + + sub populate (@) { + return unless @_; + my $hob = new(); # Class::Struct made this! + $h_name = $hob->[0] = $_[0]; + @h_aliases = @{ $hob->[1] } = split ' ', $_[1]; + $h_addrtype = $hob->[2] = $_[2]; + $h_length = $hob->[3] = $_[3]; + $h_addr = $_[4]; + @h_addr_list = @{ $hob->[4] } = @_[ (4 .. $#_) ]; + return $hob; + } + + sub gethostbyname ($) { populate(CORE::gethostbyname(shift)) } + + sub gethostbyaddr ($;$) { + my ($addr, $addrtype); + $addr = shift; + require Socket unless @_; + $addrtype = @_ ? shift : Socket::AF_INET(); + populate(CORE::gethostbyaddr($addr, $addrtype)) + } + + sub gethost($) { + if ($_[0] =~ /^\d+(?:\.\d+(?:\.\d+(?:\.\d+)?)?)?$/) { + require Socket; + &gethostbyaddr(Socket::inet_aton(shift)); + } else { + &gethostbyname; + } + } + + 1; + +We've snuck in quite a fair bit of other concepts besides just dynamic +class creation, like overriding core functions, import/export bits, +function prototyping, short-cut function call via C<&whatever>, and +function replacement with C<goto &whatever>. These all mostly make +sense from the perspective of a traditional module, but as you can see, +we can also use them in an object module. 
+ +You can look at other object-based, struct-like overrides of core +functions in the 5.004 release of Perl in File::stat, Net::hostent, +Net::netent, Net::protoent, Net::servent, Time::gmtime, Time::localtime, +User::grent, and User::pwent. These modules have a final component +that's all lowercase, by convention reserved for compiler pragmas, +because they affect the compilation and change a builtin function. +They also have the type names that a C programmer would most expect. + +=head2 Data Members as Variables + +If you're used to C++ objects, then you're accustomed to being able to +get at an object's data members as simple variables from within a method. +The Alias module provides for this, as well as a good bit more, such +as the possibility of private methods that the object can call but folks +outside the class cannot. + +Here's an example of creating a Person using the Alias module. +When you update these magical instance variables, you automatically +update value fields in the hash. Convenient, eh? + + package Person; + + # this is the same as before... + sub new { + my $that = shift; + my $class = ref($that) || $that; + my $self = { + NAME => undef, + AGE => undef, + PEERS => [], + }; + bless($self, $class); + return $self; + } + + use Alias qw(attr); + use vars qw($NAME $AGE $PEERS); + + sub name { + my $self = attr shift; + if (@_) { $NAME = shift; } + return $NAME; + } + + sub age { + my $self = attr shift; + if (@_) { $AGE = shift; } + return $AGE; + } + + sub peers { + my $self = attr shift; + if (@_) { @PEERS = @_; } + return @PEERS; + } + + sub exclaim { + my $self = attr shift; + return sprintf "Hi, I'm %s, age %d, working with %s", + $NAME, $AGE, join(", ", @PEERS); + } + + sub happy_birthday { + my $self = attr shift; + return ++$AGE; + } + +The need for the C<use vars> declaration is because what Alias does +is play with package globals with the same name as the fields. 
To use +globals while C<use strict> is in effect, you have to predeclare them. +These package variables are localized to the block enclosing the attr() +call just as if you'd used a local() on them. However, that means that +they're still considered global variables with temporary values, just +as with any other local(). + +It would be nice to combine Alias with +something like Class::Struct or Class::MethodMaker. + +=head2 NOTES + +=head2 Object Terminology + +In the various OO literature, it seems that a lot of different words +are used to describe only a few different concepts. If you're not +already an object programmer, then you don't need to worry about all +these fancy words. But if you are, then you might like to know how to +get at the same concepts in Perl. + +For example, it's common to call an object an I<instance> of a class +and to call those objects' methods I<instance methods>. Data fields +peculiar to each object are often called I<instance data> or I<object +attributes>, and data fields common to all members of that class are +I<class data>, I<class attributes>, or I<static data members>. + +Also, I<base class>, I<generic class>, and I<superclass> all describe +the same notion, whereas I<derived class>, I<specific class>, and +I<subclass> describe the other related one. + +C++ programmers have I<static methods> and I<virtual methods>, +but Perl only has I<class methods> and I<object methods>. +Actually, Perl only has methods. Whether a method gets used +as a class or object method is by usage only. You could accidentally +call a class method (one expecting a string argument) on an +object (one expecting a reference), or vice versa. + +Z<>From the C++ perspective, all methods in Perl are virtual. +This, by the way, is why they are never checked for function +prototypes in the argument list as regular builtin and user-defined +functions can be. 
+ +Because a class is itself something of an object, Perl's classes can be +taken as describing both a "class as meta-object" (also called I<object +factory>) philosophy and the "class as type definition" (I<declaring> +behaviour, not I<defining> mechanism) idea. C++ supports the latter +notion, but not the former. + +=head1 SEE ALSO + +The following manpages will doubtless provide more +background for this one: +L<perlmod>, +L<perlref>, +L<perlobj>, +L<perlbot>, +L<perltie>, +and +L<overload>. + +=head1 COPYRIGHT + +I I<really> hate to have to say this, but recent unpleasant +experiences have mandated its inclusion: + + Copyright 1996 Tom Christiansen. All Rights Reserved. + +This work derives in part from the second edition of I<Programming Perl>. +Although destined for release as a manpage with the standard Perl +distribution, it is not public domain (nor is any of Perl and its docset: +publishers beware). It's expected to someday make its way into a revision +of the Camel Book. While it is copyright by me with all rights reserved, +permission is granted to freely distribute verbatim copies of this +document provided that no modifications outside of formatting be made, +and that this notice remain intact. You are permitted and encouraged to +use its code and derivatives thereof in your own source code for fun or +for profit as you see fit. But so help me, if in six months I find some +book out there with a hacked-up version of this material in it claiming to +be written by someone else, I'll tell all the world that you're a jerk. +Furthermore, your lawyer will meet my lawyer (or O'Reilly's) over lunch +to arrange for you to receive your just deserts. Count on it. + +=head2 Acknowledgments + +Thanks to +Larry Wall, +Roderick Schertler, +Gurusamy Sarathy, +Dean Roehrich, +Raphael Manfredi, +Brent Halsey, +Greg Bacon, +Brad Appleton, +and many others for their helpful comments. 
diff --git a/pod/perltrap.pod b/pod/perltrap.pod index dd219c064b..786dcda607 100644 --- a/pod/perltrap.pod +++ b/pod/perltrap.pod @@ -6,7 +6,8 @@ perltrap - Perl traps for the unwary The biggest trap of all is forgetting to use the B<-w> switch; see L<perlrun>. The second biggest trap is not making your entire program -runnable under C<use strict>. +runnable under C<use strict>. The third biggest trap is not reading +the list of changes in this version of Perl; see L<perldelta>. =head2 Awk Traps @@ -20,8 +21,8 @@ The English module, loaded via use English; -allows you to refer to special variables (like $RS) as -though they were in B<awk>; see L<perlvar> for details. +allows you to refer to special variables (like C<$/>) with names (like +C<$RS>), as though they were in B<awk>; see L<perlvar> for details. =item * @@ -47,8 +48,7 @@ You have to decide whether your array has numeric or string indices. =item * -Associative array values do not spring into existence upon mere -reference. +Hash values do not spring into existence upon mere reference. =item * @@ -58,8 +58,8 @@ comparisons. =item * Reading an input line does not split it for you. You get to split it -yourself to an array. And split() operator has different -arguments. +to an array yourself. And the split() operator has different +arguments than B<awk>'s. =item * @@ -69,13 +69,13 @@ executed.) See L<perlvar>. =item * -$<I<digit>> does not refer to fields--it refers to substrings matched by -the last match pattern. +$E<lt>I<digit>E<gt> does not refer to fields--it refers to substrings matched +by the last match pattern. =item * The print() statement does not add field and record separators unless -you set C<$,> and C<$.>. You can set $OFS and $ORS if you're using +you set C<$,> and C<$\>. You can set $OFS and $ORS if you're using the English module. =item * @@ -101,9 +101,9 @@ basically incompatible with C.) =item * The concatenation operator is ".", not the null string. 
(Using the -null string would render C</pat/ /pat/> unparsable, since the third slash -would be interpreted as a division operator--the tokener is in fact -slightly context sensitive for operators like "/", "?", and ">". +null string would render C</pat/ /pat/> unparsable, because the third slash +would be interpreted as a division operator--the tokenizer is in fact +slightly context sensitive for operators like "/", "?", and "E<gt>". And in fact, "." itself can be the beginning of a number.) =item * @@ -158,7 +158,7 @@ You must use C<elsif> rather than C<else if>. =item * -The C<break> and C<continue> keywords from C become in +The C<break> and C<continue> keywords from C become in Perl C<last> and C<next>, respectively. Unlike in C, these do I<NOT> work within a C<do { } while> construct. @@ -172,7 +172,7 @@ Variables begin with "$" or "@" in Perl. =item * -printf() does not implement the "*" format for interpolating +C<printf()> does not implement the "*" format for interpolating field widths, but it's trivial to use interpolation of double-quoted strings to achieve the same effect. @@ -183,7 +183,7 @@ Comments begin with "#", not "/*". =item * You can't take the address of anything, although a similar operator -in Perl 5 is the backslash, which creates a reference. +in Perl is the backslash, which creates a reference. =item * @@ -231,7 +231,7 @@ Sharp shell programmers should take note of the following: =item * -The backtick operator does variable interpretation without regard to +The backtick operator does variable interpolation without regard to the presence of single quotes in the command. =item * @@ -241,7 +241,7 @@ The backtick operator does no translation of the return value, unlike B<csh>. =item * Shells (especially B<csh>) do several levels of substitution on each -command line. Perl does substitution only in certain constructs +command line. 
Perl does substitution in only certain constructs such as double quotes, backticks, angle brackets, and search patterns. =item * @@ -274,36 +274,36 @@ context than they do in a scalar one. See L<perldata> for details. =item * -Avoid barewords if you can, especially all lower-case ones. -You can't tell just by looking at it whether a bareword is -a function or a string. By using quotes on strings and -parens on function calls, you won't ever get them confused. +Avoid barewords if you can, especially all lowercase ones. +You can't tell by just looking at it whether a bareword is +a function or a string. By using quotes on strings and +parentheses on function calls, you won't ever get them confused. =item * -You cannot discern from mere inspection which built-ins -are unary operators (like chop() and chdir()) +You cannot discern from mere inspection which builtins +are unary operators (like chop() and chdir()) and which are list operators (like print() and unlink()). -(User-defined subroutines can B<only> be list operators, never +(User-defined subroutines can be B<only> list operators, never unary ones.) See L<perlop>. =item * People have a hard time remembering that some functions default to $_, or @ARGV, or whatever, but that others which -you might expect to do not. +you might expect to do not. -=item * +=item * -The <FH> construct is not the name of the filehandle, it is a readline -operation on that handle. The data read is only assigned to $_ if the +The E<lt>FHE<gt> construct is not the name of the filehandle, it is a readline +operation on that handle. The data read is assigned to $_ only if the file read is the sole condition in a while loop: while (<FH>) { } - while ($_ = <FH>) { }.. + while (defined($_ = <FH>)) { }.. <FH>; # data discarded! 
-=item * +=item * Remember not to use "C<=>" when you need "C<=~>"; these two constructs are quite different: @@ -313,14 +313,14 @@ these two constructs are quite different: =item * -The C<do {}> construct isn't a real loop that you can use +The C<do {}> construct isn't a real loop that you can use loop control on. =item * -Use my() for local variables whenever you can get away with -it (but see L<perlform> for where you can't). -Using local() actually gives a local value to a global +Use C<my()> for local variables whenever you can get away with +it (but see L<perlform> for where you can't). +Using C<local()> actually gives a local value to a global variable, which leaves you open to unforeseen side-effects of dynamic scoping. @@ -332,65 +332,621 @@ external name is still an alias for the original. =back -=head2 Perl4 Traps +=head2 Perl4 to Perl5 Traps -Penitent Perl 4 Programmers should take note of the following -incompatible changes that occurred between release 4 and release 5: +Practicing Perl4 Programmers should take note of the following +Perl4-to-Perl5 specific traps. + +They're crudely ordered according to the following list: =over 4 -=item * +=item Discontinuance, Deprecation, and BugFix traps -C<@> now always interpolates an array in double-quotish strings. Some programs -may now need to use backslash to protect any C<@> that shouldn't interpolate. +Anything that's been fixed as a perl4 bug, removed as a perl4 feature +or deprecated as a perl4 feature with the intent to encourage usage of +some other perl5 feature. -=item * +=item Parsing Traps -Barewords that used to look like strings to Perl will now look like subroutine -calls if a subroutine by that name is defined before the compiler sees them. -For example: +Traps that appear to stem from the new parser. - sub SeeYa { die "Hasta la vista, baby!" } - $SIG{'QUIT'} = SeeYa; +=item Numerical Traps -In Perl 4, that set the signal handler; in Perl 5, it actually calls the -function! 
You may use the B<-w> switch to find such places. +Traps having to do with numerical or mathematical operators. -=item * +=item General data type traps -Symbols starting with C<_> are no longer forced into package C<main>, except -for $_ itself (and @_, etc.). +Traps involving perl standard data types. -=item * +=item Context Traps - scalar, list contexts + +Traps related to context within lists, scalar statements/declarations. + +=item Precedence Traps + +Traps related to the precedence of parsing, evaluation, and execution of +code. + +=item General Regular Expression Traps using s///, etc. + +Traps related to the use of pattern matching. + +=item Subroutine, Signal, Sorting Traps + +Traps related to the use of signals and signal handlers, general subroutines, +and sorting, along with sorting subroutines. + +=item OS Traps + +OS-specific traps. + +=item DBM Traps + +Traps specific to the use of C<dbmopen()>, and specific dbm implementations. + +=item Unclassified Traps + +Everything else. + +=back + +If you find an example of a conversion trap that is not listed here, +please submit it to Bill Middleton <F<wjm@best.com>> for inclusion. +Also note that at least some of these can be caught with B<-w>. + +=head2 Discontinuance, Deprecation, and BugFix traps -Double-colon is now a valid package separator in an identifier. Thus these -behave differently in perl4 vs. perl5: +Anything that has been discontinued, deprecated, or fixed as +a bug from perl4. - print "$a::$b::$c\n"; +=over 4 + +=item * Discontinuance + +Symbols starting with "_" are no longer forced into package main, except +for C<$_> itself (and C<@_>, etc.). + + package test; + $_legacy = 1; + + package main; + print "\$_legacy is ",$_legacy,"\n"; + + # perl4 prints: $_legacy is 1 + # perl5 prints: $_legacy is + +=item * Deprecation + +Double-colon is now a valid package separator in a variable name. Thus these +behave differently in perl4 vs. perl5, because the packages don't exist. 
+ + $a=1;$b=2;$c=3;$var=4; + print "$a::$b::$c "; print "$var::abc::xyz\n"; + + # perl4 prints: 1::2::3 4::abc::xyz + # perl5 prints: 3 -=item * +Given that C<::> is now the preferred package delimiter, it is debatable +whether this should be classed as a bug or not. +(The older package delimiter, ' ,is used here) -C<s'$lhs'$rhs'> now does no interpolation on either side. It used to -interpolate C<$lhs> but not C<$rhs>. + $x = 10 ; + print "x=${'x}\n" ; -=item * + # perl4 prints: x=10 + # perl5 prints: Can't find string terminator "'" anywhere before EOF -The second and third arguments of splice() are now evaluated in scalar -context (as the book says) rather than list context. +Also see precedence traps, for parsing C<$:>. -=item * +=item * BugFix -These are now semantic errors because of precedence: +The second and third arguments of C<splice()> are now evaluated in scalar +context (as the Camel says) rather than list context. - shift @list + 20; - $n = keys %map + 20; + sub sub1{return(0,2) } # return a 2-elem array + sub sub2{ return(1,2,3)} # return a 3-elem array + @a1 = ("a","b","c","d","e"); + @a2 = splice(@a1,&sub1,&sub2); + print join(' ',@a2),"\n"; -Because if that were to work, then this couldn't: + # perl4 prints: a b + # perl5 prints: c d e - sleep $dormancy + 20; +=item * Discontinuance -=item * +You can't do a C<goto> into a block that is optimized away. Darn. + + goto marker1; + + for(1){ + marker1: + print "Here I is!\n"; + } + + # perl4 prints: Here I is! + # perl5 dumps core (SEGV) + +=item * Discontinuance + +It is no longer syntactically legal to use whitespace as the name +of a variable, or as a delimiter for any kind of quote construct. +Double darn. + + $a = ("foo bar"); + $b = q baz ; + print "a is $a, b is $b\n"; + + # perl4 prints: a is foo bar, b is baz + # perl5 errors: Bareword found where operator expected + +=item * Discontinuance + +The archaic while/if BLOCK BLOCK syntax is no longer supported. 
+ + if { 1 } { + print "True!"; + } + else { + print "False!"; + } + + # perl4 prints: True! + # perl5 errors: syntax error at test.pl line 1, near "if {" + +=item * BugFix + +The C<**> operator now binds more tightly than unary minus. +It was documented to work this way before, but didn't. + + print -4**2,"\n"; + + # perl4 prints: 16 + # perl5 prints: -16 + +=item * Discontinuance + +The meaning of C<foreach{}> has changed slightly when it is iterating over a +list which is not an array. This used to assign the list to a +temporary array, but no longer does so (for efficiency). This means +that you'll now be iterating over the actual values, not over copies of +the values. Modifications to the loop variable can change the original +values. + + @list = ('ab','abc','bcd','def'); + foreach $var (grep(/ab/,@list)){ + $var = 1; + } + print (join(':',@list)); + + # perl4 prints: ab:abc:bcd:def + # perl5 prints: 1:1:bcd:def + +To retain Perl4 semantics you need to assign your list +explicitly to a temporary array and then iterate over that. For +example, you might need to change + + foreach $var (grep(/ab/,@list)){ + +to + + foreach $var (@tmp = grep(/ab/,@list)){ + +Otherwise changing $var will clobber the values of @list. (This most often +happens when you use C<$_> for the loop variable, and call subroutines in +the loop that don't properly localize C<$_>.) + +=item * Discontinuance + +C<split> with no arguments now behaves like C<split ' '> (which doesn't +return an initial null field if $_ starts with whitespace), it used to +behave like C<split /\s+/> (which does). + + $_ = ' hi mom'; + print join(':', split); + + # perl4 prints: :hi:mom + # perl5 prints: hi:mom + +=item * BugFix + +Perl 4 would ignore any text which was attached to an B<-e> switch, +always taking the code snippet from the following arg. Additionally, it +would silently accept an B<-e> switch without a following arg. Both of +these behaviors have been fixed. 
+ + perl -e'print "attached to -e"' 'print "separate arg"' + + # perl4 prints: separate arg + # perl5 prints: attached to -e + + perl -e + + # perl4 prints: + # perl5 dies: No code specified for -e. + +=item * Discontinuance + +In Perl 4 the return value of C<push> was undocumented, but it was +actually the last value being pushed onto the target list. In Perl 5 +the return value of C<push> is documented, but has changed, it is the +number of elements in the resulting list. + + @x = ('existing'); + print push(@x, 'first new', 'second new'); + + # perl4 prints: second new + # perl5 prints: 3 + +=item * Discontinuance + +In Perl 4 (and versions of Perl 5 before 5.004), C<'\r'> characters in +Perl code were silently allowed, although they could cause (mysterious!) +failures in certain constructs, particularly here documents. Now, +C<'\r'> characters cause an immediate fatal error. (Note: In this +example, the notation B<\015> represents the incorrect line +ending. Depending upon your text viewer, it will look different.) + + print "foo";\015 + print "bar"; + + # perl4 prints: foobar + # perl5.003 prints: foobar + # perl5.004 dies: Illegal character \015 (carriage return) + +See L<perldiag> for full details. + +=item * Deprecation + +Some error messages will be different. + +=item * Discontinuance + +Some bugs may have been inadvertently removed. :-) + +=back + +=head2 Parsing Traps + +Perl4-to-Perl5 traps having to do with parsing. + +=over 4 + +=item * Parsing + +Note the space between . and = + + $string . = "more string"; + print $string; + + # perl4 prints: more string + # perl5 prints: syntax error at - line 1, near ". =" + +=item * Parsing + +Better parsing in perl 5 + + sub foo {} + &foo + print("hello, world\n"); + + # perl4 prints: hello, world + # perl5 prints: syntax error + +=item * Parsing + +"if it looks like a function, it is a function" rule. + + print + ($foo == 1) ? 
"is one\n" : "is zero\n"; + + # perl4 prints: is zero + # perl5 warns: "Useless use of a constant in void context" if using -w + +=back + +=head2 Numerical Traps + +Perl4-to-Perl5 traps having to do with numerical operators, +operands, or output from same. + +=over 5 + +=item * Numerical + +Formatted output and significant digits + + print 7.373504 - 0, "\n"; + printf "%20.18f\n", 7.373504 - 0; + + # Perl4 prints: + 7.375039999999996141 + 7.37503999999999614 + + # Perl5 prints: + 7.373504 + 7.37503999999999614 + +=item * Numerical + +This specific item has been deleted. It demonstrated how the auto-increment +operator would not catch when a number went over the signed int limit. Fixed +in version 5.003_04. But always be wary when using large integers. +If in doubt: + + use Math::BigInt; + +=item * Numerical + +Assignment of return values from numeric equality tests +does not work in perl5 when the test evaluates to false (0). +Logical tests now return an null, instead of 0 + + $p = ($test == 1); + print $p,"\n"; + + # perl4 prints: 0 + # perl5 prints: + +Also see L<"General Regular Expression Traps using s///, etc."> +for another example of this new feature... + +=back + +=head2 General data type traps + +Perl4-to-Perl5 traps involving most data-types, and their usage +within certain expressions and/or context. + +=over 5 + +=item * (Arrays) + +Negative array subscripts now count from the end of the array. + + @a = (1, 2, 3, 4, 5); + print "The third element of the array is $a[3] also expressed as $a[-2] \n"; + + # perl4 prints: The third element of the array is 4 also expressed as + # perl5 prints: The third element of the array is 4 also expressed as 4 + +=item * (Arrays) + +Setting C<$#array> lower now discards array elements, and makes them +impossible to recover. 
+ + @a = (a,b,c,d,e); + print "Before: ",join('',@a); + $#a =1; + print ", After: ",join('',@a); + $#a =3; + print ", Recovered: ",join('',@a),"\n"; + + # perl4 prints: Before: abcde, After: ab, Recovered: abcd + # perl5 prints: Before: abcde, After: ab, Recovered: ab + +=item * (Hashes) + +Hashes get defined before use + + local($s,@a,%h); + die "scalar \$s defined" if defined($s); + die "array \@a defined" if defined(@a); + die "hash \%h defined" if defined(%h); + + # perl4 prints: + # perl5 dies: hash %h defined + +=item * (Globs) + +glob assignment from variable to variable will fail if the assigned +variable is localized subsequent to the assignment + + @a = ("This is Perl 4"); + *b = *a; + local(@a); + print @b,"\n"; + + # perl4 prints: This is Perl 4 + # perl5 prints: + + # Another example + + *fred = *barney; # fred is aliased to barney + @barney = (1, 2, 4); + # @fred; + print "@fred"; # should print "1, 2, 4" + + # perl4 prints: 1 2 4 + # perl5 prints: In string, @fred now must be written as \@fred + +=item * (Scalar String) + +Changes in unary negation (of strings) +This change affects both the return value and what it +does to auto(magic)increment. + + $x = "aaa"; + print ++$x," : "; + print -$x," : "; + print ++$x,"\n"; + + # perl4 prints: aab : -0 : 1 + # perl5 prints: aab : -aab : aac + +=item * (Constants) + +perl 4 lets you modify constants: + + $foo = "x"; + &mod($foo); + for ($x = 0; $x < 3; $x++) { + &mod("a"); + } + sub mod { + print "before: $_[0]"; + $_[0] = "m"; + print " after: $_[0]\n"; + } + + # perl4: + # before: x after: m + # before: a after: m + # before: m after: m + # before: m after: m + + # Perl5: + # before: x after: m + # Modification of a read-only value attempted at foo.pl line 12. 
+ # before: a + +=item * (Scalars) + +The behavior is slightly different for: + + print "$x", defined $x + + # perl 4: 1 + # perl 5: <no output, $x is not called into existence> + +=item * (Variable Suicide) + +Variable suicide behavior is more consistent under Perl 5. +Perl5 exhibits the same behavior for hashes and scalars, +that perl4 exhibits for only scalars. + + $aGlobal{ "aKey" } = "global value"; + print "MAIN:", $aGlobal{"aKey"}, "\n"; + $GlobalLevel = 0; + &test( *aGlobal ); + + sub test { + local( *theArgument ) = @_; + local( %aNewLocal ); # perl 4 != 5.001l,m + $aNewLocal{"aKey"} = "this should never appear"; + print "SUB: ", $theArgument{"aKey"}, "\n"; + $aNewLocal{"aKey"} = "level $GlobalLevel"; # what should print + $GlobalLevel++; + if( $GlobalLevel<4 ) { + &test( *aNewLocal ); + } + } + + # Perl4: + # MAIN:global value + # SUB: global value + # SUB: level 0 + # SUB: level 1 + # SUB: level 2 + + # Perl5: + # MAIN:global value + # SUB: global value + # SUB: this should never appear + # SUB: this should never appear + # SUB: this should never appear + +=back + +=head2 Context Traps - scalar, list contexts + +=over 5 + +=item * (list context) + +The elements of argument lists for formats are now evaluated in list +context. This means you can interpolate list values now. + + @fmt = ("foo","bar","baz"); + format STDOUT= + @<<<<< @||||| @>>>>> + @fmt; + . + write; + + # perl4 errors: Please use commas to separate fields in file + # perl5 prints: foo bar baz + +=item * (scalar context) + +The C<caller()> function now returns a false value in a scalar context +if there is no caller. This lets library files determine if they're +being required. + + caller() ? (print "You rang?\n") : (print "Got a 0\n"); + + # perl4 errors: There is no caller + # perl5 prints: Got a 0 + +=item * (scalar context) + +The comma operator in a scalar context is now guaranteed to give a +scalar context to its arguments. 
+ + @y= ('a','b','c'); + $x = (1, 2, @y); + print "x = $x\n"; + + # Perl4 prints: x = c # Thinks list context interpolates list + # Perl5 prints: x = 3 # Knows scalar uses length of list + +=item * (list, builtin) + +C<sprintf()> funkiness (array argument converted to scalar array count) +This test could be added to t/op/sprintf.t + + @z = ('%s%s', 'foo', 'bar'); + $x = sprintf(@z); + if ($x eq 'foobar') {print "ok 2\n";} else {print "not ok 2 '$x'\n";} + + # perl4 prints: ok 2 + # perl5 prints: not ok 2 + +C<printf()> works fine, though: + + printf STDOUT (@z); + print "\n"; + + # perl4 prints: foobar + # perl5 prints: foobar + +Probably a bug. + +=back + +=head2 Precedence Traps + +Perl4-to-Perl5 traps involving precedence order. + +=over 5 + +=item * Precedence + +LHS vs. RHS when both sides are getting an op. + + @arr = ( 'left', 'right' ); + $a{shift @arr} = shift @arr; + print join( ' ', keys %a ); + + # perl4 prints: left + # perl5 prints: right + +=item * Precedence + +These are now semantic errors because of precedence: + + @list = (1,2,3,4,5); + %map = ("a",1,"b",2,"c",3,"d",4); + $n = shift @list + 2; # first item in list plus 2 + print "n is $n, "; + $m = keys %map + 2; # number of items in hash plus 2 + print "m is $m\n"; + + # perl4 prints: n is 3, m is 6 + # perl5 errors and fails to compile + +=item * Precedence The precedence of assignment operators is now the same as the precedence of assignment. Perl 4 mistakenly gave them the precedence of the associated @@ -400,7 +956,7 @@ operator. So you now must parenthesize them in expressions like Otherwise - /foo/ ? $a += 2 : $a -= 2; + /foo/ ? $a += 2 : $a -= 2 would be erroneously parsed as @@ -412,111 +968,518 @@ On the other hand, now works as a C programmer would expect. -=item * +=item * Precedence -C<open FOO || die> is now incorrect. You need parens around the filehandle. -While temporarily supported, using such a construct will -generate a non-fatal (but non-suppressible) warning. 
+ open FOO || die; -=item * +is now incorrect. You need parentheses around the filehandle. +Otherwise, perl5 leaves the statement as its default precedence: -The elements of argument lists for formats are now evaluated in list -context. This means you can interpolate list values now. + open(FOO || die); -=item * + # perl4 opens or dies + # perl5 errors: Precedence problem: open FOO should be open(FOO) -You can't do a C<goto> into a block that is optimized away. Darn. +=item * Precedence -=item * +perl4 gives the special variable, C<$:> precedence, where perl5 +treats C<$::> as main C<package> -It is no longer syntactically legal to use whitespace as the name -of a variable, or as a delimiter for any kind of quote construct. -Double darn. + $a = "x"; print "$::a"; -=item * + # perl 4 prints: -:a + # perl 5 prints: x -The caller() function now returns a false value in a scalar context if there -is no caller. This lets library files determine if they're being required. +=item * Precedence -=item * +concatenation precedence over filetest operator? + + -e $foo .= "q" + + # perl4 prints: no output + # perl5 prints: Can't modify -e in concatenation + +=item * Precedence + +Assignment to value takes precedence over assignment to key in +perl5 when using the shift operator on both sides. + + @arr = ( 'left', 'right' ); + $a{shift @arr} = shift @arr; + print join( ' ', keys %a ); + + # perl4 prints: left + # perl5 prints: right + +=back + +=head2 General Regular Expression Traps using s///, etc. + +All types of RE traps. + +=over 5 + +=item * Regular Expression + +C<s'$lhs'$rhs'> now does no interpolation on either side. It used to +interpolate C<$lhs> but not C<$rhs>. 
(And still does not match a literal +'$' in string) + + $a=1;$b=2; + $string = '1 2 $a $b'; + $string =~ s'$a'$b'; + print $string,"\n"; + + # perl4 prints: $b 2 $a $b + # perl5 prints: 1 2 $a $b + +=item * Regular Expression C<m//g> now attaches its state to the searched string rather than the -regular expression. +regular expression. (Once the scope of a block is left for the sub, the +state of the searched string is lost) -=item * + $_ = "ababab"; + while(m/ab/g){ + &doit("blah"); + } + sub doit{local($_) = shift; print "Got $_ "} -C<reverse> is no longer allowed as the name of a sort subroutine. + # perl4 prints: blah blah blah + # perl5 prints: infinite loop blah... -=item * +=item * Regular Expression -B<taintperl> is no longer a separate executable. There is now a B<-T> -switch to turn on tainting when it isn't turned on automatically. +Currently, if you use the C<m//o> qualifier on a regular expression +within an anonymous sub, I<all> closures generated from that anonymous +sub will use the regular expression as it was compiled when it was used +the very first time in any such closure. For instance, if you say -=item * + sub build_match { + my($left,$right) = @_; + return sub { $_[0] =~ /$left stuff $right/o; }; + } -Double-quoted strings may no longer end with an unescaped C<$> or C<@>. +build_match() will always return a sub which matches the contents of +C<$left> and C<$right> as they were the I<first> time that build_match() +was called, not as they are in the current call. -=item * +This is probably a bug, and may change in future versions of Perl. -The archaic C<while/if> BLOCK BLOCK syntax is no longer supported. +=item * Regular Expression +If no parentheses are used in a match, Perl4 sets C<$+> to +the whole match, just like C<$&>. Perl5 does not. -=item * + "abcdef" =~ /b.*e/; + print "\$+ = $+\n"; -Negative array subscripts now count from the end of the array. 
+ # perl4 prints: bcde + # perl5 prints: -=item * +=item * Regular Expression -The comma operator in a scalar context is now guaranteed to give a -scalar context to its arguments. +substitution now returns the null string if it fails -=item * + $string = "test"; + $value = ($string =~ s/foo//); + print $value, "\n"; -The C<**> operator now binds more tightly than unary minus. -It was documented to work this way before, but didn't. + # perl4 prints: 0 + # perl5 prints: -=item * +Also see L<Numerical Traps> for another example of this new feature. -Setting C<$#array> lower now discards array elements. +=item * Regular Expression -=item * +C<s`lhs`rhs`> (using backticks) is now a normal substitution, with no +backtick expansion -delete() is not guaranteed to return the old value for tie()d arrays, -since this capability may be onerous for some modules to implement. + $string = ""; + $string =~ s`^`hostname`; + print $string, "\n"; -=item * + # perl4 prints: <the local hostname> + # perl5 prints: hostname + +=item * Regular Expression + +Stricter parsing of variables used in regular expressions + + s/^([^$grpc]*$grpc[$opt$plus$rep]?)//o; + + # perl4: compiles w/o error + # perl5: with Scalar found where operator expected ..., near "$opt$plus" + +an added component of this example, apparently from the same script, is +the actual value of the s'd string after the substitution. +C<[$opt]> is a character class in perl4 and an array subscript in perl5 + + $grpc = 'a'; + $opt = 'r'; + $_ = 'bar'; + s/^([^$grpc]*$grpc[$opt]?)/foo/; + print ; + + # perl4 prints: foo + # perl5 prints: foobar + +=item * Regular Expression + +Under perl5, C<m?x?> matches only once, like C<?x?>. Under perl4, it matched +repeatedly, like C</x/> or C<m!x!>. + + $test = "once"; + sub match { $test =~ m?once?; } + &match(); + if( &match() ) { + # m?x? matches more than once + print "perl4\n"; + } else { + # m?x? 
matches only once + print "perl5\n"; + } + + # perl4 prints: perl4 + # perl5 prints: perl5 + + +=item * Regular Expression + +Under perl4 and up to version 5.003, a failed C<m//g> match used to +reset the internal iterator, so that subsequent C<m//g> match attempts +began from the beginning of the string. In perl version 5.004 and later, +failed C<m//g> matches do not reset the iterator position (which can be +found using the C<pos()> function--see L<perlfunc/pos>). + + $test = "foop"; + for (1..3) { + print $1 while ($test =~ /(o)/g); + # pos $test = 0; # to get old behavior + } + + # perl4 prints: oooooo + # perl5.004 prints: oo + +You may always reset the iterator yourself as shown in the commented line +to get the old behavior. + +=back + +=head2 Subroutine, Signal, Sorting Traps + +The general group of Perl4-to-Perl5 traps having to do with +Signals, Sorting, and their related subroutines, as well as +general subroutine traps. Includes some OS-Specific traps. + +=over 5 + +=item * (Signals) + +Barewords that used to look like strings to Perl will now look like subroutine +calls if a subroutine by that name is defined before the compiler sees them. + + sub SeeYa { warn"Hasta la vista, baby!" } + $SIG{'TERM'} = SeeYa; + print "SIGTERM is now $SIG{'TERM'}\n"; + + # perl4 prints: SIGTERM is now main'SeeYa + # perl5 prints: SIGTERM is now main::1 + +Use B<-w> to catch this one + +=item * (Sort Subroutine) + +reverse is no longer allowed as the name of a sort subroutine. + + sub reverse{ print "yup "; $a <=> $b } + print sort reverse a,b,c; + + # perl4 prints: yup yup yup yup abc + # perl5 prints: abc + +=item * warn() won't let you specify a filehandle. + +Although it _always_ printed to STDERR, warn() would let you specify a +filehandle in perl4. With perl5 it does not. + + warn STDERR "Foo!"; + + # perl4 prints: Foo! 
+ # perl5 prints: String found where operator expected + +=back + +=head2 OS Traps + +=over 5 + +=item * (SysV) + +Under HPUX, and some other SysV OSes, one had to reset any signal handler, +within the signal handler function, each time a signal was handled with +perl4. With perl5, the reset is now done correctly. Any code relying +on the handler _not_ being reset will have to be reworked. + +Since version 5.002, Perl uses sigaction() under SysV. + + sub gotit { + print "Got @_... "; + } + $SIG{'INT'} = 'gotit'; + + $| = 1; + $pid = fork; + if ($pid) { + kill('INT', $pid); + sleep(1); + kill('INT', $pid); + } else { + while (1) {sleep(10);} + } + + # perl4 (HPUX) prints: Got INT... + # perl5 (HPUX) prints: Got INT... Got INT... + +=item * (SysV) + +Under SysV OSes, C<seek()> on a file opened to append C<E<gt>E<gt>> now does +the right thing w.r.t. the fopen() manpage. e.g., - When a file is opened +for append, it is impossible to overwrite information already in +the file. + + open(TEST,">>seek.test"); + $start = tell TEST ; + foreach(1 .. 9){ + print TEST "$_ "; + } + $end = tell TEST ; + seek(TEST,$start,0); + print TEST "18 characters here"; + + # perl4 (solaris) seek.test has: 18 characters here + # perl5 (solaris) seek.test has: 1 2 3 4 5 6 7 8 9 18 characters here + + + +=back + +=head2 Interpolation Traps + +Perl4-to-Perl5 traps having to do with how things get interpolated +within certain expressions, statements, contexts, or whatever. + +=over 5 + +=item * Interpolation + +@ now always interpolates an array in double-quotish strings. + + print "To: someone@somewhere.com\n"; + + # perl4 prints: To:someone@somewhere.com + # perl5 errors : In string, @somewhere now must be written as \@somewhere + +=item * Interpolation + +Double-quoted strings may no longer end with an unescaped $ or @. 
+ + $foo = "foo$"; + $bar = "bar@"; + print "foo is $foo, bar is $bar\n"; + + # perl4 prints: foo is foo$, bar is bar@ + # perl5 errors: Final $ should be \$ or $name + +Note: perl5 DOES NOT error on the terminating @ in $bar + +=item * Interpolation + +Perl now sometimes evaluates arbitrary expressions inside braces that occur +within double quotes (usually when the opening brace is preceded by C<$> +or C<@>). + + @www = "buz"; + $foo = "foo"; + $bar = "bar"; + sub foo { return "bar" }; + print "|@{w.w.w}|${main'foo}|"; + + # perl4 prints: |@{w.w.w}|foo| + # perl5 prints: |buz|bar| + +Note that you can C<use strict;> to ward off such trappiness under perl5. + +=item * Interpolation The construct "this is $$x" used to interpolate the pid at that -point, but now tries to dereference $x. C<$$> by itself still +point, but now apparently tries to dereference C<$x>. C<$$> by itself still works fine, however. -=item * + print "this is $$x\n"; -The meaning of foreach has changed slightly when it is iterating over a -list which is not an array. This used to assign the list to a -temporary array, but no longer does so (for efficiency). This means -that you'll now be iterating over the actual values, not over copies of -the values. Modifications to the loop variable can change the original -values. To retain Perl 4 semantics you need to assign your list -explicitly to a temporary array and then iterate over that. For -example, you might need to change + # perl4 prints: this is XXXx (XXX is the current pid) + # perl5 prints: this is + +=item * Interpolation + +Creation of hashes on the fly with C<eval "EXPR"> now requires either both +C<$>'s to be protected in the specification of the hash name, or both curlies +to be protected. If both curlies are protected, the result will be compatible +with perl4 and perl5. This is a very common practice, and should be changed +to use the block form of C<eval{}> if possible. 
+ + $hashname = "foobar"; + $key = "baz"; + $value = 1234; + eval "\$$hashname{'$key'} = q|$value|"; + (defined($foobar{'baz'})) ? (print "Yup") : (print "Nope"); - foreach $var (grep /x/, @list) { ... } + # perl4 prints: Yup + # perl5 prints: Nope + +Changing + + eval "\$$hashname{'$key'} = q|$value|"; to - foreach $var (my @tmp = grep /x/, @list) { ... } + eval "\$\$hashname{'$key'} = q|$value|"; -Otherwise changing C<$var> will clobber the values of @list. (This most often -happens when you use C<$_> for the loop variable, and call subroutines in -the loop that don't properly localize C<$_>.) +causes the following result: -=item * + # perl4 prints: Nope + # perl5 prints: Yup -Some error messages will be different. +or, changing to -=item * + eval "\$$hashname\{'$key'\} = q|$value|"; + +causes the following result: + + # perl4 prints: Yup + # perl5 prints: Yup + # and is compatible for both versions + + +=item * Interpolation + +perl4 programs which unconsciously rely on the bugs in earlier perl versions. + + perl -e '$bar=q/not/; print "This is $foo{$bar} perl5"' + + # perl4 prints: This is not perl5 + # perl5 prints: This is perl5 + +=item * Interpolation + +You also have to be careful about array references. + + print "$foo{" + + perl 4 prints: { + perl 5 prints: syntax error + +=item * Interpolation -Some bugs may have been inadvertently removed. +Similarly, watch out for: + + $foo = "array"; + print "\$$foo{bar}\n"; + + # perl4 prints: $array{bar} + # perl5 prints: $ + +Perl 5 is looking for C<$array{bar}> which doesn't exist, but perl 4 is +happy just to expand $foo to "array" by itself. Watch out for this +especially in C<eval>'s. + +=item * Interpolation + +C<qq()> string passed to C<eval> + + eval qq( + foreach \$y (keys %\$x\) { + \$count++; + } + ); + + # perl4 runs this ok + # perl5 prints: Can't find string terminator ")" =back + +=head2 DBM Traps + +General DBM traps. 
+ +=over 5 + +=item * DBM + +Existing dbm databases created under perl4 (or any other dbm/ndbm tool) +may cause the same script, run under perl5, to fail. The build of perl5 +must have been linked with the same dbm/ndbm as the default for C<dbmopen()> +to function properly without C<tie>'ing to an extension dbm implementation. + + dbmopen (%dbm, "file", undef); + print "ok\n"; + + # perl4 prints: ok + # perl5 prints: ok (IFF linked with -ldbm or -lndbm) + + +=item * DBM + +Existing dbm databases created under perl4 (or any other dbm/ndbm tool) +may cause the same script, run under perl5, to fail. The error generated +when exceeding the limit on the key/value size will cause perl5 to exit +immediately. + + dbmopen(DB, "testdb",0600) || die "couldn't open db! $!"; + $DB{'trap'} = "x" x 1024; # value too large for most dbm/ndbm + print "YUP\n"; + + # perl4 prints: + dbm store returned -1, errno 28, key "trap" at - line 3. + YUP + + # perl5 prints: + dbm store returned -1, errno 28, key "trap" at - line 3. + +=back + +=head2 Unclassified Traps + +Everything else. + +=over 5 + +=item * Unclassified + +C<require>/C<do> trap using returned value + +If the file doit.pl has: + + sub foo { + $rc = do "./do.pl"; + return 8; + } + print &foo, "\n"; + +And the do.pl file has the following single line: + + return 3; + +Running doit.pl gives the following: + + # perl 4 prints: 3 (aborts the subroutine early) + # perl 5 prints: 8 + +Same behavior if you replace C<do> with C<require>. + +=back + +As always, if any of these are ever officially declared as bugs, +they'll be fixed and removed. + diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 3d1c195007..198e5c12a3 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -7,7 +7,7 @@ perlvar - Perl predefined variables =head2 Predefined Names The following names have special meaning to Perl. 
Most of the -punctuational names have reasonable mnemonics, or analogues in one of +punctuation names have reasonable mnemonics, or analogues in one of the shells. Nevertheless, if you wish to use the long variable names, you just need to say @@ -51,8 +51,8 @@ a reference, you'll raise a run-time exception. The default input and pattern-searching space. The following pairs are equivalent: - while (<>) {...} # only equivalent in while! - while ($_ = <>) {...} + while (<>) {...} # equivalent in only while! + while (defined($_ = <>)) {...} /^Subject:/ $_ =~ /^Subject:/ @@ -63,7 +63,7 @@ equivalent: chop chop($_) -Here are the places where Perl will assume $_ even if you +Here are the places where Perl will assume $_ even if you don't use it: =over 3 @@ -83,16 +83,16 @@ Various list functions like print() and unlink(). The pattern matching operations C<m//>, C<s///>, and C<tr///> when used without an C<=~> operator. -=item * +=item * The default iterator variable in a C<foreach> loop if no other variable is supplied. -=item * +=item * The implicit iterator variable in the grep() and map() functions. -=item * +=item * The default place to put an input record when a C<E<lt>FHE<gt>> operation's result is tested by itself as the sole criterion of a C<while> @@ -102,7 +102,11 @@ test. Note that outside of a C<while> test, this will not happen. (Mnemonic: underline is understood in certain operations.) -=item $<I<digit>> +=back + +=over 8 + +=item $E<lt>I<digit>E<gt> Contains the subpattern from the corresponding set of parentheses in the last pattern matched, not counting patterns matched in nested @@ -123,7 +127,7 @@ BLOCK). (Mnemonic: like & in some editors.) This variable is read-only. The string preceding whatever was matched by the last successful pattern match (not counting any matches hidden within a BLOCK or eval -enclosed by the current BLOCK). (Mnemonic: ` often precedes a quoted +enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted string.) 
This variable is read-only. =item $POSTMATCH @@ -132,7 +136,7 @@ string.) This variable is read-only. The string following whatever was matched by the last successful pattern match (not counting any matches hidden within a BLOCK or eval() -enclosed by the current BLOCK). (Mnemonic: ' often follows a quoted +enclosed by the current BLOCK). (Mnemonic: C<'> often follows a quoted string.) Example: $_ = 'abcdefghi'; @@ -158,15 +162,15 @@ This variable is read-only. =item $* -Set to 1 to do multiline matching within a string, 0 to tell Perl +Set to 1 to do multi-line matching within a string, 0 to tell Perl that it can assume that strings contain a single line, for the purpose of optimizing pattern matches. Pattern matches on strings containing multiple newlines can produce confusing results when "C<$*>" is 0. Default is 0. (Mnemonic: * matches multiple things.) Note that this variable -only influences the interpretation of "C<^>" and "C<$>". A literal newline can +influences the interpretation of only "C<^>" and "C<$>". A literal newline can be searched for even when C<$* == 0>. -Use of "C<$*>" is deprecated in Perl 5. +Use of "C<$*>" is deprecated in modern perls. =item input_line_number HANDLE EXPR @@ -176,8 +180,9 @@ Use of "C<$*>" is deprecated in Perl 5. =item $. -The current input line number of the last filehandle that was read. An -explicit close on the filehandle resets the line number. Since +The current input line number for the last file handle from +which you read (or performed a C<seek> or C<tell> on). An +explicit close on a filehandle resets the line number. Because "C<E<lt>E<gt>>" never does an explicit close, line numbers increase across ARGV files (but see examples under eof()). Localizing C<$.> has the effect of also localizing Perl's notion of "the last read @@ -193,33 +198,39 @@ number.) =item $/ The input record separator, newline by default. 
Works like B<awk>'s RS -variable, including treating blank lines as delimiters if set to the -null string. You may set it to a multicharacter string to match a -multi-character delimiter. Note that setting it to C<"\n\n"> means -something slightly different than setting it to C<"">, if the file -contains consecutive blank lines. Setting it to C<""> will treat two or -more consecutive blank lines as a single blank line. Setting it to -C<"\n\n"> will blindly assume that the next input character belongs to the -next paragraph, even if it's a newline. (Mnemonic: / is used to -delimit line boundaries when quoting poetry.) +variable, including treating empty lines as delimiters if set to the +null string. (Note: An empty line cannot contain any spaces or tabs.) +You may set it to a multi-character string to match a multi-character +delimiter, or to C<undef> to read to end of file. Note that setting it +to C<"\n\n"> means something slightly different than setting it to +C<"">, if the file contains consecutive empty lines. Setting it to +C<""> will treat two or more consecutive empty lines as a single empty +line. Setting it to C<"\n\n"> will blindly assume that the next input +character belongs to the next paragraph, even if it's a newline. +(Mnemonic: / is used to delimit line boundaries when quoting poetry.) undef $/; $_ = <FH>; # whole file now here s/\n[ \t]+/ /g; +Remember: the value of $/ is a string, not a regexp. AWK has to be +better for something :-) + =item autoflush HANDLE EXPR =item $OUTPUT_AUTOFLUSH =item $| -If set to nonzero, forces a flush after every write or print on the -currently selected output channel. Default is 0. Note that STDOUT -will typically be line buffered if output is to the terminal and block -buffered otherwise. Setting this variable is useful primarily when you -are outputting to a pipe, such as when you are running a Perl script -under rsh and want to see the output as it's happening. This has no -effect on input buffering. 
+If set to nonzero, forces a flush right away and after every write or print on the +currently selected output channel. Default is 0 (regardless of whether +the channel is actually buffered by the system or not; C<$|> tells you +only whether you've asked Perl explicitly to flush after each write). +Note that STDOUT will typically be line buffered if output is to the +terminal and block buffered otherwise. Setting this variable is useful +primarily when you are outputting to a pipe, such as when you are running +a Perl script under rsh and want to see the output as it's happening. This +has no effect on input buffering. (Mnemonic: when you want your pipes to be piping hot.) =item output_field_separator HANDLE EXPR @@ -231,8 +242,8 @@ effect on input buffering. =item $, The output field separator for the print operator. Ordinarily the -print operator simply prints out the comma separated fields you -specify. In order to get behavior more like B<awk>, set this variable +print operator simply prints out the comma-separated fields you +specify. To get behavior more like B<awk>, set this variable as you would set B<awk>'s OFS variable to specify what is printed between fields. (Mnemonic: what is printed when there is a , in your print statement.) @@ -246,12 +257,12 @@ print statement.) =item $\ The output record separator for the print operator. Ordinarily the -print operator simply prints out the comma separated fields you -specify, with no trailing newline or record separator assumed. In -order to get behavior more like B<awk>, set this variable as you would +print operator simply prints out the comma-separated fields you +specify, with no trailing newline or record separator assumed. +To get behavior more like B<awk>, set this variable as you would set B<awk>'s ORS variable to specify what is printed at the end of the print. (Mnemonic: you set "C<$\>" instead of adding \n at the end of the -print. Also, it's just like /, but it's what you get "back" from +print. 
Also, it's just like C<$/>, but it's what you get "back" from Perl.) =item $LIST_SEPARATOR @@ -268,7 +279,7 @@ is a space. (Mnemonic: obvious, I think.) =item $; -The subscript separator for multi-dimensional array emulation. If you +The subscript separator for multidimensional array emulation. If you refer to a hash element as $foo{$a,$b,$c} @@ -291,7 +302,7 @@ keys contain binary data there might not be any safe value for "C<$;>". semi-semicolon. Yeah, I know, it's pretty lame, but "C<$,>" is already taken for something more important.) -Consider using "real" multi-dimensional arrays in Perl 5. +Consider using "real" multidimensional arrays. =item $OFMT @@ -300,11 +311,12 @@ Consider using "real" multi-dimensional arrays in Perl 5. The output format for printed numbers. This variable is a half-hearted attempt to emulate B<awk>'s OFMT variable. There are times, however, when B<awk> and Perl have differing notions of what is in fact -numeric. Also, the initial value is %.20g rather than %.6g, so you -need to set "C<$#>" explicitly to get B<awk>'s value. (Mnemonic: # is the -number sign.) +numeric. The initial value is %.I<n>g, where I<n> is the value +of the macro DBL_DIG from your system's F<float.h>. This is different from +B<awk>'s default OFMT setting of %.6g, so you need to set "C<$#>" +explicitly to get B<awk>'s value. (Mnemonic: # is the number sign.) -Use of "C<$#>" is deprecated in Perl 5. +Use of "C<$#>" is deprecated. =item format_page_number HANDLE EXPR @@ -360,7 +372,7 @@ appended. (Mnemonic: points to top of page.) =item $: The current set of characters after which a string may be broken to -fill continuation fields (starting with ^) in a format. Default is +fill continuation fields (starting with ^) in a format. Default is S<" \n-">, to break on whitespace or hyphens. (Mnemonic: a "colon" in poetry is a part of a line.) @@ -370,7 +382,7 @@ poetry is a part of a line.) =item $^L -What formats output to perform a formfeed. Default is \f. 
+What formats output to perform a form feed. Default is \f. =item $ACCUMULATOR @@ -389,10 +401,22 @@ L<perlfunc/formline()>. The status returned by the last pipe close, backtick (C<``>) command, or system() operator. Note that this is the status word returned by -the wait() system call, so the exit value of the subprocess is actually -(C<$? E<gt>E<gt> 8>). Thus on many systems, C<$? & 255> gives which signal, -if any, the process died from, and whether there was a core dump. -(Mnemonic: similar to B<sh> and B<ksh>.) +the wait() system call (or else is made up to look like it). Thus, +the exit value of the subprocess is actually (C<$? E<gt>E<gt> 8>), and +C<$? & 255> gives which signal, if any, the process died from, and +whether there was a core dump. (Mnemonic: similar to B<sh> and +B<ksh>.) + +Note that if you have installed a signal handler for C<SIGCHLD>, the +value of C<$?> will usually be wrong outside that handler. + +Inside an C<END> subroutine C<$?> contains the value that is going to be +given to C<exit()>. You can modify C<$?> in an C<END> subroutine to +change the exit status of the script. + +Under VMS, the pragma C<use vmsish 'status'> makes C<$?> reflect the +actual VMS exit status, instead of the default emulation of POSIX +status. =item $OS_ERROR @@ -405,7 +429,7 @@ all the usual caveats. (This means that you shouldn't depend on the value of "C<$!>" to be anything in particular unless you've gotten a specific error return indicating a system error.) If used in a string context, yields the corresponding system error string. You can assign -to "C<$!>" in order to set I<errno> if, for instance, you want "C<$!>" to return the +to "C<$!>" to set I<errno> if, for instance, you want "C<$!>" to return the string for error I<n>, or you want to set the exit value for the die() operator. (Mnemonic: What just went bang?) @@ -413,13 +437,16 @@ operator. (Mnemonic: What just went bang?) 
=item $^E -More specific information about the last system error than that -provided by C<$!>, if available. (If not, it's just C<$!> again.) -At the moment, this differs from C<$!> only under VMS, where it -provides the VMS status value from the last system error. The +More specific information about the last system error than that provided by +C<$!>, if available. (If not, it's just C<$!> again, except under OS/2.) +At the moment, this differs from C<$!> under only VMS and OS/2, where it +provides the VMS status value from the last system error, and OS/2 error +code of the last call to OS/2 API which was not directed via CRT. The caveats mentioned in the description of C<$!> apply here, too. (Mnemonic: Extra error explanation.) +Note that under OS/2 C<$!> and C<$^E> do not track each other, so if an +OS/2-specific call is performed, you may need to check both. =item $EVAL_ERROR @@ -431,7 +458,8 @@ invoked may have failed in the normal fashion). (Mnemonic: Where was the syntax error "at"?) Note that warning messages are not collected in this variable. You can, -however, set up a routine to process warnings by setting $SIG{__WARN__} below. +however, set up a routine to process warnings by setting C<$SIG{__WARN__}> +as described below. =item $PROCESS_ID @@ -462,8 +490,9 @@ The effective uid of this process. Example: $< = $>; # set real to effective uid ($<,$>) = ($>,$<); # swap real and effective uid -(Mnemonic: it's the uid you went I<TO>, if you're running setuid.) Note: -"C<$E<lt>>" and "C<$E<gt>>" can only be swapped on machines supporting setreuid(). +(Mnemonic: it's the uid you went I<TO>, if you're running setuid.) +Note: "C<$E<lt>>" and "C<$E<gt>>" can be swapped only on machines +supporting setreuid(). =item $REAL_GROUP_ID @@ -475,8 +504,14 @@ The real gid of this process. If you are on a machine that supports membership in multiple groups simultaneously, gives a space separated list of groups you are in. 
The first number is the one returned by getgid(), and the subsequent ones by getgroups(), one of which may be -the same as the first number. (Mnemonic: parentheses are used to I<GROUP> -things. The real gid is the group you I<LEFT>, if you're running setgid.) +the same as the first number. + +However, a value assigned to "C<$(>" must be a single number used to +set the real gid. So the value given by "C<$(>" should I<not> be assigned +back to "C<$(>" without being forced numeric, such as by adding zero. + +(Mnemonic: parentheses are used to I<GROUP> things. The real gid is the +group you I<LEFT>, if you're running setgid.) =item $EFFECTIVE_GROUP_ID @@ -488,21 +523,29 @@ The effective gid of this process. If you are on a machine that supports membership in multiple groups simultaneously, gives a space separated list of groups you are in. The first number is the one returned by getegid(), and the subsequent ones by getgroups(), one of -which may be the same as the first number. (Mnemonic: parentheses are -used to I<GROUP> things. The effective gid is the group that's I<RIGHT> for -you, if you're running setgid.) +which may be the same as the first number. + +Similarly, a value assigned to "C<$)>" must also be a space-separated +list of numbers. The first number is used to set the effective gid, and +the rest (if any) are passed to setgroups(). To get the effect of an +empty list for setgroups(), just repeat the new effective gid; that is, +to force an effective gid of 5 and an effectively empty setgroups() +list, say C< $) = "5 5" >. + +(Mnemonic: parentheses are used to I<GROUP> things. The effective gid +is the group that's I<RIGHT> for you, if you're running setgid.) -Note: "C<$E<lt>>", "C<$E<gt>>", "C<$(>" and "C<$)>" can only be set on machines -that support the corresponding I<set[re][ug]id()> routine. "C<$(>" and "C<$)>" -can only be swapped on machines supporting setregid(). 
Because Perl doesn't -currently use initgroups(), you can't set your group vector to multiple groups. +Note: "C<$E<lt>>", "C<$E<gt>>", "C<$(>" and "C<$)>" can be set only on +machines that support the corresponding I<set[re][ug]id()> routine. "C<$(>" +and "C<$)>" can be swapped only on machines supporting setregid(). =item $PROGRAM_NAME =item $0 Contains the name of the file containing the Perl script being -executed. Assigning to "C<$0>" modifies the argument area that the ps(1) +executed. On some operating systems +assigning to "C<$0>" modifies the argument area that the ps(1) program sees. This is more useful as a way of indicating the current program state than it is for hiding the program you're running. (Mnemonic: same as B<sh> and B<ksh>.) @@ -523,24 +566,15 @@ discouraged. =item $] -The string printed out when you say C<perl -v>. -(This is currently I<BROKEN>). -It can be used to -determine at the beginning of a script whether the perl interpreter -executing the script is in the right range of versions. If used in a -numeric context, returns the version + patchlevel / 1000. Example: - - # see if getc is available - ($version,$patchlevel) = - $] =~ /(\d+\.\d+).*\nPatch level: (\d+)/; - print STDERR "(No filename completion available.)\n" - if $version * 1000 + $patchlevel < 2016; - -or, used numerically, +The version + patchlevel / 1000 of the Perl interpreter. This variable +can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: Is this version +of perl in the right bracket?) Example: warn "No checksumming!\n" if $] < 3.019; -(Mnemonic: Is this version of perl in the right bracket?) +See also the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the Perl interpreter is too old. =item $DEBUGGING @@ -561,6 +595,11 @@ closed before the open() is attempted.) 
Note that the close-on-exec status of a file descriptor will be decided according to the value of C<$^F> at the time of the open, not the time of the exec. +=item $^H + +The current set of syntax checks enabled by C<use strict>. See the +documentation of C<strict> for more details. + =item $INPLACE_EDIT =item $^I @@ -569,6 +608,7 @@ The current value of the inplace-edit extension. Use C<undef> to disable inplace editing. (Mnemonic: value of B<-i> switch.) =item $OSNAME + =item $^O The name of the operating system under which this copy of Perl was @@ -588,7 +628,7 @@ it. =item $^T The time at which the script began running, in seconds since the -epoch (beginning of 1970). The values returned by the B<-M>, B<-A> +epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, and B<-C> filetests are based on this value. @@ -596,8 +636,8 @@ based on this value. =item $^W -The current value of the warning switch, either TRUE or FALSE. (Mnemonic: related to the -B<-w> switch.) +The current value of the warning switch, either TRUE or FALSE. +(Mnemonic: related to the B<-w> switch.) =item $EXECUTABLE_NAME @@ -607,13 +647,13 @@ The name that the Perl binary itself was executed as, from C's C<argv[0]>. =item $ARGV -contains the name of the current file when reading from <>. +contains the name of the current file when reading from E<lt>E<gt>. =item @ARGV The array @ARGV contains the command line arguments intended for the script. Note that C<$#ARGV> is the generally number of arguments minus -one, since C<$ARGV[0]> is the first argument, I<NOT> the command name. See +one, because C<$ARGV[0]> is the first argument, I<NOT> the command name. See "C<$0>" for the command name. =item @INC @@ -621,14 +661,14 @@ one, since C<$ARGV[0]> is the first argument, I<NOT> the command name. See The array @INC contains the list of places to look for Perl scripts to be evaluated by the C<do EXPR>, C<require>, or C<use> constructs. 
It initially consists of the arguments to any B<-I> command line switches, -followed by the default Perl library, probably "/usr/local/lib/perl", +followed by the default Perl library, probably F</usr/local/lib/perl>, followed by ".", to represent the current directory. If you need to -modify this at runtime, you should use the C<use lib> pragma in order -to also get the machine-dependent library properly loaded: +modify this at runtime, you should use the C<use lib> pragma +to get the machine-dependent library properly loaded also: use lib '/mypath/libdir/'; use SomeMod; - + =item %INC The hash %INC contains entries for each filename that has @@ -660,7 +700,7 @@ signals. Example: $SIG{'INT'} = 'DEFAULT'; # restore default action $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT -The %SIG array only contains values for the signals actually set within +The %SIG array contains values for only the signals actually set within the Perl script. Here are some other examples: $SIG{PIPE} = Plumber; # SCARY!! @@ -669,12 +709,28 @@ the Perl script. Here are some other examples: $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return?? The one marked scary is problematic because it's a bareword, which means -sometimes it's a string representing the function, and sometimes it's +sometimes it's a string representing the function, and sometimes it's going to call the subroutine call right then and there! Best to be sure -and quote it or take a reference to it. *Plumber works too. See L<perlsubs>. +and quote it or take a reference to it. *Plumber works too. See L<perlsub>. + +If your system has the sigaction() function then signal handlers are +installed using it. This means you get reliable signal handling. If +your system has the SA_RESTART flag it is used when signals handlers are +installed. This means that system calls for which it is supported +continue rather than returning when a signal arrives. 
If you want your +system calls to be interrupted by signal delivery then do something like +this: + + use POSIX ':signal_h'; + + my $alarm = 0; + sigaction SIGALRM, new POSIX::SigAction sub { $alarm = 1 } + or die "Error setting SIGALRM handler: $!\n"; + +See L<POSIX>. Certain internal hooks can be also set using the %SIG hash. The -routine indicated by $SIG{__WARN__} is called when a warning message is +routine indicated by C<$SIG{__WARN__}> is called when a warning message is about to be printed. The warning message is passed as the first argument. The presence of a __WARN__ hook causes the ordinary printing of warnings to STDERR to be suppressed. You can use this to save warnings @@ -683,13 +739,27 @@ in a variable, or turn warnings into fatal errors, like this: local $SIG{__WARN__} = sub { die $_[0] }; eval $proggie; -The routine indicated by $SIG{__DIE__} is called when a fatal exception +The routine indicated by C<$SIG{__DIE__}> is called when a fatal exception is about to be thrown. The error message is passed as the first argument. When a __DIE__ hook routine returns, the exception processing continues as it would have in the absence of the hook, unless the hook routine itself exits via a C<goto>, a loop exit, or a die(). -The __DIE__ handler is explicitly disabled during the call, so that you -can die from a __DIE__ handler. Similarly for __WARN__. +The C<__DIE__> handler is explicitly disabled during the call, so that you +can die from a C<__DIE__> handler. Similarly for C<__WARN__>. See +L<perlfunc/die>, L<perlfunc/warn> and L<perlfunc/eval>. -=back +=item $^M + +By default, running out of memory it is not trappable. However, if +compiled for this, Perl may use the contents of C<$^M> as an emergency +pool after die()ing with this message. Suppose that your Perl were +compiled with -DEMERGENCY_SBRK and used Perl's malloc. Then + $^M = 'a' x (1<<16); + +would allocate a 64K buffer for use when in emergency. 
See the F<INSTALL> +file for information on how to enable this option. As a disincentive to +casual use of this advanced feature, there is no L<English> long name for +this variable. + +=back diff --git a/pod/perlxs.pod b/pod/perlxs.pod index 191a78fe89..13ad669531 100644 --- a/pod/perlxs.pod +++ b/pod/perlxs.pod @@ -167,7 +167,21 @@ be received by Perl as the return value of the XSUB. If the XSUB has a return type of C<void> then the compiler will not supply a RETVAL variable for that function. When using -the PPCODE: directive the RETVAL variable may not be needed. +the PPCODE: directive the RETVAL variable is not needed, unless used +explicitly. + +If PPCODE: directive is not used, C<void> return value should be used +only for subroutines which do not return a value, I<even if> CODE: +directive is used which sets ST(0) explicitly. + +Older versions of this document recommended to use C<void> return +value in such cases. It was discovered that this could lead to +segfaults in cases when XSUB was I<truely> C<void>. This practice is +now deprecated, and may be not supported at some future version. Use +the return value C<SV *> in such cases. (Currently C<xsubpp> contains +some heuristic code which tries to disambiguate between "truely-void" +and "old-practice-declared-as-void" functions. Hence your code is at +mercy of this heuristics unless you use C<SV *> as return value.) =head2 The MODULE Keyword @@ -275,7 +289,7 @@ its parameters. The Perl usage is given first. $status = rpcb_gettime( "localhost", $timep ); -The XSUB follows. +The XSUB follows. bool_t rpcb_gettime(host,timep) @@ -305,7 +319,7 @@ above, this keyword does not affect the way the compiler handles RETVAL. =head2 The NO_INIT Keyword The NO_INIT keyword is used to indicate that a function -parameter is being used as only an output value. The B<xsubpp> +parameter is being used only as an output value. 
The B<xsubpp> compiler will normally generate code to read the values of all function parameters from the argument stack and assign them to C variables upon entry to the function. NO_INIT @@ -314,7 +328,7 @@ output rather than for input and that they will be handled before the function terminates. The following example shows a variation of the rpcb_gettime() function. -This function uses the timep variable as only an output variable and does +This function uses the timep variable only as an output variable and does not care about its initial contents. bool_t @@ -416,6 +430,23 @@ A correct, but error-prone example. timep RETVAL +=head2 The SCOPE: Keyword + +The SCOPE: keyword allows scoping to be enabled for a particular XSUB. If +enabled, the XSUB will invoke ENTER and LEAVE automatically. + +To support potentially complex type mappings, if a typemap entry used +by this XSUB contains a comment like C</*scope*/> then scoping will +automatically be enabled for that XSUB. + +To enable scoping: + + SCOPE: ENABLE + +To disable scoping: + + SCOPE: DISABLE + =head2 The INPUT: Keyword The XSUB's parameters are usually evaluated immediately after entering the @@ -543,7 +574,7 @@ the following statement. =head2 Returning Undef And Empty Lists -Occasionally the programmer will want to simply return +Occasionally the programmer will want to return simply C<undef> or an empty list if a function fails rather than a separate status value. The rpcb_gettime() function offers just this situation. If the function succeeds we would like @@ -553,13 +584,13 @@ of $timep will either be undef or it will be a valid time. $timep = rpcb_gettime( "localhost" ); -The following XSUB uses the C<void> return type to disable the generation of -the RETVAL variable and uses a CODE: block to indicate to the compiler +The following XSUB uses the C<SV *> return type as a mneumonic only, +and uses a CODE: block to indicate to the compiler that the programmer has supplied all the necessary code. 
The sv_newmortal() call will initialize the return value to undef, making that the default return value. - void + SV * rpcb_gettime(host) char * host PREINIT: @@ -573,7 +604,7 @@ the default return value. The next example demonstrates how one would place an explicit undef in the return value, should the need arise. - void + SV * rpcb_gettime(host) char * host PREINIT: @@ -614,7 +645,7 @@ other C<XSRETURN> macros. The REQUIRE: keyword is used to indicate the minimum version of the B<xsubpp> compiler needed to compile the XS module. An XS module which -contains the following statement will only compile with B<xsubpp> version +contains the following statement will compile with only B<xsubpp> version 1.922 or greater: REQUIRE: 1.922 @@ -647,7 +678,7 @@ terminate the code block. =head2 The VERSIONCHECK: Keyword The VERSIONCHECK: keyword corresponds to B<xsubpp>'s C<-versioncheck> and -C<-noversioncheck> options. This keyword overrides the commandline +C<-noversioncheck> options. This keyword overrides the command line options. Version checking is enabled by default. When version checking is enabled the XS module will attempt to verify that its version matches the version of the PM module. @@ -663,7 +694,7 @@ To disable version checking: =head2 The PROTOTYPES: Keyword The PROTOTYPES: keyword corresponds to B<xsubpp>'s C<-prototypes> and -C<-noprototypes> options. This keyword overrides the commandline options. +C<-noprototypes> options. This keyword overrides the command line options. Prototypes are enabled by default. When prototypes are enabled XSUBs will be given Perl prototypes. This keyword may be used multiple times in an XS module to enable and disable prototypes for different parts of the module. @@ -700,7 +731,7 @@ prototypes. =head2 The ALIAS: Keyword -The ALIAS: keyword allows an XSUB to have two more more unique Perl names +The ALIAS: keyword allows an XSUB to have two more unique Perl names and to know which of those names was used when it was invoked. 
The Perl names may be fully-qualified with package names. Each alias is given an index. The compiler will setup a variable called C<ix> which contain the @@ -760,8 +791,8 @@ variable (see L<"The ALIAS: Keyword">), or maybe via the C<items> variable B<default> case if it is not associated with a conditional. The following example shows CASE switched via C<ix> with a function C<rpcb_gettime()> having an alias C<x_gettime()>. When the function is called as -C<rpcb_gettime()> it's parameters are the usual C<(char *host, time_t *timep)>, -but when the function is called as C<x_gettime()> is parameters are +C<rpcb_gettime()> its parameters are the usual C<(char *host, time_t *timep)>, +but when the function is called as C<x_gettime()> its parameters are reversed, C<(time_t *timep, char *host)>. long @@ -827,17 +858,17 @@ C<&> through, so the function call looks like C<rpcb_gettime(host, &timep)>. =head2 Inserting Comments and C Preprocessor Directives C preprocessor directives are allowed within BOOT:, PREINIT: INIT:, -CODE:, PPCODE: and CLEANUP: blocks, as well as outside the functions. +CODE:, PPCODE:, and CLEANUP: blocks, as well as outside the functions. Comments are allowed anywhere after the MODULE keyword. The compiler will pass the preprocessor directives through untouched and will remove the commented lines. + Comments can be added to XSUBs by placing a C<#> as the first non-whitespace of a line. Care should be taken to avoid making the comment look like a C preprocessor directive, lest it be interpreted as such. The simplest way to prevent this is to put whitespace in front of the C<#>. - If you use preprocessor directives to choose one of two versions of a function, use @@ -936,7 +967,7 @@ example. # char* having the name of the package for the blessing. 
O_OBJECT sv_setref_pv( $arg, CLASS, (void*)$var ); - + INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) @@ -1085,7 +1116,7 @@ File C<RPC.xs>: Interface to some ONC+ RPC bind library functions. MODULE = RPC PACKAGE = RPC - void + SV * rpcb_gettime(host="localhost") char *host PREINIT: @@ -1146,5 +1177,5 @@ This document covers features supported by C<xsubpp> 1.935. =head1 AUTHOR -Dean Roehrich F<E<lt>roehrich@cray.comE<gt>> -Mar 12, 1996 +Dean Roehrich <F<roehrich@cray.com>> +Jul 8, 1996 diff --git a/pod/perlxstut.pod b/pod/perlxstut.pod index 7fea4210a9..cdd4344b78 100644 --- a/pod/perlxstut.pod +++ b/pod/perlxstut.pod @@ -1,6 +1,6 @@ =head1 NAME -perlXStut - Tutorial for XSUB's +perlXStut - Tutorial for XSUBs =head1 DESCRIPTION @@ -10,8 +10,8 @@ L<perlxs>. This tutorial starts with very simple examples and becomes more complex, with each new example adding new features. Certain concepts may not be -completely explained until later in the tutorial in order to slowly ease -the reader into building extensions. +completely explained until later in the tutorial to ease the +reader slowly into building extensions. =head2 VERSION CAVEAT @@ -25,13 +25,21 @@ features were added to Perl 5. =item * -In versions of 5.002 prior to version beta 3, then the line in the .xs file +In versions of Perl 5.002 prior to the gamma version, the test script +in Example 1 will not function properly. You need to change the "use +lib" line to read: + + use lib './blib'; + +=item * + +In versions of Perl 5.002 prior to version beta 3, the line in the .xs file about "PROTOTYPES: DISABLE" will cause a compiler error. Simply remove that line from the file. =item * -In versions of 5.002 prior to version 5.002b1h, the test.pl file was not +In versions of Perl 5.002 prior to version 5.002b1h, the test.pl file was not automatically created by h2xs. This means that you cannot say "make test" to run the test script. 
You will need to add the following line before the "use extension" statement: @@ -47,7 +55,7 @@ to use the following line: =item * -This document assumes that the executable named "perl" is Perl version 5. +This document assumes that the executable named "perl" is Perl version 5. Some systems may have installed Perl version 5 as "perl5". =back @@ -55,7 +63,7 @@ Some systems may have installed Perl version 5 as "perl5". =head2 DYNAMIC VERSUS STATIC It is commonly thought that if a system does not have the capability to -dynamically load a library, you cannot build XSUB's. This is incorrect. +load a library dynamically, you cannot build XSUBs. This is incorrect. You I<can> build them, but you must link the XSUB's subroutines with the rest of Perl, creating a new executable. This situation is similar to Perl 4. @@ -80,7 +88,7 @@ test" is sufficient. Our first extension will be very simple. When we call the routine in the extension, it will print out a well-known message and return. -Run "h2xs -A -n Mytest". This creates a directory named Mytest, possibly under +Run C<h2xs -A -n Mytest>. This creates a directory named Mytest, possibly under ext/ if that directory exists in the current working directory. Several files will be created in the Mytest dir, including MANIFEST, Makefile.PL, Mytest.pm, Mytest.xs, test.pl, and Changes. @@ -137,7 +145,7 @@ And the Mytest.xs file should look something like this: #ifdef __cplusplus } #endif - + PROTOTYPES: DISABLE MODULE = Mytest PACKAGE = Mytest @@ -150,7 +158,7 @@ Let's edit the .xs file by adding this to the end of the file: printf("Hello, world!\n"); Now we'll run "perl Makefile.PL". This will create a real Makefile, -which make needs. It's output looks something like: +which make needs. Its output looks something like: % perl Makefile.PL Checking if your kit is complete... @@ -177,11 +185,11 @@ example only, we'll create a special test script. Create a file called hello that looks like this: #! 
/opt/perl5/bin/perl - - use lib './blib'; - + + use ExtUtils::testlib; + use Mytest; - + Mytest::hello(); Now we run the script and we should see the following output: @@ -193,7 +201,7 @@ Now we run the script and we should see the following output: =head2 EXAMPLE 2 Now let's add to our extension a subroutine that will take a single argument -and return 0 if the argument is even, 1 if the argument is odd. +and return 1 if the argument is even, 0 if the argument is odd. Add the following to the end of Mytest.xs: @@ -214,20 +222,18 @@ the four lines starting at the "CODE:" line to not be indented. However, for readability purposes, it is suggested that you indent them 8 spaces (or one normal tab stop). -Now re-run make to rebuild our new shared library. +Now rerun make to rebuild our new shared library. Now perform the same steps as before, generating a Makefile from the Makefile.PL file, and running make. -In order to test that our extension works, we now need to look at the +To test that our extension works, we now need to look at the file test.pl. This file is set up to imitate the same kind of testing structure that Perl itself has. Within the test script, you perform a number of tests to confirm the behavior of the extension, printing "ok" -when the test is correct, "not ok" when it is not. - -Remove the line that starts with "use lib", change the print statement in -the BEGIN block to print "1..4", and add the following code to the end of -the file: +when the test is correct, "not ok" when it is not. Change the print +statement in the BEGIN block to print "1..4", and add the following code +to the end of the file: print &Mytest::is_even(0) == 1 ? "ok 2" : "not ok 2", "\n"; print &Mytest::is_even(1) == 0 ? "ok 3" : "not ok 3", "\n"; @@ -255,7 +261,8 @@ h2xs creates a number of files in the extension directory. The file Makefile.PL is a perl script which will generate a true Makefile to build the extension. We'll take a closer look at it later. 
-The files <extension>.pm and <extension>.xs contain the meat of the extension. +The files E<lt>extensionE<gt>.pm and E<lt>extensionE<gt>.xs contain the meat +of the extension. The .xs file holds the C routines that make up the extension. The .pm file contains routines that tell Perl how to load your extension. @@ -265,7 +272,7 @@ contain the shared library that we will build. Once we have tested it, we can install it into its final location. Invoking the test script via "make test" did something very important. It -invoked perl with all those -I arguments so that it could find the various +invoked perl with all those C<-I> arguments so that it could find the various files that are part of the extension. It is I<very> important that while you are still testing extensions that @@ -367,9 +374,9 @@ you change the value of constants! =head2 WHAT'S NEW HERE? Two things are new here. First, we've made some changes to Makefile.PL. -In this case, we've specified an extra library to link in, in this case the -math library, libm. We'll talk later about how to write XSUBs that can call -every routine in a library. +In this case, we've specified an extra library to link in, the math library +libm. We'll talk later about how to write XSUBs that can call every routine +in a library. Second, the value of the function is being passed back not as the function's return value, but through the same variable that was passed into the function. @@ -439,14 +446,14 @@ section on the argument stack. =head2 WARNING In general, it's not a good idea to write extensions that modify their input -parameters, as in Example 3. However, in order to better accomodate calling +parameters, as in Example 3. However, to accommodate better calling pre-existing C routines, which often do modify their input parameters, -this behavior is tolerated. +this behavior is tolerated. The next example will show how to do this. 
=head2 EXAMPLE 4 -In this example, we'll now begin to write XSUB's that will interact with -pre-defined C libraries. To begin with, we will build a small library of +In this example, we'll now begin to write XSUBs that will interact with +predefined C libraries. To begin with, we will build a small library of our own, then let h2xs write our .pm and .xs files for us. Create a new directory called Mytest2 at the same level as the directory @@ -468,7 +475,7 @@ Also create a file mylib.c that looks like this: #include <stdlib.h> #include "./mylib.h" - + double foo(a, b, c) int a; @@ -483,12 +490,13 @@ And finally create a file Makefile.PL that looks like this: use ExtUtils::MakeMaker; $Verbose = 1; WriteMakefile( - 'NAME' => 'Mytest2::mylib', - 'clean' => {'FILES' => 'libmylib.a'}, + NAME => 'Mytest2::mylib', + SKIP => [qw(all static static_lib dynamic dynamic_lib)], + clean => {'FILES' => 'libmylib$(LIB_EXT)'}, ); - sub MY::postamble { + sub MY::top_targets { ' all :: static @@ -504,7 +512,7 @@ And finally create a file Makefile.PL that looks like this: We will now create the main top-level Mytest2 files. Change to the directory above Mytest2 and run the following command: - % h2xs -O -n Mytest2 < ./Mytest2/mylib/mylib.h + % h2xs -O -n Mytest2 ./Mytest2/mylib/mylib.h This will print out a warning about overwriting Mytest2, but that's okay. Our files are stored in Mytest2/mylib, and will be untouched. @@ -526,7 +534,8 @@ and a new replacement subroutine too: } (Note: Most makes will require that there be a tab character that indents -the line "cd mylib && $(MAKE)".) +the line "cd mylib && $(MAKE)", similarly for the Makefile in the +subdirectory.) Let's also fix the MANIFEST file so that it accurately reflects the contents of our extension. The single line that says "mylib" should be replaced by @@ -537,8 +546,9 @@ the following three lines: mylib/mylib.h To keep our namespace nice and unpolluted, edit the .pm file and change -the line setting @EXPORT to @EXPORT_OK. 
And finally, in the .xs file, -edit the #include line to read: +the lines setting @EXPORT to @EXPORT_OK (there are two: one in the line +beginning "use vars" and one setting the array itself). Finally, in the +.xs file, edit the #include line to read: #include "mylib/mylib.h" @@ -569,17 +579,19 @@ and add the following lines to the end of the script: print &Mytest2::foo(1, 2, "0.0") == 7 ? "ok 3\n" : "not ok 3\n"; print abs(&Mytest2::foo(0, 0, "-3.4") - 0.6) <= 0.01 ? "ok 4\n" : "not ok 4\n"; -(When dealing with floating-point comparisons, it is often useful to not check +(When dealing with floating-point comparisons, it is often useful not to check for equality, but rather the difference being below a certain epsilon factor, 0.01 in this case) Run "make test" and all should be well. -=head 2 WHAT HAS HAPPENED HERE? +=head2 WHAT HAS HAPPENED HERE? Unlike previous examples, we've now run h2xs on a real include file. This has caused some extra goodies to appear in both the .pm and .xs files. +=over 4 + =item * In the .xs file, there's now a #include declaration with the full path to @@ -597,20 +609,25 @@ C<constant> routine. The .pm file has exported the name TESTVAL in the @EXPORT array. This could lead to name clashes. A good rule of thumb is that if the #define -is only going to be used by the C routines themselves, and not by the user, +is going to be used by only the C routines themselves, and not by the user, they should be removed from the @EXPORT array. Alternately, if you don't mind using the "fully qualified name" of a variable, you could remove most or all of the items in the @EXPORT array. +=item * + +If our include file contained #include directives, these would not be +processed at all by h2xs. There is no good solution to this right now. + =back We've also told Perl about the library that we built in the mylib -subdirectory. That required only the addition of the MYEXTLIB variable +subdirectory. 
That required the addition of only the MYEXTLIB variable to the WriteMakefile call and the replacement of the postamble subroutine to cd into the subdirectory and run make. The Makefile.PL for the library is a bit more complicated, but not excessively so. Again we replaced the postamble subroutine to insert our own code. This code -simply specified that the library to be created here was a static +specified simply that the library to be created here was a static archive (as opposed to a dynamically loadable library) and provided the commands to build it. @@ -670,7 +687,7 @@ usually 0. The "ST" is actually a macro that points to the n'th argument on the argument stack. ST(0) is thus the first argument passed to the XSUB, ST(1) is the second argument, and so on. -When you list the arguments to the XSUB in the .xs file, that tell xsubpp +When you list the arguments to the XSUB in the .xs file, that tells xsubpp which argument corresponds to which of the argument stack (i.e., the first one listed is the first argument, and so on). You invite disaster if you do not list them in the same order as the function expects them. @@ -681,14 +698,14 @@ Sometimes you might want to provide some extra methods or subroutines to assist in making the interface between Perl and your extension simpler or easier to understand. These routines should live in the .pm file. Whether they are automatically loaded when the extension itself is loaded -or only loaded when called depends on where in the .pm file the subroutine +or loaded only when called depends on where in the .pm file the subroutine definition is placed. =head2 DOCUMENTING YOUR EXTENSION There is absolutely no excuse for not documenting your extension. Documentation belongs in the .pm file. This file will be fed to pod2man, -and the embedded documentation will be converted to the man page format, +and the embedded documentation will be converted to the manpage format, then placed in the blib directory. 
It will be copied to Perl's man page directory when the extension is installed. @@ -701,7 +718,7 @@ See L<perlpod> for more information about the pod format. =head2 INSTALLING YOUR EXTENSION Once your extension is complete and passes all its tests, installing it -is quite simple: you simply run "make install". You will either need +is quite simple: you simply run "make install". You will either need to have write permission into the directories where Perl is installed, or ask your system administrator to run the make for you. @@ -712,11 +729,11 @@ and L<perlpod>. =head2 Author -Jeff Okamoto <okamoto@corp.hp.com> +Jeff Okamoto <F<okamoto@corp.hp.com>> Reviewed and assisted by Dean Roehrich, Ilya Zakharevich, Andreas Koenig, and Tim Bunce. =head2 Last Changed -1996/2/9 +1996/7/10 diff --git a/pod/pod2html.PL b/pod/pod2html.PL index 817df5dc22..de36cd7fc9 100644 --- a/pod/pod2html.PL +++ b/pod/pod2html.PL @@ -12,10 +12,9 @@ use File::Basename qw(&basename &dirname); # This forces PL files to create target in same directory as PL file. # This is so that make depend always knows where to find PL derivatives. -chdir(dirname($0)); -($file = basename($0)) =~ s/\.PL$//; -$file =~ s/\.pl$// - if ($^O eq 'VMS' or $^O eq 'os2'); # "case-forgiving" +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; open OUT,">$file" or die "Can't create $file: $!"; @@ -25,523 +24,155 @@ print "Extracting $file (with variable substitutions)\n"; # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; -$Config{'startperl'} +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; !GROK!THIS! # In the following, perl variables are not expanded during extraction. 
print OUT <<'!NO!SUBS!'; -eval 'exec perl -S $0 ${1+"$@"}' - if $running_under_some_shell; -# -# pod2html - convert pod format to html -# Version 1.15 -# usage: pod2html [podfiles] -# Will read the cwd and parse all files with .pod extension -# if no arguments are given on the command line. -# -# Many helps, suggestions, and fixes from the perl5 porters, and all over. -# Bill Middleton - wjm@metronet.com -# -# Please send patches/fixes/features to me -# -# -# -*RS = */; -*ERRNO = *!; - -################################################################################ -# Invoke with various levels of debugging possible -################################################################################ -while ($ARGV[0] =~ /^-d(.*)/) { - shift; - $Debug{ lc($1 || shift) }++; -} - -# ck for podnames on command line -while ($ARGV[0]) { - push(@Pods,shift); -} - -################################################################################ -# CONFIGURE -# -# The beginning of the url for the anchors to the other sections. -# Edit $type to suit. It's configured for relative url's now. 
-# Other possibilities are: -# $type = '<A HREF="file://localhost/usr/local/htmldir/'; # file url -# $type = '<A HREF="http://www.bozo.com/perl/manual/html/' # server -# -################################################################################ - -$type = '<A HREF="'; -$dir = "."; # location of pods - -# look in these pods for things not found within the current pod -# be careful tho, namespace collisions cause stupid links - -@inclusions = qw[ - perlfunc perlvar perlrun perlop -]; -################################################################################ -# END CONFIGURE -################################################################################ - -$A = {}; # The beginning of all things - -unless (@Pods) { - opendir(DIR,$dir) or die "Can't opendir $dir: $ERRNO"; - @Pods = grep(/\.pod$/,readdir(DIR)); - closedir(DIR) or die "Can't closedir $dir: $ERRNO"; -} -@Pods or die "aak, expected pods"; - -# loop twice through the pods, first to learn the links, then to produce html -for $count (0,1) { - print STTDER "Scanning pods...\n" unless $count; - foreach $podfh ( @Pods ) { - ($pod = $podfh) =~ s/\.pod$//; - Debug("files", "opening 2 $podfh" ); - print "Creating $pod.html from $podfh\n" if $count; - $RS = "\n="; # grok pods by item (Nonstandard but effecient) - open($podfh,"<".$podfh) || die "can't open $podfh: $ERRNO"; - @all = <$podfh>; - close($podfh); - $RS = "\n"; - - $all[0] =~ s/^=//; - for (@all) { s/=$// } - $Podnames{$pod} = 1; - $in_list = 0; - $html = $pod.".html"; - if ($count) { # give us a html and rcs header - open(HTML,">$html") || die "can't create $html: $ERRNO"; - print HTML '<!-- $Id: pod2html.PL,v 1.1.1.1 1997/01/11 12:49:12 mbeattie Exp $ -->',"\n",'<HTML><HEAD>',"\n"; - print HTML "<CENTER>" unless $NO_NS; - print HTML "<TITLE>$pod</TITLE>\n</HEAD>\n<BODY>"; - print HTML "</CENTER>" unless $NO_NS; - } - for ($i = 0; $i <= $#all; $i++) { # decide what to do with each chunk - $all[$i] =~ /^(\w+)\s*(.*)\n?([^\0]*)$/ ; - ($cmd, 
$title, $rest) = ($1,$2,$3); - if ($cmd eq "item") { - if ($count ) { # producing html - do_list("over",$all[$i],\$in_list,\$depth) unless $depth; - do_item($title,$rest,$in_list); - } - else { - # scan item - scan_thing("item",$title,$pod); - } - } - elsif ($cmd =~ /^head([12])/) { - $num = $1; - if ($count) { # producing html - do_hdr($num,$title,$rest,$depth); - } - else { - # header scan - scan_thing($cmd,$title,$pod); # skip head1 - } - } - elsif ($cmd =~ /^over/) { - $count and $depth and do_list("over",$all[$i+1],\$in_list,\$depth); - } - elsif ($cmd =~ /^back/) { - if ($count) { # producing html - ($depth) or next; # just skip it - do_list("back",$all[$i+1],\$in_list,\$depth); - do_rest("$title.$rest"); - } - } - elsif ($cmd =~ /^cut/) { - next; - } - elsif ($cmd =~ /^for/) { # experimental pragma html - if ($count) { # producing html - if ($title =~ s/^html//) { - $in_html =1; - do_rest("$title.$rest"); - } - } - } - elsif ($cmd =~ /^begin/) { # experimental pragma html - if ($count) { # producing html - if ($title =~ s/^html//) { - print HTML $title,"\n",$rest; - } - elsif ($title =~ /^end/) { - next; - } - } - } - elsif ($Debug{"misc"}) { - warn("unrecognized header: $cmd"); - } - } - # close open lists without '=back' stmts - if ($count) { # producing html - while ($depth) { - do_list("back",$all[$i+1],\$in_list,\$depth); - } - print HTML "\n</BODY>\n</HTML>\n"; - } - } -} - -sub do_list{ # setup a list type, depending on some grok logic - my($which,$next_one,$list_type,$depth) = @_; - my($key); - if ($which eq "over") { - unless ($next_one =~ /^item\s+(.*)/) { - warn "Bad list, $1\n" if $Debug{"misc"}; - } - $key = $1; - - if ($key =~ /^1\.?/) { - $$list_type = "OL"; - } elsif ($key =~ /\*\s*$/) { - $$list_type = "UL"; - } elsif ($key =~ /\*?\s*\w/) { - $$list_type = "DL"; - } else { - warn "unknown list type for item $key" if $Debug{"misc"}; - } - - print HTML qq{\n}; - print HTML $$list_type eq 'DL' ? 
qq{<DL COMPACT>} : qq{<$$list_type>}; - $$depth++; - } - elsif ($which eq "back") { - print HTML qq{\n</$$list_type>\n}; - $$depth--; - } -} - -sub do_hdr{ # headers - my($num,$title,$rest,$depth) = @_; - print HTML qq{<p><hr>\n} if $num == 1; - process_thing(\$title,"NAME"); - print HTML qq{\n<H$num> }; - print HTML $title; - print HTML qq{</H$num>\n}; - do_rest($rest); -} - -sub do_item{ # list items - my($title,$rest,$list_type) = @_; - my $bullet_only = $title eq '*' and $list_type eq 'UL'; - process_thing(\$title,"NAME"); - if ($list_type eq "DL") { - print HTML qq{\n<DT><STRONG>\n}; - print HTML $title; - print HTML qq{\n</STRONG>\n}; - print HTML qq{<DD>\n}; - } - else { - print HTML qq{\n<LI>}; - unless ($bullet_only or $list_type eq "OL") { - print HTML $title,"\n"; - } - } - do_rest($rest); -} - -sub do_rest{ # the rest of the chunk handled here - my($rest) = @_; - my(@lines,$p,$q,$line,,@paras,$inpre); - @paras = split(/\n\n\n*/,$rest); - for ($p = 0; $p <= $#paras; $p++) { - $paras[$p] =~ s/^\n//mg; - @lines = split(/\n/,$paras[$p]); - if ($in_html) { # handle =for html paragraphs - print HTML $paras[0]; - $in_html = 0; - next; - } - elsif ($lines[0] =~ /^\s+\w*\t.*/) { # listing or unordered list - print HTML qq{<UL>}; - foreach $line (@lines) { - ($line =~ /^\s+(\w*)\t(.*)/) && (($key,$rem) = ($1,$2)); - print HTML defined($Podnames{$key}) - ? "<LI>$type$key.html\">$key<\/A>\t$rem</LI>\n" - : "<LI>$line</LI>\n"; - } - print HTML qq{</UL>\n}; - } - elsif ($lines[0] =~ /^\s/) { # preformatted code - if ($paras[$p] =~/>>|<</) { - print HTML qq{\n<PRE>\n}; - $inpre=1; - } - else { # Still cant beat XMP. Yes, I know - print HTML qq{\n<XMP>\n}; # it's been obsoleted... suggestions? 
- $inpre = 0; - } - while (defined($paras[$p])) { - @lines = split(/\n/,$paras[$p]); - foreach $q (@lines) { # mind your p's and q's here :-) - if ($paras[$p] =~ />>|<</) { - if ($inpre) { - process_thing(\$q,"HTML"); - } - else { - print HTML qq{\n</XMP>\n}; - print HTML qq{<PRE>\n}; - $inpre=1; - process_thing(\$q,"HTML"); - } - } - 1 while $q =~ s/\t+/' 'x (length($&) * 8 - length($`) % 8)/e; - print HTML $q,"\n"; - } - last if $paras[$p+1] !~ /^\s/; - $p++; - } - print HTML ($inpre==1) ? (qq{\n</PRE>\n}) : (qq{\n</XMP>\n}); - } - else { # other text - @lines = split(/\n/,$paras[$p]); - foreach $line (@lines) { - process_thing(\$line,"HTML"); - print HTML qq{$line\n}; - } - } - print HTML qq{<p>}; - } -} - -sub process_thing{ # process a chunk, order important - my($thing,$htype) = @_; - pre_escapes($thing); - find_refs($thing,$htype); - post_escapes($thing); -} - -sub scan_thing{ # scan a chunk for later references - my($cmd,$title,$pod) = @_; - $_ = $title; - s/\n$//; - s/E<(.*?)>/&$1;/g; - # remove any formatting information for the headers - s/[SFCBI]<(.*?)>/$1/g; - # the "don't format me" thing - s/Z<>//g; - if ($cmd eq "item") { - /^\*/ and return; # skip bullets - /^\d+\./ and return; # skip numbers - s/(-[a-z]).*/$1/i; - trim($_); - return if defined $A->{$pod}->{"Items"}->{$_}; - $A->{$pod}->{"Items"}->{$_} = gensym($pod, $_); - $A->{$pod}->{"Items"}->{(split(' ',$_))[0]}=$A->{$pod}->{"Items"}->{$_}; - Debug("items", "item $_"); - if (!/^-\w$/ && /([%\$\@\w]+)/ && $1 ne $_ - && !defined($A->{$pod}->{"Items"}->{$_}) && ($_ ne $1)) - { - $A->{$pod}->{"Items"}->{$1} = $A->{$pod}->{"Items"}->{$_}; - Debug("items", "item $1 REF TO $_"); - } - if ( m{^(tr|y|s|m|q[qwx])/.*[^/]} ) { - my $pf = $1 . 
'//'; - $pf .= "/" if $1 eq "tr" || $1 eq "y" || $1 eq "s"; - if ($pf ne $_) { - $A->{$pod}->{"Items"}->{$pf} = $A->{$pod}->{"Items"}->{$_}; - Debug("items", "item $pf REF TO $_"); - } - } - } - elsif ($cmd =~ /^head[12]/) { - return if defined($A->{$pod}->{"Headers"}->{$_}); - $A->{$pod}->{"Headers"}->{$_} = gensym($pod, $_); - Debug("headers", "header $_"); - } - else { - warn "unrecognized header: $cmd" if $Debug; - } -} - - -sub picrefs { - my($char, $bigkey, $lilkey,$htype) = @_; - my($key,$ref,$podname); - for $podname ($pod,@inclusions) { - for $ref ( "Items", "Headers" ) { - if (defined $A->{$podname}->{$ref}->{$bigkey}) { - $value = $A->{$podname}->{$ref}->{$key = $bigkey}; - Debug("subs", "bigkey is $bigkey, value is $value\n"); - } - elsif (defined $A->{$podname}->{$ref}->{$lilkey}) { - $value = $A->{$podname}->{$ref}->{$key = $lilkey}; - return "" if $lilkey eq ''; - Debug("subs", "lilkey is $lilkey, value is $value\n"); - } - } - if (length($key)) { - ($pod2,$num) = split(/_/,$value,2); - if ($htype eq "NAME") { - return "\n<A NAME=\"".$value."\">\n$bigkey</A>\n" - } - else { - return "\n$type$pod2.html\#".$value."\">$bigkey<\/A>\n"; - } - } - } - if ($char =~ /[IF]/) { - return "<EM>$bigkey</EM>"; - } elsif ($char =~ /C/) { - return "<CODE>$bigkey</CODE>"; - } else { - return "<STRONG>$bigkey</STRONG>"; - } -} - -sub find_refs { - my($thing,$htype) = @_; - my($orig) = $$thing; - # LREF: a manpage(3f) we don't know about - for ($$thing) { - #s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))>:the I<$1>$2 manpage:g; - s@(\S+?://\S*[^.,;!?\s])@noremap(qq{<A HREF="$1">$1</A>})@ge; - s,([a-z0-9_.-]+\@([a-z0-9_-]+\.)+([a-z0-9_-]+)),noremap(qq{<A HREF="MAILTO:$1">$1</A>}),gie; - s/L<([^>]*)>/lrefs($1,$htype)/ge; - s/([CIBF])<(\W*?(-?\w*).*?)>/picrefs($1, $2, $3, $htype)/ge; - s/(S)<([^\/]\W*?(-?\w*).*?)>/picrefs($1, $2, $3, $htype)/ge; - s/((\w+)\(\))/picrefs("I", $1, $2,$htype)/ge; - s/([\$\@%](?!&[gl]t)([\w:]+|\W\b))/varrefs($1,$htype)/ge; - } - if ($$thing eq $orig && 
$htype eq "NAME") { - $$thing = picrefs("I", $$thing, "", $htype); - } - -} - -sub lrefs { - my($page, $item) = split(m#/#, $_[0], 2); - my($htype) = $_[1]; - my($podname); - my($section) = $page =~ /\((.*)\)/; - my $selfref; - if ($page =~ /^[A-Z]/ && $item) { - $selfref++; - $item = "$page/$item"; - $page = $pod; - } elsif (!$item && $page =~ /[^a-z\-]/ && $page !~ /^\$.$/) { - $selfref++; - $item = $page; - $page = $pod; - } - $item =~ s/\(\)$//; - if (!$item) { - if (!defined $section && defined $Podnames{$page}) { - return "\n$type$page.html\">\nthe <EM>$page</EM> manpage<\/A>\n"; - } else { - (warn "Bizarre entry $page/$item") if $Debug; - return "the <EM>$_[0]</EM> manpage\n"; - } - } - - if ($item =~ s/"(.*)"/$1/ || ($item =~ /[^\w\/\-]/ && $item !~ /^\$.$/)) { - $text = "<EM>$item</EM>"; - $ref = "Headers"; - } else { - $text = "<EM>$item</EM>"; - $ref = "Items"; - } - for $podname ($pod, @inclusions) { - undef $value; - if ($ref eq "Items") { - if (defined($value = $A->{$podname}->{$ref}->{$item})) { - ($pod2,$num) = split(/_/,$value,2); - return (($pod eq $pod2) && ($htype eq "NAME")) - ? "\n<A NAME=\"".$value."\">\n$text</A>\n" - : "\n$type$pod2.html\#".$value."\">$text<\/A>\n"; - } - } - elsif ($ref eq "Headers") { - if (defined($value = $A->{$podname}->{$ref}->{$item})) { - ($pod2,$num) = split(/_/,$value,2); - return (($pod eq $pod2) && ($htype eq "NAME")) - ? "\n<A NAME=\"".$value."\">\n$text</A>\n" - : "\n$type$pod2.html\#".$value."\">$text<\/A>\n"; - } - } - } - warn "No $ref reference for $item (@_)" if $Debug; - return $text; -} - -sub varrefs { - my ($var,$htype) = @_; - for $podname ($pod,@inclusions) { - if ($value = $A->{$podname}->{"Items"}->{$var}) { - ($pod2,$num) = split(/_/,$value,2); - Debug("vars", "way cool -- var ref on $var"); - return (($pod eq $pod2) && ($htype eq "NAME")) # INHERIT $_, $pod - ? 
"\n<A NAME=\"".$value."\">\n$var</A>\n" - : "\n$type$pod2.html\#".$value."\">$var<\/A>\n"; - } - } - Debug( "vars", "bummer, $var not a var"); - return "<STRONG>$var</STRONG>"; -} - -sub gensym { - my ($podname, $key) = @_; - $key =~ s/\s.*//; - ($key = lc($key)) =~ tr/a-z/_/cs; - my $name = "${podname}_${key}_0"; - $name =~ s/__/_/g; - while ($sawsym{$name}++) { - $name =~ s/_?(\d+)$/'_' . ($1 + 1)/e; - } - return $name; -} - -sub pre_escapes { # twiddle these, and stay up late :-) - my($thing) = @_; - for ($$thing) { - s/"(.*?)"/``$1''/gs; - s/&/noremap("&")/ge; - s/<</noremap("<<")/eg; - s/([^ESIBLCF])</$1\<\;/g; - s/E<([^\/][^<>]*)>/\&$1\;/g; # embedded special - } -} -sub noremap { # adding translator for hibit chars soon - my $hide = $_[0]; - $hide =~ tr/\000-\177/\200-\377/; - $hide; -} - - -sub post_escapes { - my($thing) = @_; - for ($$thing) { - s/([^GM])>>/$1\>\;\>\;/g; - s/([^D][^"MGA])>/$1\>\;/g; - tr/\200-\377/\000-\177/; - } -} - -sub Debug { - my $level = shift; - print STDERR @_,"\n" if $Debug{$level}; -} - -sub dumptable { - my $t = shift; - print STDERR "TABLE DUMP $t\n"; - foreach $k (sort keys %$t) { - printf STDERR "%-20s <%s>\n", $t->{$k}, $k; - } -} -sub trim { - for (@_) { - s/^\s+//; - s/\s\n?$//; - } -} +=pod + +=head1 NAME + +pod2html - convert .pod files to .html files + +=head1 SYNOPSIS + + pod2html --help --htmlroot=<name> --infile=<name> --outfile=<name> + --podpath=<name>:...:<name> --podroot=<name> + --libpods=<name>:...:<name> --recurse --norecurse --verbose + --index --noindex --title=<name> + +=head1 DESCRIPTION + +Converts files from pod format (see L<perlpod>) to HTML format. + +=head1 ARGUMENTS + +pod2html takes the following arguments: + +=over 4 + +=item help + + --help + +Displays the usage message. + +=item htmlroot + + --htmlroot=name + +Sets the base URL for the HTML files. When cross-references are made, +the HTML root is prepended to the URL. + +=item infile + + --infile=name + +Specify the pod file to convert. 
Input is taken from STDIN if no +infile is specified. + +=item outfile + + --outfile=name + +Specify the HTML file to create. Output goes to STDOUT if no outfile +is specified. + +=item podroot + + --podroot=name + +Specify the base directory for finding library pods. + +=item podpath + + --podpath=name:...:name + +Specify which subdirectories of the podroot contain pod files whose +HTML converted forms can be linked-to in cross-references. + +=item libpods + + --libpods=name:...:name + +List of page names (eg, "perlfunc") which contain linkable C<=item>s. + +=item netscape + + --netscape + +Use Netscape HTML directives when applicable. + +=item nonetscape + + --nonetscape + +Do not use Netscape HTML directives (default). + +=item index + + --index + +Generate an index at the top of the HTML file (default behaviour). + +=item noindex + + --noindex + +Do not generate an index at the top of the HTML file. + + +=item recurse + + --recurse + +Recurse into subdirectories specified in podpath (default behaviour). + +=item norecurse + + --norecurse + +Do not recurse into subdirectories specified in podpath. + +=item title + + --title=title + +Specify the title of the resulting HTML file. + +=item verbose + + --verbose + +Display progress messages. + +=back + +=head1 AUTHOR + +Tom Christiansen, E<lt>tchrist@perl.comE<gt>. + +=head1 BUGS + +See L<Pod::Html> for a list of known bugs in the translator. + +=head1 SEE ALSO + +L<perlpod>, L<Pod::HTML> + +=head1 COPYRIGHT + +This program is distributed under the Artistic License. + +=cut + +use Pod::Html; + +pod2html @ARGV; !NO!SUBS! close OUT or die "Can't close $file: $!"; diff --git a/pod/pod2latex.PL b/pod/pod2latex.PL index 34b1faadba..3d0b55b32f 100644 --- a/pod/pod2latex.PL +++ b/pod/pod2latex.PL @@ -12,10 +12,9 @@ use File::Basename qw(&basename &dirname); # This forces PL files to create target in same directory as PL file. # This is so that make depend always knows where to find PL derivatives. 
-chdir(dirname($0)); -($file = basename($0)) =~ s/\.PL$//; -$file =~ s/\.pl$// - if ($^O eq 'VMS' or $^O eq 'os2'); # "case-forgiving" +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; open OUT,">$file" or die "Can't create $file: $!"; @@ -25,9 +24,9 @@ print "Extracting $file (with variable substitutions)\n"; # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; -$Config{'startperl'} - eval 'exec perl -S \$0 "\$@"' - if 0; +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; !GROK!THIS! # In the following, perl variables are not expanded during extraction. @@ -124,11 +123,21 @@ open(LATEX,">$pod.tex"); &do_hdr(); $cutting = 1; +$begun = ""; while (<POD>) { if ($cutting) { next unless /^=/; $cutting = 0; } + if ($begun) { + if (/^=end\s+$begun/) { + $begun = ""; + } + elsif ($begun =~ /^(tex|latex)$/) { + print LATEX $_; + } + next; + } chop; length || (print LATEX "\n") && next; @@ -146,6 +155,22 @@ while (<POD>) { next; } + if (/^=for\s+(\S+)\s*/s) { + if ($1 eq "tex" or $1 eq "latex") { + print LATEX $',"\n"; + } else { + # ignore unknown for + } + next; + } + elsif (/^=begin\s+(\S+)\s*/s) { + $begun = $1; + if ($1 eq "tex" or $1 eq "latex") { + print LATEX $'."\n"; + } + next; + } + # preserve '=item' line with pod quotes as they are. if (/^=item/) { ($bareitem = $_) =~ s/^=item\s*//; @@ -500,9 +525,8 @@ sub noremap { } sub init_noremap { - if ( /[\200-\377]/ ) { - warn "hit bit char in input stream"; - } + # escape high bit characters in input stream + s/([\200-\377])/"E<".ord($1).">"/ge; } sub clear_noremap { @@ -513,11 +537,14 @@ sub clear_noremap { sub expand_HTML_escapes { local($s) = $_[0]; - $s =~ s { E<([A-Za-z]+)> } + $s =~ s { E<((\d+)|([A-Za-z]+))> } { do { - exists $HTML_Escapes{$1} - ? do { $HTML_Escapes{$1} } + defined($2) + ? do { chr($2) } + : + exists $HTML_Escapes{$3} + ? 
do { $HTML_Escapes{$3} } : do { warn "Unknown escape: $& in $_"; "E<$1>"; diff --git a/pod/pod2man.PL b/pod/pod2man.PL index d8f7cbb716..cd14ce2866 100644 --- a/pod/pod2man.PL +++ b/pod/pod2man.PL @@ -8,14 +8,14 @@ use File::Basename qw(&basename &dirname); # have to mention them as if they were shell variables, not # %Config entries. Thus you write # $startperl +# $man3ext # to ensure Configure will look for $Config{startperl}. # This forces PL files to create target in same directory as PL file. # This is so that make depend always knows where to find PL derivatives. -chdir(dirname($0)); -($file = basename($0)) =~ s/\.PL$//; -$file =~ s/\.pl$// - if ($^O eq 'VMS' or $^O eq 'os2'); # "case-forgiving" +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; open OUT,">$file" or die "Can't create $file: $!"; @@ -25,14 +25,16 @@ print "Extracting $file (with variable substitutions)\n"; # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; -$Config{'startperl'} +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; + +\$DEF_PM_SECTION = '$Config{man3ext}' || '3'; !GROK!THIS! # In the following, perl variables are not expanded during extraction. print OUT <<'!NO!SUBS!'; -eval 'exec perl -S $0 "$@"' - if 0; =head1 NAME @@ -47,6 +49,7 @@ B<pod2man> [ B<--date=>I<string> ] [ B<--fixed=>I<font> ] [ B<--official> ] +[ B<--lax> ] I<inputfile> =head1 DESCRIPTION @@ -106,6 +109,10 @@ best if you put your Perl man pages in a separate tree, like F</usr/local/perl/man/>. By default, section 1 will be used unless the file ends in F<.pm> in which case section 3 will be selected. +=item lax + +Don't complain when required sections aren't present. + =back =head1 Anatomy of a Proper Man Page @@ -198,7 +205,7 @@ Who wrote it (or AUTHORS if multiple). =item HISTORY Programs derived from other sources sometimes have this, or -you might keep a modification long here. 
+you might keep a modification log here. =back @@ -225,12 +232,6 @@ as bold, italic, or code. (F) The input file wasn't available for the given reason. -=item high bit char in input stream - -(W) You can't use high-bit characters in the input stream, -because the translator uses them for its own nefarious purposes. -Use an HTML entity in angle brackets instead. - =item Improper man page - no dash in NAME header in paragraph %d of %s (W) The NAME header did not have an isolated dash in it. This is @@ -254,7 +255,7 @@ not having a NAME is a fatal. =item Unknown escape: %s in %s (W) An unknown HTML entity (probably for an 8-bit character) was given via -a C<E<lt>E<gt>> directive. Besides amp, lt, gt, and quot, recognized +a C<EE<lt>E<gt>> directive. Besides amp, lt, gt, and quot, recognized entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave, Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute, Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc, @@ -279,7 +280,7 @@ C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>. If you would like to print out a lot of man page continuously, you probably want to set the C and D registers to set contiguous page -numbering and even/odd paging, at least one some versions of man(7). +numbering and even/odd paging, at least on some versions of man(7). Settting the F register will get you some additional experimental indexing: @@ -292,8 +293,7 @@ directives. =head1 RESTRICTIONS -You shouldn't use 8-bit characters in the input stream, as these -will be used by the translator. +None at this time. =head1 BUGS @@ -311,7 +311,15 @@ Tom Christiansen such that Larry probably doesn't recognize it anymore. 
$/ = ""; $cutting = 1; -($version,$patch) = `\PATH=.:..:\$PATH; perl -v` =~ /version (\d\.\d{3}(?: +)(?:\S+)?)(?:.*patchlevel (\d\S*))?/s; +# We try first to get the version number from a local binary, in case we're +# running an installed version of Perl to produce documentation from an +# uninstalled newer version's pod files. +if ($^O ne 'plan9') { + ($version,$patch) = + `\PATH=.:..:\$PATH; perl -v` =~ /version (\d\.\d{3})(?:_(\d{2}))?/; +} +# No luck; we'll just go with the running Perl's version +($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version; $DEF_RELEASE = "perl $version"; $DEF_RELEASE .= ", patch $patch" if $patch; @@ -329,6 +337,7 @@ $DEF_SECTION = 1; $DEF_CENTER = "User Contributed Perl Documentation"; $STD_CENTER = "Perl Programmers Reference Guide"; $DEF_FIXED = 'CW'; +$DEF_LAX = 0; sub usage { warn "$0: @_\n" if @_; @@ -341,6 +350,7 @@ Options are: --date=string (default "$DEF_DATE") --fixed=font (default "$DEF_FIXED") --official (default NOT) + --lax (default NOT) EOF } @@ -351,6 +361,7 @@ $uok = GetOptions( qw( date=s fixed=s official + lax help)); $DEF_DATE = makedate((stat($ARGV[0]))[9] || time()); @@ -359,9 +370,11 @@ usage("Usage error!") unless $uok; usage() if $opt_help; usage("Need one and only one podpage argument") unless @ARGV == 1; -$section = $opt_section || ($ARGV[0] =~ /\.pm$/ ? 3 : $DEF_SECTION); +$section = $opt_section || ($ARGV[0] =~ /\.pm$/ + ? $DEF_PM_SECTION : $DEF_SECTION); $RP = $opt_release || $DEF_RELEASE; $center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER); +$lax = $opt_lax || $DEF_LAX; $CFont = $opt_fixed || $DEF_FIXED; @@ -375,7 +388,6 @@ else { die "roff font should be 1 or 2 chars, not `$CFont_embed'"; } -$section = $opt_section || $DEF_SECTION; $date = $opt_date || $DEF_DATE; for (qw{NAME DESCRIPTION}) { @@ -387,8 +399,27 @@ $wanna_see{SYNOPSIS}++ if $section =~ /^3/; $name = @ARGV ? 
$ARGV[0] : "<STDIN>"; $Filename = $name; -$name = uc($name) if $section =~ /^1/; -$name =~ s/\.[^.]*$//; +if ($section =~ /^1/) { + require File::Basename; + $name = uc File::Basename::basename($name); +} +$name =~ s/\.(pod|p[lm])$//i; + +# Lose everything up to the first of +# */lib/*perl* standard or site_perl module +# */*perl*/lib from -D prefix=/opt/perl +# */*perl*/ random module hierarchy +# which works. +$name =~ s-//+-/-g; +if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i + or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i + or $name =~ s-^.*?/[^/]*perl[^/]*/--i) { + # Lose ^arch/version/. + $name =~ s-^[^/]+/\d+\.\d+/--; +} + +# Translate Getopt/Long to Getopt::Long, etc. +$name =~ s(/)(::)g; if ($name ne 'something') { FCHECK: { @@ -400,14 +431,22 @@ if ($name ne 'something') { unless (/\s*-+\s+/) { $oops++; warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n" + } else { + my @n = split /\s+-+\s+/; + if (@n != 2) { + $oops++; + warn "$0: Improper man page - malformed NAME header in paragraph $. 
of $ARGV[0]\n" + } + else { + %namedesc = @n; + } } - %namedesc = split /\s+-\s+/; last FCHECK; } next if /^=cut\b/; # DB_File and Net::Ping have =cut before NAME - die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n"; + die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax; } - die "$0: Invalid man page - no documentation in $ARGV[0]\n"; + die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax; } close F; } @@ -603,11 +642,22 @@ END $indent = 0; +$begun = ""; + while (<>) { if ($cutting) { next unless /^=/; $cutting = 0; } + if ($begun) { + if (/^=end\s+$begun/) { + $begun = ""; + } + elsif ($begun =~ /^(roff|man)$/) { + print STDOUT $_; + } + next; + } chomp; # Translate verbatim paragraph @@ -632,6 +682,22 @@ while (<>) { $verbatim = 0; + if (/^=for\s+(\S+)\s*/s) { + if ($1 eq "man" or $1 eq "roff") { + print STDOUT $',"\n\n"; + } else { + # ignore unknown for + } + next; + } + elsif (/^=begin\s+(\S+)\s*/s) { + $begun = $1; + if ($1 eq "man" or $1 eq "roff") { + print STDOUT $'."\n\n"; + } + next; + } + # check for things that'll hosed our noremap scheme; affects $_ init_noremap(); @@ -653,18 +719,16 @@ while (<>) { ) } {I<$1>}gx; - # func(n) is a reference to a man page + # func(n) is a reference to a perl function or a man page s{ - (\w+) + ([:\w]+) ( - \( - [^\s,\051]+ - \) + \( [^\051]+ \) ) } {I<$1>\\|$2}gx; # convert simple variable references - s/(\s+)([\$\@%][\w:]+)/${1}C<$2>/g; + s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g; if (m{ ( [\-\w]+ @@ -754,7 +818,7 @@ while (<>) { ? "the section on I<$2> in the I<$1> manpage" : "the section on I<$2>" } - }gex; + }gesx; # s in case it goes over multiple lines, so . matches \n s/Z<>/\\&/g; @@ -800,6 +864,11 @@ while (<>) { } elsif ($Cmd eq 'item') { s/^\*( |$)/\\(bu$1/g; + # if you know how to get ":s please do + s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g; + s/\\\*\(L"([^"]+?)""/'$1'/g; + s/[^"]""([^"]+?)""[^"]/'$1'/g; + # here do something about the $" in perlvar? 
print STDOUT qq{.Ip "$_" $indent\n}; print qq{.IX Item "$_"\n}; } @@ -826,7 +895,7 @@ print <<"END"; .rn }` '' END -if (%wanna_see) { +if (%wanna_see && !$lax) { @missing = keys %wanna_see; warn "$0: $Filename is missing required section" . (@missing > 1 && "s") @@ -910,13 +979,7 @@ sub escapes { # make troff just be normal, but make small nroff get quoted # decided to just put the quotes in the text; sigh; sub ccvt { - local($_,$prev) = @_; - if ( /^\W+$/ && !/^\$./ ) { - ($prev && "\n") . noremap(qq{.CQ $_ \n\\&}); - # what about $" ? - } else { - noremap(qq{${CFont_embed}$_\\fR}); - } + local($_,$prev) = @_; noremap(qq{.CQ "$_" \n\\&}); } @@ -952,9 +1015,8 @@ sub noremap { } sub init_noremap { - if ( /[\200-\377]/ ) { - warn "$0: high bit char in input stream in paragraph $. of $ARGV\n"; - } + # escape high bit characters in input stream + s/([\200-\377])/"E<".ord($1).">"/ge; } sub clear_noremap { @@ -969,13 +1031,19 @@ sub clear_noremap { # otherwise the interative \w<> processing would have # been hosed by the E<gt> s { - E< - ( [A-Za-z]+ ) + E< + ( + ( \d + ) + | ( [A-Za-z]+ ) + ) > } { - do { - exists $HTML_Escapes{$1} - ? do { $HTML_Escapes{$1} } + do { + defined $2 + ? chr($2) + : + exists $HTML_Escapes{$3} + ? do { $HTML_Escapes{$3} } : do { warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n"; "E<$1>"; @@ -998,7 +1066,7 @@ sub internal_lrefs { } $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) - . " elsewhere in this document"; + . " elsewhere in this document "; # terminal space to avoid words running together (pattern used strips terminal spaces) return $retstr; diff --git a/pod/pod2text.PL b/pod/pod2text.PL index 49198078c0..da645b554e 100644 --- a/pod/pod2text.PL +++ b/pod/pod2text.PL @@ -12,10 +12,9 @@ use File::Basename qw(&basename &dirname); # This forces PL files to create target in same directory as PL file. # This is so that make depend always knows where to find PL derivatives. 
-chdir(dirname($0)); -($file = basename($0)) =~ s/\.PL$//; -$file =~ s/\.pl$// - if ($^O eq 'VMS' or $^O eq 'os2'); # "case-forgiving" +chdir dirname($0); +$file = basename($0, '.PL'); +$file .= '.com' if $^O eq 'VMS'; open OUT,">$file" or die "Can't create $file: $!"; @@ -25,9 +24,9 @@ print "Extracting $file (with variable substitutions)\n"; # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; -$Config{'startperl'} - eval 'exec perl -S \$0 "\$@"' - if 0; +$Config{startperl} + eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' + if \$running_under_some_shell; !GROK!THIS! # In the following, perl variables are not expanded during extraction. diff --git a/pod/roffitall b/pod/roffitall index 024279a69e..d69054f41b 100644 --- a/pod/roffitall +++ b/pod/roffitall @@ -1,84 +1,193 @@ #!/bin/sh -#psroff -t -man -rC1 -rD1 -rF1 > /tmp/PerlDoc.ps 2>/tmp/PerlTOC.raw \ -nroff -man -rC1 -rD1 -rF1 > /tmp/PerlDoc.txt 2>/tmp/PerlTOC.nr.raw \ - /usr/local/man/man1/perl.1 \ - /usr/local/man/man1/perldata.1 \ - /usr/local/man/man1/perlsyn.1 \ - /usr/local/man/man1/perlop.1 \ - /usr/local/man/man1/perlre.1 \ - /usr/local/man/man1/perlrun.1 \ - /usr/local/man/man1/perlfunc.1 \ - /usr/local/man/man1/perlvar.1 \ - /usr/local/man/man1/perlsub.1 \ - /usr/local/man/man1/perlmod.1 \ - /usr/local/man/man1/perlref.1 \ - /usr/local/man/man1/perldsc.1 \ - /usr/local/man/man1/perllol.1 \ - /usr/local/man/man1/perlobj.1 \ - /usr/local/man/man1/perltie.1 \ - /usr/local/man/man1/perlbot.1 \ - /usr/local/man/man1/perldebug.1 \ - /usr/local/man/man1/perldiag.1 \ - /usr/local/man/man1/perlform.1 \ - /usr/local/man/man1/perlipc.1 \ - /usr/local/man/man1/perlsec.1 \ - /usr/local/man/man1/perltrap.1 \ - /usr/local/man/man1/perlstyle.1 \ - /usr/local/man/man1/perlxs.1 \ - /usr/local/man/man1/perlxstut.1 \ - /usr/local/man/man1/perlguts.1 \ - /usr/local/man/man1/perlcall.1 \ - /usr/local/man/man1/perlembed.1 \ - /usr/local/man/man1/perlpod.1 \ - /usr/local/man/man1/perlbook.1 \ +# 
+# Usage: roffitall [-nroff|-psroff|-groff] +# +# Authors: Tom Christiansen, Raphael Manfredi + +me=roffitall +tmp=. + +if test -f ../config.sh; then + . ../config.sh +fi + +mandir=$installman1dir +libdir=$installman3dir + +test -d $mandir || mandir=/usr/local/man/man1 +test -d $libdir || libdir=/usr/local/man/man3 + +case "$1" in +-nroff) cmd="nroff -man"; ext='txt';; +-psroff) cmd="psroff -t"; ext='ps';; +-groff) cmd="groff -man"; ext='ps';; +*) + echo "Usage: roffitall [-nroff|-psroff|-groff]" >&2 + exit 1 + ;; +esac + +toroff=` + echo \ + $mandir/perl.1 \ + $mandir/perldelta.1 \ + $mandir/perldata.1 \ + $mandir/perlsyn.1 \ + $mandir/perlop.1 \ + $mandir/perlre.1 \ + $mandir/perlrun.1 \ + $mandir/perllocale.1 \ + $mandir/perlfunc.1 \ + $mandir/perlvar.1 \ + $mandir/perlsub.1 \ + $mandir/perlmod.1 \ + $mandir/perlmodlib.1 \ + $mandir/perlref.1 \ + $mandir/perldsc.1 \ + $mandir/perllol.1 \ + $mandir/perlobj.1 \ + $mandir/perltie.1 \ + $mandir/perltoot.1 \ + $mandir/perlbot.1 \ + $mandir/perldebug.1 \ + $mandir/perldiag.1 \ + $mandir/perlform.1 \ + $mandir/perlipc.1 \ + $mandir/perlsec.1 \ + $mandir/perltrap.1 \ + $mandir/perlstyle.1 \ + $mandir/perlapio.1 \ + $mandir/perlxs.1 \ + $mandir/perlxstut.1 \ + $mandir/perlguts.1 \ + $mandir/perlcall.1 \ + $mandir/perlembed.1 \ + $mandir/perlpod.1 \ + $mandir/perlbook.1 \ + $mandir/perlfaq.1 \ + $mandir/perlfaq1.1 \ + $mandir/perlfaq2.1 \ + $mandir/perlfaq3.1 \ + $mandir/perlfaq4.1 \ + $mandir/perlfaq5.1 \ + $mandir/perlfaq6.1 \ + $mandir/perlfaq7.1 \ + $mandir/perlfaq8.1 \ + $mandir/perlfaq9.1 \ \ - /usr/local/man/man3/diagnostics.3 \ - /usr/local/man/man3/integer.3 \ - /usr/local/man/man3/less.3 \ - /usr/local/man/man3/lib.3 \ - /usr/local/man/man3/overload.3 \ - /usr/local/man/man3/sigtrap.3 \ - /usr/local/man/man3/strict.3 \ - /usr/local/man/man3/subs.3 \ + $libdir/blib.3 \ + $libdir/diagnostics.3 \ + $libdir/integer.3 \ + $libdir/less.3 \ + $libdir/lib.3 \ + $libdir/locale.3 \ + $libdir/overload.3 \ + 
$libdir/sigtrap.3 \ + $libdir/strict.3 \ + $libdir/subs.3 \ + $libdir/vars.3 \ \ - /usr/local/man/man3/AnyDBM_File.3 \ - /usr/local/man/man3/AutoLoader.3 \ - /usr/local/man/man3/AutoSplit.3 \ - /usr/local/man/man3/Benchmark.3 \ - /usr/local/man/man3/Carp.3 \ - /usr/local/man/man3/Config.3 \ - /usr/local/man/man3/Cwd.3 \ - /usr/local/man/man3/DB_File.3 \ - /usr/local/man/man3/Devel::SelfStubber.3 \ - /usr/local/man/man3/DynaLoader.3 \ - /usr/local/man/man3/English.3 \ - /usr/local/man/man3/Env.3 \ - /usr/local/man/man3/Exporter.3 \ - /usr/local/man/man3/ExtUtils::Liblist.3 \ - /usr/local/man/man3/ExtUtils::MakeMaker.3 \ - /usr/local/man/man3/ExtUtils::Manifest.3 \ - /usr/local/man/man3/ExtUtils::Mkbootstrap.3 \ - /usr/local/man/man3/Fcntl.3 \ - /usr/local/man/man3/File::Basename.3 \ - /usr/local/man/man3/File::CheckTree.3 \ - /usr/local/man/man3/File::Find.3 \ - /usr/local/man/man3/FileHandle.3 \ - /usr/local/man/man3/File::Path.3 \ - /usr/local/man/man3/Getopt::Long.3 \ - /usr/local/man/man3/Getopt::Std.3 \ - /usr/local/man/man3/I18N::Collate.3 \ - /usr/local/man/man3/IPC::Open2.3 \ - /usr/local/man/man3/IPC::Open3.3 \ - /usr/local/man/man3/Net::Ping.3 \ - /usr/local/man/man3/POSIX.3 \ - /usr/local/man/man3/Safe.3 \ - /usr/local/man/man3/SelfLoader.3 \ - /usr/local/man/man3/Socket.3 \ - /usr/local/man/man3/Sys::Hostname.3 \ - /usr/local/man/man3/Term::Cap.3 \ - /usr/local/man/man3/Term::Complete.3 \ - /usr/local/man/man3/Test::Harness.3 \ - /usr/local/man/man3/Text::Abbrev.3 \ - /usr/local/man/man3/Text::Soundex.3 \ - /usr/local/man/man3/TieHash.3 \ - /usr/local/man/man3/Time::Local.3 + $libdir/AnyDBM_File.3 \ + $libdir/AutoLoader.3 \ + $libdir/AutoSplit.3 \ + $libdir/Benchmark.3 \ + $libdir/Carp.3 \ + $libdir/Config.3 \ + $libdir/Cwd.3 \ + $libdir/DB_File.3 \ + $libdir/Devel::SelfStubber.3 \ + $libdir/DynaLoader.3 \ + $libdir/English.3 \ + $libdir/Env.3 \ + $libdir/Exporter.3 \ + $libdir/ExtUtils::Embed.3 \ + $libdir/ExtUtils::Install.3 \ + 
$libdir/ExtUtils::Liblist.3 \ + $libdir/ExtUtils::MakeMaker.3 \ + $libdir/ExtUtils::Manifest.3 \ + $libdir/ExtUtils::Mkbootstrap.3 \ + $libdir/ExtUtils::Mksymlists.3 \ + $libdir/Fcntl.3 \ + $libdir/File::Basename.3 \ + $libdir/File::CheckTree.3 \ + $libdir/File::Copy.3 \ + $libdir/File::Compare.3 \ + $libdir/File::Find.3 \ + $libdir/File::Path.3 \ + $libdir/File::stat.3 \ + $libdir/FileCache.3 \ + $libdir/FileHandle.3 \ + $libdir/FindBin.3 \ + $libdir/Getopt::Long.3 \ + $libdir/Getopt::Std.3 \ + $libdir/I18N::Collate.3 \ + $libdir/IO.3 \ + $libdir/IO::File.3 \ + $libdir/IO::Handle.3 \ + $libdir/IO::Pipe.3 \ + $libdir/IO::Seekable.3 \ + $libdir/IO::Select.3 \ + $libdir/IO::Socket.3 \ + $libdir/IPC::Open2.3 \ + $libdir/IPC::Open3.3 \ + $libdir/Math::BigFloat.3 \ + $libdir/Math::BigInt.3 \ + $libdir/Math::Complex.3 \ + $libdir/Math::Trig.3 \ + $libdir/Net::Ping.3 \ + $libdir/Net::hostent.3 \ + $libdir/Net::netent.3 \ + $libdir/Net::protoent.3 \ + $libdir/Net::servent.3 \ + $libdir/Opcode.3 \ + $libdir/POSIX.3 \ + $libdir/Pod::Text.3 \ + $libdir/Safe.3 \ + $libdir/Search::Dict.3 \ + $libdir/SelectSaver.3 \ + $libdir/SelfLoader.3 \ + $libdir/Shell.3 \ + $libdir/Socket.3 \ + $libdir/Symbol.3 \ + $libdir/Sys::Hostname.3 \ + $libdir/Sys::Syslog.3 \ + $libdir/Term::Cap.3 \ + $libdir/Term::Complete.3 \ + $libdir/Test::Harness.3 \ + $libdir/Text::Abbrev.3 \ + $libdir/Text::ParseWords.3 \ + $libdir/Text::Soundex.3 \ + $libdir/Text::Tabs.3 \ + $libdir/Tie::Hash.3 \ + $libdir/Tie::RefHash.3 \ + $libdir/Tie::Scalar.3 \ + $libdir/Tie::SubstrHash.3 \ + $libdir/Time::Local.3 \ + $libdir/Time::gmtime.3 \ + $libdir/Time::localtime.3 \ + $libdir/Time::tm.3 \ + $libdir/UNIVERSAL.3 \ + $libdir/User::grent.3 \ + $libdir/User::pwent.3 | \ +perl -ne 'map { -r && print "$_ " } split'` + +# Bypass internal shell buffer limit -- can't use case +if perl -e '$a = shift; exit($a =~ m|/|)' $toroff; then + echo "$me: empty file list -- did you run install?" 
>&2 + exit 1 +fi + +#psroff -t -man -rC1 -rD1 -rF1 > $tmp/PerlDoc.ps 2>$tmp/PerlTOC.raw +#nroff -man -rC1 -rD1 -rF1 > $tmp/PerlDoc.txt 2>$tmp/PerlTOC.nr.raw + +run="$cmd -rC1 -rD1 -rF1 >$tmp/PerlDoc.$ext 2>$tmp/PerlTOC.$ext.raw" +echo "$me: running $run" +eval $run $toroff +echo "$me: parsing TOC" +./rofftoc $tmp/PerlTOC.$ext.raw > $tmp/PerlTOC.tmp.man +run="$cmd $tmp/PerlTOC.tmp.man >$tmp/PerlTOC.$ext" +echo "$me: running $run" +eval $run +rm -f $tmp/PerlTOC.tmp.man $tmp/PerlTOC.$ext.raw +echo "$me: leaving you with $tmp/PerlDoc.$ext and $tmp/PerlTOC.$ext" +
diff --git a/pod/rofftoc b/pod/rofftoc
new file mode 100755
index 0000000000..a2d0e7ba20
--- /dev/null
+++ b/pod/rofftoc
@@ -0,0 +1,83 @@
+# feed this into perl
+    eval 'exec perl -S $0 ${1+"$@"}'
+        if $running_under_some_shell;
+
+# Usage: rofftoc PerlTOC.xxx.raw
+#
+# Post-processes roffitall output.  Called from roffitall to produce
+# a formatted table of contents.
+#
+# Input: one index record per line, as in the sample after __END__:
+#     Index:<Type> <page> "<description>"
+# where <Type> is Title, Name, Header, Subsection or Item.
+# Output: troff source for the table of contents on standard output.
+#
+# Author: Tom Christiansen
+
+# Emit the fixed troff preamble.  It defines the NP macro and plants it as a
+# trap one inch above the page bottom (.wh -1i), numbers pages in lower-case
+# roman (.af % i), prints the heading, and switches to no-fill, no-adjust
+# mode (.nf, .na) so each entry printed below keeps its own line.
+print <<'EOF';
+.de NP
+'.sp 0.8i
+.tl ''- % -''
+'bp
+'sp 0.5i
+.tl ''\fB\s+2Perl Table of Contents\s0\fR''
+'sp 0.3i
+..
+.wh -1i NP
+.af % i
+.sp 0.5i
+.tl ''\fB\s+5Perl Table of Contents\s0\fR''
+.sp 0.5i
+.nf
+.na
+EOF
+while (<>) {
+    # Drop the "Index:" tag so the record type is the first field.
+    s/Index://;
+    # Fields: record type, page number, quoted description (rest of line).
+    ($type, $page, $desc) = split ' ', $_, 3;
+    # Strip the surrounding quotes.  Lines are not chomped, so $desc
+    # still ends in the newline that terminates each printed entry.
+    $desc =~ s/^"(.*)"$/$1/;
+    if ($type eq 'Title') {
+        # Remember the first word of the title; the Item branch filters on it.
+        ($name = $desc) =~ s/ .*//;
+        next;
+    } elsif ($type eq 'Name') {
+        # Page entry: keep 5 lines together, bold description at the margin, then indent 5n.
+        print ".ne 5\n";
+        print ".in 0\n";
+        print ".sp\n";
+        print ".ft B\n";
+        print "$desc\n";
+        print ".ft P\n";
+        print ".in 5n\n";
+    } elsif ($type eq 'Header') {
+        print ".br\n", $page, "\t", $desc;
+    } elsif ($type eq 'Subsection') {
+        print ".br\n", $page, "\t\t", $desc;
+    } elsif ($type eq 'Item') {
+        # Skip bullet items.  troff spells the bullet \(bu, as in the sample
+        # records after __END__; the optional \( keeps a bare \bu matching
+        # as well.
+        next if $desc =~ /\\\(?bu/;
+        # Items are listed only for pages whose name matches POSIX or func.
+        next unless $name =~ /POSIX|func/i;
+        print ".br\n", $page, "\t\t\t", $desc;
+    }
+}
+__END__
+Index:Title 1 "PERL 1"
+Index:Name 1 "perl - Practical Extraction and Report Language"
+Index:Header 1 "NAME"
+Index:Header 1 "SYNOPSIS"
+Index:Header 2 "DESCRIPTION"
+Index:Item 2 "\(bu Many usability enhancements"
+Index:Item 2 "\(bu Simplified grammar"
+Index:Item 2 "\(bu Lexical scoping"
+Index:Item 2 "\(bu Arbitrarily nested data structures"
+Index:Item 2 "\(bu Modularity and reusability" |