diff options
Diffstat (limited to 'pod')
-rw-r--r-- | pod/Makefile | 23 | ||||
-rw-r--r-- | pod/buildtoc | 2 | ||||
-rw-r--r-- | pod/perl.pod | 19 | ||||
-rw-r--r-- | pod/perlcall.pod | 86 | ||||
-rw-r--r-- | pod/perldata.pod | 4 | ||||
-rw-r--r-- | pod/perldiag.pod | 8 | ||||
-rw-r--r-- | pod/perldsc.pod | 10 | ||||
-rw-r--r-- | pod/perlembed.pod | 20 | ||||
-rw-r--r-- | pod/perlform.pod | 6 | ||||
-rw-r--r-- | pod/perlfunc.pod | 94 | ||||
-rw-r--r-- | pod/perlipc.pod | 94 | ||||
-rw-r--r-- | pod/perlmod.pod | 2 | ||||
-rw-r--r-- | pod/perlop.pod | 7 | ||||
-rw-r--r-- | pod/perlre.pod | 211 | ||||
-rw-r--r-- | pod/perlrun.pod | 20 | ||||
-rw-r--r-- | pod/perlstyle.pod | 7 | ||||
-rw-r--r-- | pod/perlsub.pod | 60 | ||||
-rw-r--r-- | pod/perlsyn.pod | 28 | ||||
-rw-r--r-- | pod/perltie.pod | 8 | ||||
-rw-r--r-- | pod/perltoc.pod | 2 | ||||
-rw-r--r-- | pod/perltrap.pod | 27 | ||||
-rw-r--r-- | pod/perlxs.pod | 333 | ||||
-rw-r--r-- | pod/perlxstut.pod | 595 | ||||
-rw-r--r-- | pod/pod2text.PL | 379 | ||||
-rwxr-xr-x[-rw-r--r--] | pod/splitpod | 0 |
25 files changed, 1244 insertions, 801 deletions
diff --git a/pod/Makefile b/pod/Makefile index 810dd9ea85..ebe4f172ce 100644 --- a/pod/Makefile +++ b/pod/Makefile @@ -1,4 +1,4 @@ -CONVERTERS = pod2html pod2latex pod2man +CONVERTERS = pod2html pod2latex pod2man pod2text all: $(CONVERTERS) man PERL = ../miniperl @@ -139,44 +139,44 @@ TEX = \ perlxs.tex \ perlxstut.tex -man: pod2man -I../lib $(MAN) +man: pod2man ($MAN) # pod2html normally runs on all the pods at once in order to build up # cross-references. html: pod2html - $(PERL) pod2html -I../lib $(POD) + $(PERL) -I../lib pod2html $(POD) tex: pod2latex $(TEX) .SUFFIXES: .pm .pod .man .pm.man: pod2man - $(PERL) pod2man -I../lib $*.pm >$*.man + $(PERL) -I../lib pod2man $*.pm >$*.man .pod.man: pod2man - $(PERL) pod2man -I../lib $*.pod >$*.man + $(PERL) -I../lib pod2man $*.pod >$*.man .SUFFIXES: .mp .pod .html .pm.html: pod2html - $(PERL) pod2html -I../lib $*.pod + $(PERL) -I../lib pod2html $*.pod .pod.html: pod2html - $(PERL) pod2html -I../lib $*.pod + $(PERL) -I../lib pod2html $*.pod .SUFFIXES: .pm .pod .tex .pod.tex: pod2latex - $(PERL) pod2latex -I../lib $*.pod + $(PERL) -I../lib pod2latex $*.pod .pm.tex: pod2latex - $(PERL) pod2latex -I../lib $*.pod + $(PERL) -I../lib pod2latex $*.pod clean: rm -f $(MAN) $(HTML) $(TEX) realclean: clean - rm -f pod2man pod2latex pod2html + rm -f $(CONVERTERS) distclean: realclean @@ -189,3 +189,6 @@ pod2html: pod2html.PL ../lib/Config.pm pod2man: pod2man.PL ../lib/Config.pm $(PERL) -I ../lib pod2man.PL + +pod2text: pod2text.PL ../lib/Config.pm + $(PERL) -I ../lib pod2text.PL diff --git a/pod/buildtoc b/pod/buildtoc index 77ddcd0ead..7a9657ae29 100644 --- a/pod/buildtoc +++ b/pod/buildtoc @@ -119,7 +119,7 @@ podset( @modules[ sort { $modname[$a] cmp $modname[$b] } 0 .. $#modules ] ); =head1 AUTHOR - Larry Wall E<lt><F<lwall\@netlabs.com>E<gt>, with the help of oodles + Larry Wall E<lt><F<lwall\@sems.com>E<gt>, with the help of oodles of other folks. 
diff --git a/pod/perl.pod b/pod/perl.pod index 5f3918c4c9..eb6ff63370 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -4,6 +4,17 @@ perl - Practical Extraction and Report Language =head1 SYNOPSIS +B<perl> [ B<-acdhnpPsSTuUvw> ] +[ S<B<-0>[I<octal>]> ] +[ S<B<-D>I<number/list>]> ] +[ S<B<-F>I<string>> ] +[ S<B<-i>[I<extension>]> ] +[ S<B<-I>I<dir>> ] +[ S<B<-l>I<octal>> ] +[ S<B<-x>I<dir>> ] +[ I<programfile> | S<B<-e> I<command>> ] +[ I<argument> ... ] + For ease of access, the Perl manual has been split up into a number of sections: @@ -82,7 +93,7 @@ grow as necessary to prevent degraded performance. Perl uses sophisticated pattern matching techniques to scan large amounts of data very quickly. Although optimized for scanning text, Perl can also deal with binary data, and can make dbm files look like associative -arrays (where dbm is available). Setuid Perl scripts are safer than +arrays. Setuid Perl scripts are safer than C programs through a dataflow tracing mechanism which prevents many stupid security holes. If you have a problem that would ordinarily use B<sed> or B<awk> or B<sh>, but it exceeds their capabilities or must @@ -140,7 +151,7 @@ A package can function as a class. Dynamic multiple inheritance and virtual methods are supported in a straightforward manner and with very little new syntax. Filehandles may now be treated as objects. -=item * Embeddible and Extensible +=item * Embeddable and Extensible Perl may now be embedded easily in your C or C++ application, and can either call or be called by your routines through a documented @@ -239,7 +250,7 @@ honest: =head1 AUTHOR -Larry Wall E<lt><F<lwall@netlabs.com>E<gt>, with the help of oodles of other folks. +Larry Wall E<lt><F<lwall@sems.com>E<gt>, with the help of oodles of other folks. =head1 FILES @@ -291,6 +302,8 @@ See the perl bugs database at F<http://perl.com/perl/bugs/>. 
You may mail your bug reports (be sure to include full configuration information as output by the myconfig program in the perl source tree) to F<perlbug@perl.com>. +If you've succeeded in compiling perl, the perlbug script in the utils/ +subdirectory can be used to help mail in a bug report. Perl actually stands for Pathologically Eclectic Rubbish Lister, but don't tell anyone I said that. diff --git a/pod/perlcall.pod b/pod/perlcall.pod index 50600f5d1c..996c9145d0 100644 --- a/pod/perlcall.pod +++ b/pod/perlcall.pod @@ -295,6 +295,37 @@ from the stack. See I<Using G_EVAL> for details of using G_EVAL. +=head2 G_KEEPERR + +You may have noticed that using the G_EVAL flag described above will +B<always> clear the C<$@> variable and set it to a string describing +the error iff there was an error in the called code. This unqualified +resetting of C<$@> can be problematic in the reliable identification of +errors using the C<eval {}> mechanism, because the possibility exists +that perl will call other code (end of block processing code, for +example) between the time the error causes C<$@> to be set within +C<eval {}>, and the subsequent statement which checks for the value of +C<$@> gets executed in the user's script. + +This scenario will mostly be applicable to code that is meant to be +called from within destructors, asynchronous callbacks, signal +handlers, C<__DIE__> or C<__WARN__> hooks, and C<tie> functions. In +such situations, you will not want to clear C<$@> at all, but simply to +append any new errors to any existing value of C<$@>. + +The G_KEEPERR flag is meant to be used in conjunction with G_EVAL in +I<perl_call_*> functions that are used to implement such code. This flag +has no effect when G_EVAL is not used. + +When G_KEEPERR is used, any errors in the called code will be prefixed +with the string "\t(in cleanup)", and appended to the current value +of C<$@>. + +The G_KEEPERR flag was introduced in Perl version 5.002. 
+ +See I<Using G_KEEPERR> for an example of a situation that warrants the +use of this flag. + =head2 Determining the Context As mentioned above, you can determine the context of the currently @@ -892,7 +923,6 @@ and some C to call it { dSP ; int count ; - SV * sv ; ENTER ; SAVETMPS; @@ -907,10 +937,9 @@ and some C to call it SPAGAIN ; /* Check the eval first */ - sv = GvSV(gv_fetchpv("@", TRUE, SVt_PV)); - if (SvTRUE(sv)) + if (SvTRUE(GvSV(errgv))) { - printf ("Uh oh - %s\n", SvPV(sv, na)) ; + printf ("Uh oh - %s\n", SvPV(GvSV(errgv), na)) ; POPs ; } else @@ -950,10 +979,9 @@ I<Subtract>. The code - sv = GvSV(gv_fetchpv("@", TRUE, SVt_PV)); - if (SvTRUE(sv)) + if (SvTRUE(GvSV(errgv))) { - printf ("Uh oh - %s\n", SvPVx(sv, na)) ; + printf ("Uh oh - %s\n", SvPV(GvSV(errgv), na)) ; POPs ; } @@ -961,10 +989,14 @@ is the direct equivalent of this bit of Perl print "Uh oh - $@\n" if $@ ; +C<errgv> is a perl global of type C<GV *> that points to the +symbol table entry containing the error. C<GvSV(errgv)> therefore +refers to the C equivalent of C<$@>. + =item 3. Note that the stack is popped using C<POPs> in the block where -C<SvTRUE(sv)> is true. This is necessary because whenever a +C<SvTRUE(GvSV(errgv))> is true. This is necessary because whenever a I<perl_call_*> function invoked with G_EVAL|G_SCALAR returns an error, the top of the stack holds the value I<undef>. Since we want the program to continue after detecting this error, it is essential that @@ -973,6 +1005,39 @@ the stack is tidied up by removing the I<undef>. 
=back +=head2 Using G_KEEPERR + +Consider this rather facetious example, where we have used an XS +version of the call_Subtract example above inside a destructor: + + package Foo; + sub new { bless {}, $_[0] } + sub Subtract { + my($a,$b) = @_; + die "death can be fatal" if $a < $b ; + $a - $b; + } + sub DESTROY { call_Subtract(5, 4); } + sub foo { die "foo dies"; } + + package main; + eval { Foo->new->foo }; + print "Saw: $@" if $@; # should be, but isn't + +This example will fail to recognize that an error occurred inside the +C<eval {}>. Here's why: the call_Subtract code got executed while perl +was cleaning up temporaries when exiting the eval block, and since +call_Subtract is implemented with I<perl_call_pv> using the G_EVAL +flag, it promptly reset C<$@>. This results in the failure of the +outermost test for C<$@>, and thereby the failure of the error trap. + +Appending the G_KEEPERR flag, so that the I<perl_call_pv> call in +call_Subtract reads: + + count = perl_call_pv("Subtract", G_EVAL|G_SCALAR|G_KEEPERR); + +will preserve the error and restore reliable error handling. + =head2 Using perl_call_sv In all the previous examples I have 'hard-wired' the name of the Perl @@ -1829,8 +1894,9 @@ Paul Marquess <pmarquess@bfsec.bt.co.uk> Special thanks to the following people who assisted in the creation of the document. -Jeff Okamoto, Tim Bunce, Nick Gianniotis, Steve Kelem and Larry Wall. +Jeff Okamoto, Tim Bunce, Nick Gianniotis, Steve Kelem, Gurusamy Sarathy +and Larry Wall. =head1 DATE -Version 1.1, 17th May 1995 +Version 1.2, 16th Jan 1996 diff --git a/pod/perldata.pod b/pod/perldata.pod index 9b3798ffb1..90ac535e1c 100644 --- a/pod/perldata.pod +++ b/pod/perldata.pod @@ -122,7 +122,7 @@ declare a scalar variable to be of type "string", or of type "number", or type "filehandle", or anything else. Perl is a contextually polymorphic language whose scalars can be strings, numbers, or references (which includes objects). 
While strings and numbers are considered the pretty -much same thing for nearly all purposes, but references are strongly-typed +much same thing for nearly all purposes, references are strongly-typed uncastable pointers with built-in reference-counting and destructor invocation. @@ -315,7 +315,7 @@ first blank line--see the Merry Christmas example below.) The terminating string must appear by itself (unquoted and with no surrounding whitespace) on the terminating line. - print <<EOF; # same as above + print <<EOF; The price is $Price. EOF diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 83a30c3e1a..130bc8dca2 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -673,6 +673,14 @@ message indicates that such a conversion was attempted. of upgradability. Upgrading to undef indicates an error in the code calling sv_upgrade. +=item Can't use "my %s" in sort comparison + +(F) The global variables $a and $b are reserved for sort comparisons. +You mentioned $a or $b in the same line as the <=> or cmp operator, +and the variable had earlier been declared as a lexical variable. +Either qualify the sort variable with the package name, or rename the +lexical variable. + =item Can't use %s for loop variable (F) Only a simple scalar variable may be used as a loop variable on a foreach. diff --git a/pod/perldsc.pod b/pod/perldsc.pod index 258e9abe08..7e18e7405c 100644 --- a/pod/perldsc.pod +++ b/pod/perldsc.pod @@ -814,6 +814,16 @@ many different sorts: print "\n"; } +=head1 Database Ties + +You cannot easily tie a multilevel data structure (such as a hash of +hashes) to a dbm file. The first problem is that all but GDBM and +Berkeley DB have size limitations, but beyond that, you also have problems +with how references are to be represented on disk. One experimental +module that does attempt to partially address this need is the MLDBM +module. Check your nearest CPAN site as described in L<perlmod> for +source code to MLDBM. 
+ =head1 SEE ALSO L<perlref>, L<perllol>, L<perldata>, L<perlobj> diff --git a/pod/perlembed.pod b/pod/perlembed.pod index c86f550f15..2f0e9c30fb 100644 --- a/pod/perlembed.pod +++ b/pod/perlembed.pod @@ -117,11 +117,11 @@ I<miniperlmain.c> containing the essentials of embedding: static PerlInterpreter *my_perl; /*** The Perl interpreter ***/ - int main(int argc, char **argv) + int main(int argc, char **argv, char **env) { my_perl = perl_alloc(); perl_construct(my_perl); - perl_parse(my_perl, NULL, argc, argv, (char **) NULL); + perl_parse(my_perl, NULL, argc, argv, env); perl_run(my_perl); perl_destruct(my_perl); perl_free(my_perl); @@ -164,12 +164,12 @@ That's shown below, in a program I'll call I<showtime.c>. static PerlInterpreter *my_perl; - int main(int argc, char **argv) + int main(int argc, char **argv, char **env) { my_perl = perl_alloc(); perl_construct(my_perl); - perl_parse(my_perl, NULL, argc, argv, (char **) NULL); + perl_parse(my_perl, NULL, argc, argv, env); /*** This replaces perl_run() ***/ perl_call_argv("showtime", G_DISCARD | G_NOARGS, argv); @@ -241,7 +241,7 @@ the first, a C<float> from the second, and a C<char *> from the third. perl_call_argv("_eval_", 0, argv); } - main (int argc, char **argv) + main (int argc, char **argv, char **env) { char *embedding[] = { "", "-e", "sub _eval_ { eval $_[0] }" }; STRLEN length; @@ -249,7 +249,7 @@ the first, a C<float> from the second, and a C<char *> from the third. 
my_perl = perl_alloc(); perl_construct( my_perl ); - perl_parse(my_perl, NULL, 3, embedding, (char **) NULL); + perl_parse(my_perl, NULL, 3, embedding, env); /** Treat $a as an integer **/ perl_eval("$a = 3; $a **= 2"); @@ -388,7 +388,7 @@ Here's a sample program, I<match.c>, that uses all three: return num_matches; } - main (int argc, char **argv) + main (int argc, char **argv, char **env) { char *embedding[] = { "", "-e", "sub _eval_ { eval $_[0] }" }; char *text, **matches; @@ -398,7 +398,7 @@ Here's a sample program, I<match.c>, that uses all three: my_perl = perl_alloc(); perl_construct( my_perl ); - perl_parse(my_perl, NULL, 3, embedding, (char **) NULL); + perl_parse(my_perl, NULL, 3, embedding, env); text = (char *) malloc(sizeof(char) * 486); /** A long string follows! **/ sprintf(text, "%s", "When he is at a convenience store and the bill comes to some amount like 76 cents, Maynard is aware that there is something he *should* do, something that will enable him to get back a quarter, but he has no idea *what*. He fumbles through his red squeezey changepurse and gives the boy three extra pennies with his dollar, hoping that he might luck into the correct amount. The boy gives him back two of his own pennies and then the big shiny quarter that is his prize. -RICHH"); @@ -517,7 +517,7 @@ deep breath... LEAVE; /* ...and the XPUSHed "mortal" args.*/ } - int main (int argc, char **argv) + int main (int argc, char **argv, char **env) { char *my_argv[2]; @@ -527,7 +527,7 @@ deep breath... my_argv[1] = (char *) malloc(10); sprintf(my_argv[1], "power.pl"); - perl_parse(my_perl, NULL, argc, my_argv, (char **) NULL); + perl_parse(my_perl, NULL, argc, my_argv, env); PerlPower(3, 4); /*** Compute 3 ** 4 ***/ diff --git a/pod/perlform.pod b/pod/perlform.pod index 3e5dd78466..cf0bc068f1 100644 --- a/pod/perlform.pod +++ b/pod/perlform.pod @@ -5,9 +5,9 @@ perlform - Perl formats =head1 DESCRIPTION Perl has a mechanism to help you generate simple reports and charts. 
To -facilitate this, Perl helps you lay out your output page in your code in a -fashion that's close to how it will look when it's printed. It can keep -track of things like how many lines on a page, what page you're, when to +facilitate this, Perl helps you code up your output page +close to how it will look when it's printed. It can keep +track of things like how many lines on a page, what page you're on, when to print page headers, etc. Keywords are borrowed from FORTRAN: format() to declare and write() to execute; see their entries in L<perlfunc>. Fortunately, the layout is much more legible, more like diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index fe661aac22..a8579106b5 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -119,7 +119,7 @@ pack, read, syscall, sysread, syswrite, unpack, vec =item Functions for filehandles, files, or directories -C<-X>, chdir, chmod, chown, chroot, fcntl, glob, ioctl, link, +-X, chdir, chmod, chown, chroot, fcntl, glob, ioctl, link, lstat, mkdir, open, opendir, readlink, rename, rmdir, stat, symlink, umask, unlink, utime @@ -1561,7 +1561,7 @@ or the undefined value if there is an error. Calls the System V IPC function msgsnd to send the message MSG to the message queue ID. MSG must begin with the long integer message type, -which may be created with C<pack("L", $type)>. Returns TRUE if +which may be created with C<pack("l", $type)>. Returns TRUE if successful, or FALSE if there is an error. =item msgrcv ID,VAR,SIZE,TYPE,FLAGS @@ -1647,7 +1647,6 @@ and those that don't is their text file formats. Systems like Unix and Plan9 that delimit lines with a single character, and that encode that character in C as '\n', do not need C<binmode>. The rest need it. - Examples: $ARTICLE = 100; @@ -1751,6 +1750,24 @@ Note: on any operation which may do a fork, unflushed buffers remain unflushed in both processes, which means you may need to set $| to avoid duplicate output. 
+Using the FileHandle constructor from the FileHandle package, +you can generate anonymous filehandles which have the scope of whatever +variables hold references to them, and automatically close whenever +and however you leave that scope: + + use FileHandle; + ... + sub read_myfile_munged { + my $ALL = shift; + my $handle = new FileHandle; + open($handle, "myfile") or die "myfile: $!"; + $first = <$handle> + or return (); # Automatically closed here. + mung $first or die "mung failed"; # Or here. + return $first, <$handle> if $ALL; # Or here. + $first; # Or here. + } + The filename that is passed to open will have leading and trailing whitespace deleted. In order to open a file with arbitrary weird characters in it, it's necessary to protect any leading and trailing @@ -1759,19 +1776,17 @@ whitespace thusly: $file =~ s#^(\s)#./$1#; open(FOO, "< $file\0"); -If you want a "real" C open() (see L<open(2)) on your system, then -you should probably use the POSIX::open() function as found in the L<POSIX> -documents. For example: +If you want a "real" C open() (see L<open(2)> on your system), then +you should use the sysopen() function. This is another way to +protect your filenames from interpretation. For example: use FileHandle; - use POSIX qw(:fcntl_h); - $fd = POSIX::open($path, O_RDWR|O_CREAT|O_EXCL, 0700); - die "POSIX::open $path: $!" unless defined $fd; - $fh = FileHandle->new_from_fd($fd, $amode) || die "fdopen: $!"; - $fh->autoflush(1); - $fh->print("stuff $$\n"); - seek($fh, 0, SEEK_SET); - print "File contains: ", <$fh>; + sysopen(HANDLE, $path, O_RDWR|O_CREAT|O_EXCL, 0700) + or die "sysopen $path: $!"; + HANDLE->autoflush(1); + HANDLE->print("stuff $$\n"); + seek(HANDLE, 0, 0); + print "File contains: ", <HANDLE>; See L</seek()> for some details about mixing reading and writing. 
@@ -2326,10 +2341,13 @@ The usual idiom is: ($nfound,$timeleft) = select($rout=$rin, $wout=$win, $eout=$ein, $timeout); -or to block until something becomes ready: +or to block until something becomes ready just do this $nfound = select($rout=$rin, $wout=$win, $eout=$ein, undef); +Most systems do not bother to return anything useful in $timeleft, so +calling select() in a scalar context just returns $nfound. + Any of the bitmasks can also be undef. The timeout, if specified, is in seconds, which may be fractional. Note: not all implementations are capable of returning the $timeleft. If not, they always return @@ -2525,6 +2543,10 @@ Examples: } @sortedclass = sort byage @class; + # this sorts the %age associative array by value + # instead of key using an inline function + @eldest = sort { $age{$b} <=> $age{$a} } keys %age; + sub backwards { $b cmp $a; } @harry = ('dog','cat','x','Cain','Abel'); @george = ('gone','chased','yz','Punished','Axed'); @@ -2732,6 +2754,25 @@ the stat fails. Typically used as follows: $atime,$mtime,$ctime,$blksize,$blocks) = stat($filename); +Not all fields are supported on all filesystem types. Here are the +meanings of the fields: + + dev device number of filesystem + ino inode number + mode file mode (type and permissions) + nlink number of (hard) links to the file + uid numeric user ID of file's owner + gid numeric group ID of file's owner + rdev the device identifier (special files only) + size total size of file, in bytes + atime last access time since the epoch + mtime last modify time since the epoch + ctime inode change time (NOT creation time!) since the epoch + blksize preferred blocksize for file system I/O + blocks actual number of blocks allocated + +(The epoch was at 00:00 January 1, 1970 GMT.) + If stat is passed the special filehandle consisting of an underline, no stat is done, but the current contents of the stat structure from the last stat or filetest are returned. Example: @@ -2858,6 +2899,27 @@ like numbers.
Note that Perl only supports passing of up to 14 arguments to your system call, which in practice should usually suffice. +=item sysopen FILEHANDLE,FILENAME,MODE + +=item sysopen FILEHANDLE,FILENAME,MODE,PERMS + +Opens the file whose filename is given by FILENAME, and associates it +with FILEHANDLE. If FILEHANDLE is an expression, its value is used as +the name of the real filehandle wanted. This function calls the +underlying operating system's C<open> function with the parameters +FILENAME, MODE, PERMS. + +The possible values and flag bits of the MODE parameter are +system-dependent; they are available via the standard module C<Fcntl>. +However, for historical reasons, some values are universal: zero means +read-only, one means write-only, and two means read/write. + +If the file named by FILENAME does not exist and the C<open> call +creates it (typically because MODE includes the O_CREAT flag), then +the value of PERMS specifies the permissions of the newly created +file. If PERMS is omitted, the default value is 0666, which allows +read and write for all. This default is reasonable: see C<umask>. + =item sysread FILEHANDLE,SCALAR,LENGTH,OFFSET =item sysread FILEHANDLE,SCALAR,LENGTH @@ -3151,7 +3213,7 @@ Returns a normal array consisting of all the values of the named associative array. (In a scalar context, returns the number of values.) The values are returned in an apparently random order, but it is the same order as either the keys() or each() function would produce -on the same array. See also keys() and each(). +on the same array. See also keys(), each(), and sort(). =item vec EXPR,OFFSET,BITS diff --git a/pod/perlipc.pod b/pod/perlipc.pod index 1a3bdad77f..ac2c5fd584 100644 --- a/pod/perlipc.pod +++ b/pod/perlipc.pod @@ -273,7 +273,7 @@ you opened whatever your kid writes to his STDOUT. 
my $sleep_count = 0; do { - $pid = open(KID, "-|"); + $pid = open(KID_TO_WRITE, "|-"); unless (defined $pid) { warn "cannot fork: $!"; die "bailing out" if $sleep_count++ > 6; @@ -282,8 +282,8 @@ you opened whatever your kid writes to his STDOUT. } until defined $pid; if ($pid) { # parent - print KID @some_data; - close(KID) || warn "kid exited $?"; + print KID_TO_WRITE @some_data; + close(KID_TO_WRITE) || warn "kid exited $?"; } else { # child ($EUID, $EGID) = ($UID, $GID); # suid progs only open (FILE, "> /safe/file") @@ -303,13 +303,13 @@ your arguments. Instead, use lower-level control to call exec() directly. Here's a safe backtick or pipe open for read: # add error processing as above - $pid = open(KID, "-|"); + $pid = open(KID_TO_READ, "-|"); if ($pid) { # parent - while (<KID>) { + while (<KID_TO_READ>) { # do something interesting } - close(KID) || warn "kid exited $?"; + close(KID_TO_READ) || warn "kid exited $?"; } else { # child ($EUID, $EGID) = ($UID, $GID); # suid only @@ -322,14 +322,14 @@ Here's a safe backtick or pipe open for read: And here's a safe pipe open for writing: # add error processing as above - $pid = open(KID, "|-"); + $pid = open(KID_TO_WRITE, "|-"); $SIG{ALRM} = sub { die "whoops, $program pipe broke" }; if ($pid) { # parent for (@data) { - print KID; + print KID_TO_WRITE; } - close(KID) || warn "kid exited $?"; + close(KID_TO_WRITE) || warn "kid exited $?"; } else { # child ($EUID, $EGID) = ($UID, $GID); @@ -349,9 +349,9 @@ While this works reasonably well for unidirectional communication, what about bidirectional communication? The obvious thing you'd like to do doesn't actually work: - open(KID, "| some program |") + open(PROG_FOR_READING_AND_WRITING, "| some program |") -and if you forgot to use the B<-w> flag, then you'll miss out +and if you forget to use the B<-w> flag, then you'll miss out entirely on the diagnostic message: Can't do bidirectional pipe at -e line 1. 
@@ -458,7 +458,50 @@ Here's a sample TCP client using Internet-domain sockets: And here's a corresponding server to go along with it. We'll leave the address as INADDR_ANY so that the kernel can choose -the appropriate interface on multihomed hosts: +the appropriate interface on multihomed hosts. If you want to sit +on a particular interface (like the external side of a gateway +or firewall machine), you should fill this in with your real address +instead. + + #!/usr/bin/perl -Tw + require 5.002; + use strict; + BEGIN { $ENV{PATH} = '/usr/ucb:/bin' } + use Socket; + use Carp; + + sub logmsg { print "$0 $$: @_ at ", scalar localtime, "\n" } + + my $port = shift || 2345; + my $proto = getprotobyname('tcp'); + socket(Server, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; + setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, + pack("l", 1)) || die "setsockopt: $!"; + bind(Server, sockaddr_in($port, INADDR_ANY)) || die "bind: $!"; + listen(Server,SOMAXCONN) || die "listen: $!"; + + logmsg "server started on port $port"; + + my $paddr; + + $SIG{CHLD} = \&REAPER; + + for ( ; $paddr = accept(Client,Server); close Client) { + my($port,$iaddr) = sockaddr_in($paddr); + my $name = gethostbyaddr($iaddr,AF_INET); + + logmsg "connection from $name [", + inet_ntoa($iaddr), "] + at port $port"; + + print CLIENT "Hello there, $name, it's now ", + scalar localtime, "\n"; + } + +And here's a multithreaded version. It's multithreaded in that +like most typical servers, it spawns (forks) a slave server to +handle the client request so that the master server can quickly +go back to service a new client. 
#!/usr/bin/perl -Tw require 5.002; @@ -472,10 +515,11 @@ the appropriate interface on multihomed hosts: my $port = shift || 2345; my $proto = getprotobyname('tcp'); - socket(SERVER, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; - setsockopt(SERVER, SOL_SOCKET, SO_REUSEADDR, 1) || die "setsockopt: $!"; - bind(SERVER, sockaddr_in($port, INADDR_ANY)) || die "bind: $!"; - listen(SERVER,5) || die "listen: $!"; + socket(Server, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; + setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, + pack("l", 1)) || die "setsockopt: $!"; + bind(Server, sockaddr_in($port, INADDR_ANY)) || die "bind: $!"; + listen(Server,SOMAXCONN) || die "listen: $!"; logmsg "server started on port $port"; @@ -491,8 +535,8 @@ the appropriate interface on multihomed hosts: $SIG{CHLD} = \&REAPER; for ( $waitedpid = 0; - ($paddr = accept(CLIENT,SERVER)) || $waitedpid; - $waitedpid = 0, close CLIENT) + ($paddr = accept(Client,Server)) || $waitedpid; + $waitedpid = 0, close Client) { next if $waitedpid; my($port,$iaddr) = sockaddr_in($paddr); @@ -527,8 +571,8 @@ the appropriate interface on multihomed hosts: } # else i'm the child -- go spawn - open(STDIN, "<&CLIENT") || die "can't dup client to stdin"; - open(STDOUT, ">&CLIENT") || die "can't dup client to stdout"; + open(STDIN, "<&Client") || die "can't dup client to stdin"; + open(STDOUT, ">&Client") || die "can't dup client to stdout"; ## open(STDERR, ">&STDOUT") || die "can't dup stdout to stderr"; exit &$coderef(); } @@ -628,18 +672,18 @@ And here's a corresponding server. 
my $uaddr = sockaddr_un($NAME); my $proto = getprotobyname('tcp'); - socket(SERVER,PF_UNIX,SOCK_STREAM,0) || die "socket: $!"; + socket(Server,PF_UNIX,SOCK_STREAM,0) || die "socket: $!"; unlink($NAME); - bind (SERVER, $uaddr) || die "bind: $!"; - listen(SERVER,5) || die "listen: $!"; + bind (Server, $uaddr) || die "bind: $!"; + listen(Server,SOMAXCONN) || die "listen: $!"; logmsg "server started on $NAME"; $SIG{CHLD} = \&REAPER; for ( $waitedpid = 0; - accept(CLIENT,SERVER) || $waitedpid; - $waitedpid = 0, close CLIENT) + accept(Client,Server) || $waitedpid; + $waitedpid = 0, close Client) { next if $waitedpid; logmsg "connection on $NAME"; diff --git a/pod/perlmod.pod b/pod/perlmod.pod index 0328bd5d9d..7a8431b15b 100644 --- a/pod/perlmod.pod +++ b/pod/perlmod.pod @@ -190,7 +190,7 @@ For more on this, see L<perlobj>. =head2 Perl Modules -A module is a just package that is defined in a library file of +A module is just a package that is defined in a library file of the same name, and is designed to be reusable. It may do this by providing a mechanism for exporting some of its symbols into the symbol table of any package using it. Or it may function as a class diff --git a/pod/perlop.pod b/pod/perlop.pod index 13655a7d9c..d96afc55a2 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -8,7 +8,8 @@ Perl operators have the following associativity and precedence, listed from highest precedence to lowest. Note that all operators borrowed from C keep the same precedence relationship with each other, even where C's precedence is slightly screwy. (This makes learning -Perl easier for C folks.) +Perl easier for C folks.) With very few exceptions, these all +operate on scalar values only, not array values. left terms and list operators (leftward) left -> @@ -88,7 +89,7 @@ well as subroutine and method calls, and the anonymous constructors C<[]> and C<{}>. See also L<Quote and Quotelike Operators> toward the end of this section, -as well as L<I/O Operators>. 
+as well as L<"I/O Operators">. =head2 The Arrow Operator @@ -157,7 +158,7 @@ thing from interpretation. =head2 Binding Operators -Binary "=~" binds an expression to a pattern match. Certain operations +Binary "=~" binds a scalar expression to a pattern match. Certain operations search or modify the string $_ by default. This operator makes that kind of operation work on some other string. The right argument is a search pattern, substitution, or translation. The left argument is what is diff --git a/pod/perlre.pod b/pod/perlre.pod index 014ee3c818..1c7855c041 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -16,7 +16,7 @@ the regular expression inside. These are: i Do case-insensitive pattern matching. m Treat string as multiple lines. s Treat string as single line. - x Extend your pattern's legibilty with whitespace and comments. + x Extend your pattern's legibility with whitespace and comments. These are usually written as "the C</x> modifier", even though the delimiter in question might not actually be a slash. In fact, any of these @@ -46,7 +46,7 @@ meanings: \ Quote the next metacharacter ^ Match the beginning of the line . Match any character (except newline) - $ Match the end of the line + $ Match the end of the line (or before newline at the end) | Alternation () Grouping [] Character class @@ -80,7 +80,7 @@ The following standard quantifiers are recognized: (If a curly bracket occurs in any other context, it is treated as a regular character.) The "*" modifier is equivalent to C<{0,}>, the "+" modifier to C<{1,}>, and the "?" modifier to C<{0,1}>. n and m are limited -to integral values less than 65536. +to integral values less than 65536. By default, a quantified subpattern is "greedy", that is, it will match as many times as possible without causing the rest pattern not to match. 
The @@ -136,7 +136,7 @@ Perl defines the following zero-width assertions: \b Match a word boundary \B Match a non-(word boundary) \A Match only at beginning of string - \Z Match only at end of string + \Z Match only at end of string (or before newline at the end) \G Match only where previous m//g left off A word boundary (C<\b>) is defined as a spot between two characters that @@ -146,7 +146,8 @@ end of the string as matching a C<\W>. (Within character classes C<\b> represents backspace rather than a word boundary.) The C<\A> and C<\Z> are just like "^" and "$" except that they won't match multiple times when the C</m> modifier is used, while "^" and "$" will match at every internal line -boundary. +boundary. To match the actual end of the string, not ignoring newline, +you can use C<\Z(?!\n)>. When the bracketing construct C<( ... )> is used, \<digit> matches the digit'th substring. Outside of the pattern, always use "$" instead of "\" @@ -162,7 +163,7 @@ You may have as many parentheses as you wish. If you have more than 9 substrings, the variables $10, $11, ... refer to the corresponding substring. Within the pattern, \10, \11, etc. refer back to substrings if there have been at least that many left parens before -the backreference. Otherwise (for backward compatibilty) \10 is the +the backreference. Otherwise (for backward compatibility) \10 is the same as \010, a backspace, and \11 the same as \011, a tab. And so on. (\1 through \9 are always backreferences.) @@ -192,7 +193,7 @@ non-alphanumeric characters: You can also use the built-in quotemeta() function to do this. An even easier way to quote metacharacters right in the match operator -is to say +is to say /$unquoted\Q$quoted\E$unquoted/ @@ -237,10 +238,10 @@ the C<(?!foo)> is just saying that the next thing cannot be "foo"--and it's not, it's a "bar", so "foobar" will match. You would have to do something like C</(?foo)...bar/> for that. 
We say "like" because there's the case of your "bar" not having three characters before it. You could -cover that this way: C</(?:(?!foo)...|^..?)bar/>. Sometimes it's still +cover that this way: C</(?:(?!foo)...|^..?)bar/>. Sometimes it's still easier just to say: - if (/foo/ && $` =~ /bar$/) + if (/foo/ && $` =~ /bar$/) =item (?imsx) @@ -252,12 +253,12 @@ insensitive ones merely need to include C<(?i)> at the front of the pattern. For example: $pattern = "foobar"; - if ( /$pattern/i ) + if ( /$pattern/i ) # more flexible: $pattern = "(?i)foobar"; - if ( /$pattern/ ) + if ( /$pattern/ ) =back @@ -266,6 +267,192 @@ matching construct was because 1) question mark is pretty rare in older regular expressions, and 2) whenever you see one, you should stop and "question" exactly what is going on. That's psychology... +=head2 Backtracking + +A fundamental feature of regular expression matching involves the notion +called I<backtracking>, which is used (when needed) by all regular +expression quantifiers, namely C<*>, C<*?>, C<+>, C<+?>, C<{n,m}>, and +C<{n,m}?>. + +For a regular expression to match, the I<entire> regular expression must +match, not just part of it. So if the beginning of a pattern containing a +quantifier succeeds in a way that causes later parts in the pattern to +fail, the matching engine backs up and recalculates the beginning +part--that's why it's called backtracking. + +Here is an example of backtracking: Let's say you want to find the +word following "foo" in the string "Food is on the foo table.": + + $_ = "Food is on the foo table."; + if ( /\b(foo)\s+(\w+)/i ) { + print "$2 follows $1.\n"; + } + +When the match runs, the first part of the regular expression (C<\b(foo)>) +finds a possible match right at the beginning of the string, and loads up +$1 with "Foo". 
However, as soon as the matching engine sees that there's +no whitespace following the "Foo" that it had saved in $1, it realizes its +mistake and starts over again one character after where it had had the +tentative match. This time it goes all the way until the next occurrence +of "foo". The complete regular expression matches this time, and you get +the expected output of "table follows foo." + +Sometimes minimal matching can help a lot. Imagine you'd like to match +everything between "foo" and "bar". Initially, you write something +like this: + + $_ = "The food is under the bar in the barn."; + if ( /foo(.*)bar/ ) { + print "got <$1>\n"; + } + +Which perhaps unexpectedly yields: + + got <d is under the bar in the > + +That's because C<.*> was greedy, so you get everything between the +I<first> "foo" and the I<last> "bar". In this case, it's more effective +to use minimal matching to make sure you get the text between a "foo" +and the first "bar" thereafter. + + if ( /foo(.*?)bar/ ) { print "got <$1>\n" } + got <d is under the > + +Here's another example: let's say you'd like to match a number at the end +of a string, and you also want to keep the preceding part of the match. +So you write this: + + $_ = "I have 2 numbers: 53147"; + if ( /(.*)(\d*)/ ) { # Wrong! + print "Beginning is <$1>, number is <$2>.\n"; + } + +That won't work at all, because C<.*> was greedy and gobbled up the +whole string. As C<\d*> can match on an empty string, the complete +regular expression matched successfully. + + Beginning is <I have 2 numbers: 53147>, number is <>. 
+ +Here are some variants, most of which don't work: + + $_ = "I have 2 numbers: 53147"; + @pats = qw{ + (.*)(\d*) + (.*)(\d+) + (.*?)(\d*) + (.*?)(\d+) + (.*)(\d+)$ + (.*?)(\d+)$ + (.*)\b(\d+)$ + (.*\D)(\d+)$ + }; + + for $pat (@pats) { + printf "%-12s ", $pat; + if ( /$pat/ ) { + print "<$1> <$2>\n"; + } else { + print "FAIL\n"; + } + } + +That will print out: + + (.*)(\d*) <I have 2 numbers: 53147> <> + (.*)(\d+) <I have 2 numbers: 5314> <7> + (.*?)(\d*) <> <> + (.*?)(\d+) <I have > <2> + (.*)(\d+)$ <I have 2 numbers: 5314> <7> + (.*?)(\d+)$ <I have 2 numbers: > <53147> + (.*)\b(\d+)$ <I have 2 numbers: > <53147> + (.*\D)(\d+)$ <I have 2 numbers: > <53147> + +As you see, this can be a bit tricky. It's important to realize that a +regular expression is merely a set of assertions that gives a definition +of success. There may be 0, 1, or several different ways that the +definition might succeed against a particular string. And if there are +multiple ways it might succeed, you need to understand backtracking in +order to know which variety of success you will achieve. + +When using lookahead assertions and negations, this can all get even +trickier. Imagine you'd like to find a sequence of nondigits not +followed by "123". You might try to write that as + + $_ = "ABC123"; + if ( /^\D*(?!123)/ ) { # Wrong! + print "Yup, no 123 in $_\n"; + } + +But that isn't going to match; at least, not the way you're hoping. It +claims that there is no 123 in the string. Here's a clearer picture of +why that pattern matches, contrary to popular expectations: + + $x = 'ABC123' ; + $y = 'ABC445' ; + + print "1: got $1\n" if $x =~ /^(ABC)(?!123)/ ; + print "2: got $1\n" if $y =~ /^(ABC)(?!123)/ ; + + print "3: got $1\n" if $x =~ /^(\D*)(?!123)/ ; + print "4: got $1\n" if $y =~ /^(\D*)(?!123)/ ; + +This prints + + 2: got ABC + 3: got AB + 4: got ABC + +You might have expected test 3 to fail because it just seems to be a more +general purpose version of test 1. 
The important difference between +them is that test 3 contains a quantifier (C<\D*>) and so can use +backtracking, whereas test 1 will not. What's happening is +that you've asked "Is it true that at the start of $x, following 0 or more +nondigits, you have something that's not 123?" If the pattern matcher had +let C<\D*> expand to "ABC", this would have caused the whole pattern to +fail. +The search engine will initially match C<\D*> with "ABC". Then it will +try to match C<(?!123)> with "123" which, of course, fails. But because +a quantifier (C<\D*>) has been used in the regular expression, the +search engine can backtrack and retry the match differently +in the hope of matching the complete regular expression. + +Well now, +the pattern really, I<really> wants to succeed, so it uses the +standard regexp backoff-and-retry and lets C<\D*> expand to just "AB" this +time. Now there's indeed something following "AB" that is not +"123". It's in fact "C123", which suffices. + +We can deal with this by using both an assertion and a negation. We'll +say that the first part in $1 must be followed by a digit, and in fact, it +must also be followed by something that's not "123". Remember that the +lookaheads are zero-width expressions--they only look, but don't consume +any of the string in their match. So rewriting this way produces what +you'd expect; that is, case 5 will fail, but case 6 succeeds: + + print "5: got $1\n" if $x =~ /^(\D*)(?=\d)(?!123)/ ; + print "6: got $1\n" if $y =~ /^(\D*)(?=\d)(?!123)/ ; + + 6: got ABC + +In other words, the two zero-width assertions next to each other work like +they're ANDed together, just as you'd use any builtin assertions: C</^$/> +matches only if you're at the beginning of the line AND the end of the +line simultaneously. The deeper underlying truth is that juxtaposition in +regular expressions always means AND, except when you write an explicit OR +using the vertical bar. 
C</ab/> means match "a" AND (then) match "b", +although the attempted matches are made at different positions because "a" +is not a zero-width assertion, but a one-width assertion. + +One warning: particularly complicated regular expressions can take +exponential time to solve due to the immense number of possible ways they +can use backtracking to try to match. For example, this will take a very long +time to run: + + /((a{0,5}){0,5}){0,5}/ + +And if you used C<*>'s instead of limiting it to 0 through 5 matches, then +it would take literally forever--or until you ran out of stack space. + =head2 Version 8 Regular Expressions In case you're not familiar with the "regular" Version 8 regexp @@ -309,7 +496,7 @@ matching C<[feio|]>. Within a pattern, you may designate subpatterns for later reference by enclosing them in parentheses, and you may refer back to the I<n>th -subpattern later in the pattern using the metacharacter \I<n>. +subpattern later in the pattern using the metacharacter \I<n>. Subpatterns are numbered based on the left to right order of their opening parenthesis. Note that a backreference matches whatever actually matched the subpattern in the string being examined, not the diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 1e1a0cb814..d684bf908e 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -4,7 +4,10 @@ perlrun - how to execute the Perl interpreter =head1 SYNOPSIS -B<perl> [switches] filename args +B<perl> [B<-acdhnpPsSTuUvw>] [B<-0[octal]>] [B<-D[number/list]>] + [B<-F regexp>] [B<-i[extension]>] [B<-IE<lt>dirE<gt>>] + [B<-l[octal]>] [B<-x[dir]>] + [programfile | B<-e command>] [argument ...] =head1 DESCRIPTION @@ -246,14 +249,21 @@ separator if the B<-l> switch is followed by a B<-0> switch: This sets $\ to newline and then sets $/ to the null character. +=item B<-m>I<module> + =item B<-M>I<module> -executes C<use> I<module> C<;> before executing your script. You can -also do C<-M'Foo qw(Bar Baz)'>. 
+C<-m>I<module> executes C<use> I<module> C<();> before executing your +script. -=item B<-m>I<module> +C<-M>I<module> executes C<use> I<module> C<;> before executing your +script. You can use quotes to add extra code after the module name, +e.g., C<-M'module qw(foo bar)'>. -executes C<use> I<module> C<();> before executing your script. +A little built-in syntactic sugar means you can also say +C<-mmodule=foo> or C<-Mmodule=foo> as a shortcut for +C<-M'module qw(foo)'>. Note that using the C<=> form +removes the distinction between -m and -M. =item B<-n> diff --git a/pod/perlstyle.pod b/pod/perlstyle.pod index 8bc269de8a..e4a5aab41f 100644 --- a/pod/perlstyle.pod +++ b/pod/perlstyle.pod @@ -159,6 +159,13 @@ previous example. =item * +Avoid using grep() (or map()) or `backticks` in a void context, that is, +when you just throw away their return values. Those functions all +have return values, so use them. Otherwise use a foreach() loop or +the system() function instead. + +=item * + For portability, when using features that may not be implemented on every machine, test the construct in an eval to see if it fails. If you know what version or patchlevel a particular feature was diff --git a/pod/perlsub.pod b/pod/perlsub.pod index a893ff5478..80d02d1ca5 100644 --- a/pod/perlsub.pod +++ b/pod/perlsub.pod @@ -32,7 +32,8 @@ Like many languages, Perl provides for user-defined subroutines. These may be located anywhere in the main program, loaded in from other files via the C<do>, C<require>, or C<use> keywords, or even generated on the fly using C<eval> or anonymous subroutines (closures). You can even call -a function indirectly using a variable containing its name or a CODE reference. +a function indirectly using a variable containing its name or a CODE reference +to it, as in C<$var = \&function>. 
The Perl model for function call and return values is simple: all functions are passed as parameters one single flat list of scalars, and @@ -126,7 +127,8 @@ of changing them in place: sub upcase { my @parms = @_; for (@parms) { tr/a-z/A-Z/ } - return @parms; + # wantarray checks if we were called in list context + return wantarray ? @parms : $parms[0]; } Notice how this (unprototyped) function doesn't care whether it was passed @@ -170,6 +172,11 @@ new users may wish to avoid. &foo; # foo() get current args, like foo(@_) !! foo; # like foo() IFF sub foo pre-declared, else "foo" +Not only does the "&" form make the argument list optional, but it also +disables any prototype checking on the arguments you do provide. This +is partly for historical reasons, and partly for having a convenient way +to cheat if you know what you're doing. See the section on Prototypes below. + =head2 Private Variables via my() Synopsis: @@ -450,7 +457,8 @@ the individual arrays. For more on typeglobs, see L<perldata/"Typeglobs">. If you want to pass more than one array or hash into a function--or return them from it--and have them maintain their integrity, then you're going to have to use an explicit pass-by-reference. -Before you do that, you need to understand references; see L<perlref>. +Before you do that, you need to understand references as detailed in L<perlref>. +This section may not make much sense to you otherwise. Here are a few simple examples. First, let's pass in several arrays to a function and have it pop all of then, return a new @@ -509,7 +517,7 @@ in order of how many elements they have in them: if (@$cref > @$dref) { return ($cref, $dref); } else { - return ($cref, $cref); + return ($dref, $cref); } } @@ -564,13 +572,23 @@ As of the 5.002 release of perl, if you declare sub mypush (\@@) -then mypush() takes arguments exactly like push() does. 
(This only works -for function calls that are visible at compile time, not indirect function -calls through a C<&$func> reference nor for method calls as described in -L<perlobj>.) +then mypush() takes arguments exactly like push() does. The declaration +of the function to be called must be visible at compile time. The prototype +only affects the interpretation of new-style calls to the function, where +new-style is defined as not using the C<&> character. In other words, +if you call it like a builtin function, then it behaves like a builtin +function. If you call it like an old-fashioned subroutine, then it +behaves like an old-fashioned subroutine. It naturally falls out from +this rule that prototypes have no influence on subroutine references +like C<\&foo> or on indirect subroutine calls like C<&{$subref}>. + +Method calls are not influenced by prototypes either, because the +function to be called is indeterminate at compile time, since it depends +on inheritance. -Here are the prototypes for some other functions that parse almost exactly -like the corresponding builtins. +Since the intent is primarily to let you define subroutines that work +like builtin commands, here are the prototypes for some other functions +that parse almost exactly like the corresponding builtins. Declared as Called as @@ -589,15 +607,21 @@ like the corresponding builtins. sub myrand ($) myrand 42 sub mytime () mytime -Any backslashed prototype character must be passed something starting -with that character. Any unbackslashed @ or % eats all the rest of the -arguments, and forces list context. An argument represented by $ -forces scalar context. An & requires an anonymous subroutine, and * -does whatever it has to do to turn the argument into a reference to a -symbol table entry. A semicolon separates mandatory arguments from -optional arguments. +Any backslashed prototype character represents an actual argument +that absolutely must start with that character. 
+ +Unbackslashed prototype characters have special meanings. Any +unbackslashed @ or % eats all the rest of the arguments, and forces +list context. An argument represented by $ forces scalar context. An +& requires an anonymous subroutine, which, if passed as the first +argument, does not require the "sub" keyword or a subsequent comma. A +* does whatever it has to do to turn the argument into a reference to a +symbol table entry. + +A semicolon separates mandatory arguments from optional arguments. +(It is redundant before @ or %.) -Note that the last three are syntactically distinguished by the lexer. +Note how the last three examples above are treated specially by the parser. mygrep() is parsed as a true list operator, myrand() is parsed as a true unary operator with unary precedence the same as rand(), and mytime() is truly argumentless, just like time(). That is, if you diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod index e41caee3ec..037ede1099 100644 --- a/pod/perlsyn.pod +++ b/pod/perlsyn.pod @@ -39,9 +39,9 @@ as the my if you expect to to be able to access those private variables. Declaring a subroutine allows a subroutine name to be used as if it were a list operator from that point forward in the program. You can declare a -subroutine without defining it by saying just +subroutine (prototyped to take one scalar parameter) without defining it by saying just: - sub myname; + sub myname ($); $me = myname $0 or die "can't get myname"; Note that it functions as a list operator though, not as a unary @@ -316,17 +316,19 @@ do it: See how much easier this is? It's cleaner, safer, and faster. It's cleaner because it's less noisy. It's safer because if code gets added -between the inner and outer loops later, you won't accidentally excecute -it because you've explicitly asked to iterate the other loop rather than -merely terminating the inner one. And it's faster because Perl executes a -C<foreach> statement more rapidly than it would the equivalent C<for> -loop. 
+between the inner and outer loops later on, the new code won't be +accidentally executed: the C<next> explicitly iterates the other loop +rather than merely terminating the inner one. And it's faster because +Perl executes a C<foreach> statement more rapidly than it would the +equivalent C<for> loop. =head2 Basic BLOCKs and Switch Statements A BLOCK by itself (labeled or not) is semantically equivalent to a loop that executes once. Thus you can use any of the loop control -statements in it to leave or restart the block. The C<continue> block +statements in it to leave or restart the block. (Note that this +is I<NOT> true in C<eval{}>, C<sub{}>, or, contrary to popular belief, C<do{}> blocks, +which do I<NOT> count as loops.) The C<continue> block is optional. The BLOCK construct is particularly nice for doing case @@ -419,10 +421,10 @@ for a C<do> block to return the proper value: $amode = do { if ($flag & O_RDONLY) { "r" } - elsif ($flag & O_WRONLY) { ($flag & O_APPEND) ? "w" : "a" } + elsif ($flag & O_WRONLY) { ($flag & O_APPEND) ? "a" : "w" } elsif ($flag & O_RDWR) { if ($flag & O_CREAT) { "w+" } - else { ($flag & O_APPEND) ? "r+" : "a+" } + else { ($flag & O_APPEND) ? "a+" : "r+" } } }; @@ -456,15 +458,15 @@ pretend that the other subroutine had been called in the first place propagated to the other subroutine.) After the C<goto>, not even caller() will be able to tell that this routine was called first. -In almost cases like this, it's usually a far, far better idea to use the -structured control flow mechanisms of C<next>, C<last>, or C<redo> insetad +In almost all cases like this, it's usually a far, far better idea to use the +structured control flow mechanisms of C<next>, C<last>, or C<redo> instead of resorting to a C<goto>. For certain applications, the catch and throw pair of C<eval{}> and die() for exception processing can also be a prudent approach. 
=head2 PODs: Embedded Documentation Perl has a mechanism for intermixing documentation with source code. -If while expecting the beginning of a new statement, the compiler +While it's expecting the beginning of a new statement, if the compiler encounters a line that begins with an equal sign and a word, like this =head1 Here There Be Pods! diff --git a/pod/perltie.pod b/pod/perltie.pod index 7898700f39..ad5d66ff6e 100644 --- a/pod/perltie.pod +++ b/pod/perltie.pod @@ -604,6 +604,14 @@ for the C<$#ARRAY> access (which is hard, as it's an lvalue), as well as the other obvious array functions, like push(), pop(), shift(), unshift(), and splice(). +You cannot easily tie a multilevel data structure (such as a hash of +hashes) to a dbm file. The first problem is that all but GDBM and +Berkeley DB have size limitations, but beyond that, you also have problems +with how references are to be represented on disk. One experimental +module that does attempt to partially address this need is the MLDBM +module. Check your nearest CPAN site as described in L<perlmod> for +source code to MLDBM. + =head1 AUTHOR Tom Christiansen diff --git a/pod/perltoc.pod b/pod/perltoc.pod index cf5ba8cbb7..0f8de0c88a 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -2793,7 +2793,7 @@ have man pages yet: =head1 AUTHOR -Larry Wall E<lt><F<lwall@netlabs.com>E<gt>, with the help of oodles of +Larry Wall E<lt><F<lwall@sems.com>E<gt>, with the help of oodles of other folks. diff --git a/pod/perltrap.pod b/pod/perltrap.pod index 79e3ae5ca9..dd219c064b 100644 --- a/pod/perltrap.pod +++ b/pod/perltrap.pod @@ -324,6 +324,12 @@ Using local() actually gives a local value to a global variable, which leaves you open to unforeseen side-effects of dynamic scoping. +=item * + +If you localize an exported variable in a module, its exported value will +not change. The local name becomes an alias to a new value but the +external name is still an alias for the original. 
+ =back =head2 Perl4 Traps @@ -486,6 +492,27 @@ works fine, however. =item * +The meaning of foreach has changed slightly when it is iterating over a +list which is not an array. This used to assign the list to a +temporary array, but no longer does so (for efficiency). This means +that you'll now be iterating over the actual values, not over copies of +the values. Modifications to the loop variable can change the original +values. To retain Perl 4 semantics you need to assign your list +explicitly to a temporary array and then iterate over that. For +example, you might need to change + + foreach $var (grep /x/, @list) { ... } + +to + + foreach $var (my @tmp = grep /x/, @list) { ... } + +Otherwise changing C<$var> will clobber the values of @list. (This most often +happens when you use C<$_> for the loop variable, and call subroutines in +the loop that don't properly localize C<$_>.) + +=item * + Some error messages will be different. =item * diff --git a/pod/perlxs.pod b/pod/perlxs.pod index b663dcfa2d..0c376047ba 100644 --- a/pod/perlxs.pod +++ b/pod/perlxs.pod @@ -129,6 +129,16 @@ separate lines. double x sin(x) double x +The function body may be indented or left-adjusted. The following example +shows a function with its body left-adjusted. Most examples in this +document will indent the body. + + CORRECT + + double + sin(x) + double x + =head2 The Argument Stack The argument stack is used to store the values which are @@ -278,10 +288,20 @@ The XSUB follows. timep RETVAL -In many of the examples shown here the CODE: block (and -other blocks) will often be contained within braces ( C<{> and -C<}> ). This protects the CODE: block from complex INPUT -typemaps and ensures the resulting C code is legal. +=head2 The INIT: Keyword + +The INIT: keyword allows initialization to be inserted into the XSUB before +the compiler generates the call to the C function. Unlike the CODE: keyword +above, this keyword does not affect the way the compiler handles RETVAL. 
+ + bool_t + rpcb_gettime(host,timep) + char *host + time_t &timep + INIT: + printf("# Host is %s\n", host ); + OUTPUT: + timep =head2 The NO_INIT Keyword @@ -362,6 +382,86 @@ the parameters in the correct order for that function. timep RETVAL +=head2 The PREINIT: Keyword + +The PREINIT: keyword allows extra variables to be declared before the +typemaps are expanded. If a variable is declared in a CODE: block then that +variable will follow any typemap code. This may result in a C syntax +error. To force the variable to be declared before the typemap code, place +it into a PREINIT: block. The PREINIT: keyword may be used one or more +times within an XSUB. + +The following examples are equivalent, but if the code is using complex +typemaps then the first example is safer. + + bool_t + rpcb_gettime(timep) + time_t timep = NO_INIT + PREINIT: + char *host = "localhost"; + CODE: + RETVAL = rpcb_gettime( host, &timep ); + OUTPUT: + timep + RETVAL + +A correct, but error-prone example. + + bool_t + rpcb_gettime(timep) + time_t timep = NO_INIT + CODE: + char *host = "localhost"; + RETVAL = rpcb_gettime( host, &timep ); + OUTPUT: + timep + RETVAL + +=head2 The INPUT: Keyword + +The XSUB's parameters are usually evaluated immediately after entering the +XSUB. The INPUT: keyword can be used to force those parameters to be +evaluated a little later. The INPUT: keyword can be used multiple times +within an XSUB and can be used to list one or more input variables. This +keyword is used with the PREINIT: keyword. + +The following example shows how the input parameter C<timep> can be +evaluated late, after a PREINIT. + + bool_t + rpcb_gettime(host,timep) + char *host + PREINIT: + time_t tt; + INPUT: + time_t timep + CODE: + RETVAL = rpcb_gettime( host, &tt ); + timep = tt; + OUTPUT: + timep + RETVAL + +The next example shows each input parameter evaluated late. 
+ + bool_t + rpcb_gettime(host,timep) + PREINIT: + time_t tt; + INPUT: + char *host + PREINIT: + char *h; + INPUT: + time_t timep + CODE: + h = host; + RETVAL = rpcb_gettime( h, &tt ); + timep = tt; + OUTPUT: + timep + RETVAL + =head2 Variable-length Parameter Lists XSUBs can have variable-length parameter lists by specifying an ellipsis @@ -385,14 +485,12 @@ The XS code, with ellipsis, follows. bool_t rpcb_gettime(timep, ...) time_t timep = NO_INIT - CODE: - { + PREINIT: char *host = "localhost"; - - if( items > 1 ) - host = (char *)SvPV(ST(1), na); - RETVAL = rpcb_gettime( host, &timep ); - } + CODE: + if( items > 1 ) + host = (char *)SvPV(ST(1), na); + RETVAL = rpcb_gettime( host, &timep ); OUTPUT: timep RETVAL @@ -414,15 +512,14 @@ Perl as a single list. void rpcb_gettime(host) char *host - PPCODE: - { + PREINIT: time_t timep; bool_t status; + PPCODE: status = rpcb_gettime( host, &timep ); EXTEND(sp, 2); PUSHs(sv_2mortal(newSViv(status))); PUSHs(sv_2mortal(newSViv(timep))); - } Notice that the programmer must supply the C code necessary to have the real rpcb_gettime() function called and to have @@ -466,14 +563,13 @@ the default return value. void rpcb_gettime(host) char * host - CODE: - { + PREINIT: time_t timep; bool_t x; + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ) sv_setnv( ST(0), (double)timep); - } The next example demonstrates how one would place an explicit undef in the return value, should the need arise. @@ -481,10 +577,10 @@ return value, should the need arise. void rpcb_gettime(host) char * host - CODE: - { + PREINIT: time_t timep; bool_t x; + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ){ sv_setnv( ST(0), (double)timep); @@ -492,7 +588,6 @@ return value, should the need arise. else{ ST(0) = &sv_undef; } - } To return an empty list one must use a PPCODE: block and then not push return values on the stack. @@ -500,16 +595,15 @@ then not push return values on the stack. 
void rpcb_gettime(host) char *host - PPCODE: - { + PREINIT: time_t timep; + PPCODE: if( rpcb_gettime( host, &timep ) ) PUSHs(sv_2mortal(newSViv(timep))); else{ /* Nothing pushed on stack, so an empty */ /* list is implicitly returned. */ } - } =head2 The REQUIRE: Keyword @@ -545,6 +639,186 @@ terminate the code block. # bootstrap function executes. printf("Hello from the bootstrap!\n"); +=head2 The VERSIONCHECK: Keyword + +The VERSIONCHECK: keyword corresponds to B<xsubpp>'s C<-versioncheck> and +C<-noversioncheck> options. This keyword overrides the commandline +options. Version checking is enabled by default. When version checking is +enabled the XS module will attempt to verify that its version matches the +version of the PM module. + +To enable version checking: + + VERSIONCHECK: ENABLE + +To disable version checking: + + VERSIONCHECK: DISABLE + +=head2 The PROTOTYPES: Keyword + +The PROTOTYPES: keyword corresponds to B<xsubpp>'s C<-prototypes> and +C<-noprototypes> options. This keyword overrides the commandline options. +Prototypes are enabled by default. When prototypes are enabled XSUBs will +be given Perl prototypes. This keyword may be used multiple times in an XS +module to enable and disable prototypes for different parts of the module. + +To enable prototypes: + + PROTOTYPES: ENABLE + +To disable prototypes: + + PROTOTYPES: DISABLE + +=head2 The PROTOTYPE: Keyword + +This keyword is similar to the PROTOTYPES: keyword above but can be used to +force B<xsubpp> to use a specific prototype for the XSUB. This keyword +overrides all other prototype options and keywords but affects only the +current XSUB. Consult L<perlsub/Prototypes> for information about Perl +prototypes. + + bool_t + rpcb_gettime(timep, ...) 
+ time_t timep = NO_INIT + PROTOTYPE: $;$ + PREINIT: + char *host = "localhost"; + CODE: + if( items > 1 ) + host = (char *)SvPV(ST(1), na); + RETVAL = rpcb_gettime( host, &timep ); + OUTPUT: + timep + RETVAL + +=head2 The ALIAS: Keyword + +The ALIAS: keyword allows an XSUB to have two or more unique Perl names +and to know which of those names was used when it was invoked. The Perl +names may be fully-qualified with package names. Each alias is given an +index. The compiler will set up a variable called C<ix> which contains the +index of the alias which was used. When the XSUB is called with its +declared name C<ix> will be 0. + +The following example will create aliases C<FOO::gettime()> and +C<BAR::getit()> for this function. + + bool_t + rpcb_gettime(host,timep) + char *host + time_t &timep + ALIAS: + FOO::gettime = 1 + BAR::getit = 2 + INIT: + printf("# ix = %d\n", ix ); + OUTPUT: + timep + +=head2 The INCLUDE: Keyword + +This keyword can be used to pull other files into the XS module. The other +files may have XS code. INCLUDE: can also be used to run a command to +generate the XS code to be pulled into the module. + +The file F<Rpcb1.xsh> contains our C<rpcb_gettime()> function: + + bool_t + rpcb_gettime(host,timep) + char *host + time_t &timep + OUTPUT: + timep + +The XS module can use INCLUDE: to pull that file into it. + + INCLUDE: Rpcb1.xsh + +If the parameters to the INCLUDE: keyword are followed by a pipe (C<|>) then +the compiler will interpret the parameters as a command. + + INCLUDE: cat Rpcb1.xsh | + +=head2 The CASE: Keyword + +The CASE: keyword allows an XSUB to have multiple distinct parts with each +part acting as a virtual XSUB. CASE: is greedy and if it is used then all +other XS keywords must be contained within a CASE:. This means nothing may +precede the first CASE: in the XSUB and anything following the last CASE: is +included in that case.
+ +A CASE: might switch via a parameter of the XSUB, via the C<ix> ALIAS: +variable (see L<"The ALIAS: Keyword">), or maybe via the C<items> variable +(see L<"Variable-length Parameter Lists">). The last CASE: becomes the +B<default> case if it is not associated with a conditional. The following +example shows CASE switched via C<ix> with a function C<rpcb_gettime()> +having an alias C<x_gettime()>. When the function is called as +C<rpcb_gettime()> its parameters are the usual C<(char *host, time_t +*timep)>, but when the function is called as C<x_gettime()> its parameters are +reversed, C<(time_t *timep, char *host)>. + + long + rpcb_gettime(a,b) + CASE: ix == 1 + ALIAS: + x_gettime = 1 + INPUT: + # 'a' is timep, 'b' is host + char *b + time_t a = NO_INIT + CODE: + RETVAL = rpcb_gettime( b, &a ); + OUTPUT: + a + RETVAL + CASE: + # 'a' is host, 'b' is timep + char *a + time_t &b = NO_INIT + OUTPUT: + b + RETVAL + +That function can be called with either of the following statements. Note +the different argument lists. + + $status = rpcb_gettime( $host, $timep ); + + $status = x_gettime( $timep, $host ); + +=head2 The & Unary Operator + +The & unary operator is used to tell the compiler that it should pass the +address of the object when it calls the C function. This is used when a CODE: block is +not used and the object is not a pointer type (the object is an C<int> or +C<long> but not an C<int*> or C<long*>). + +The following XSUB will generate incorrect C code. The xsubpp compiler will +turn this into code which calls C<rpcb_gettime()> with parameters C<(char +*host, time_t timep)>, but the real C<rpcb_gettime()> wants the C<timep> +parameter to be of type C<time_t*> rather than C<time_t>. + + bool_t + rpcb_gettime(host,timep) + char *host + time_t timep + OUTPUT: + timep + +That problem is corrected by using the C<&> operator. The xsubpp compiler +will now turn this into code which calls C<rpcb_gettime()> correctly with +parameters C<(char *host, time_t *timep)>.
It does this by carrying the +C<&> through, so the function call looks like C<rpcb_gettime(host, &timep)>. + + bool_t + rpcb_gettime(host,timep) + char *host + time_t &timep + OUTPUT: + timep + =head2 Inserting Comments and C Preprocessor Directives Comments and C preprocessor directives are allowed within @@ -635,7 +909,7 @@ example. # char* having the name of the package for the blessing. O_OBJECT sv_setref_pv( $arg, CLASS, (void*)$var ); - + INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) @@ -787,13 +1061,12 @@ File C<RPC.xs>: Interface to some ONC+ RPC bind library functions. void rpcb_gettime(host="localhost") char *host - CODE: - { + PREINIT: time_t timep; + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ) sv_setnv( ST(0), (double)timep ); - } Netconfig * getnetconfigent(netid="udp") @@ -840,7 +1113,11 @@ File C<rpctest.pl>: Perl test program for the RPC extension. print "netconf = $netconf\n"; +=head1 XS VERSION + +This document covers features supported by C<xsubpp> 1.931. + =head1 AUTHOR Dean Roehrich F<E<lt>roehrich@cray.comE<gt>> -Dec 10, 1995 +Jan 25, 1996 diff --git a/pod/perlxstut.pod b/pod/perlxstut.pod index 082e2cd02d..16601a0c29 100644 --- a/pod/perlxstut.pod +++ b/pod/perlxstut.pod @@ -5,28 +5,81 @@ perlXStut - Tutorial for XSUB's =head1 DESCRIPTION This tutorial will educate the reader on the steps involved in creating -a Perl 5 extension. The reader is assumed to have access to L<perlguts> and +a Perl extension. The reader is assumed to have access to L<perlguts> and L<perlxs>. This tutorial starts with very simple examples and becomes more complex, -bringing in more features that are available. Thus, certain statements -towards the beginning may be incomplete. The reader is encouraged to -read the entire document before lambasting the author about apparent -mistakes. +with each new example adding new features. 
Certain concepts may not be +completely explained until later in the tutorial in order to slowly ease +the reader into building extensions. -This tutorial is still under construction. Constructive comments -are welcome. +=head2 VERSION CAVEAT -=head1 EXAMPLE 1 +This tutorial tries hard to keep up with the latest development versions +of Perl. This often means that it is sometimes in advance of the latest +released version of Perl, and that certain features described here might +not work on earlier versions. This section will keep track of when various +features were added to Perl 5. + +=over 4 + +=item * + +In version 5.002 before version 5.002b1h, the test.pl file was not +automatically created by xsubpp. This means that you cannot say "make test" +to run the test script. You will need to add the following line before the +"use extension" statement: + + use lib './blib'; + +=item * + +In versions 5.000 and 5.001, instead of using the above line, you will need +to use the following line: + + BEGIN { unshift(@INC, "./blib") } + +=item * + +This document assumes that the executable named "perl" is Perl version 5. +Some systems may have installed Perl version 5 as "perl5". + +=back + +=head2 DYNAMIC VERSUS STATIC + +It is commonly thought that if a system does not have the capability to +dynamically load a library, you cannot build XSUB's. This is incorrect. +You I<can> build them, but you must link the XSUB's subroutines with the +rest of Perl, creating a new executable. This situation is similar to +Perl 4. + +This tutorial can still be used on such a system. The XSUB build mechanism +will check the system and build a dynamically-loadable library if possible, +or else a static library and then, optionally, a new statically-linked +executable with that static library linked in. 
+ +Should you wish to build a statically-linked executable on a system which +can dynamically load libraries, you may, in all the following examples, +where the command "make" with no arguments is executed, run the command +"make perl" instead. + +If you have generated such a statically-linked executable by choice, then +instead of saying "make test", you should say "make test_static". On systems +that cannot build dynamically-loadable libraries at all, simply saying "make +test" is sufficient. + +=head2 EXAMPLE 1 Our first extension will be very simple. When we call the routine in the -extension, it will print out a well-known message and terminate. +extension, it will print out a well-known message and return. -Run C<h2xs -A -n Test1>. This creates a directory named Test1, possibly under -ext/ if it exists in the current working directory. Four files will be -created in the Test1 dir: MANIFEST, Makefile.PL, Test1.pm, Test1.xs. +Run "h2xs -A -n mytest". This creates a directory named mytest, possibly under +ext/ if that directory exists in the current working directory. Several files +will be created in the mytest dir, including MANIFEST, Makefile.PL, mytest.pm, +mytest.xs, test.pl, and Changes. -The MANIFEST file should contain the names of the four files created. +The MANIFEST file contains the names of all the files created. The file Makefile.PL should look something like this: @@ -34,142 +87,162 @@ The file Makefile.PL should look something like this: # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. 
WriteMakefile( - 'NAME' => 'Test1', - 'VERSION' => '0.1', + 'NAME' => 'mytest', + 'VERSION_FROM' => 'mytest.pm', # finds $VERSION 'LIBS' => [''], # e.g., '-lm' 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' 'INC' => '', # e.g., '-I/usr/include/other' ); -The file Test1.pm should look something like this: +The file mytest.pm should start with something like this: + + package mytest; - package Test1; - require Exporter; require DynaLoader; - + @ISA = qw(Exporter DynaLoader); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. @EXPORT = qw( - + ); - bootstrap Test1; - + $VERSION = '0.01'; + + bootstrap mytest $VERSION; + # Preloaded methods go here. - + # Autoload methods go after __END__, and are processed by the autosplit program. - + 1; __END__ + # Below is the stub of documentation for your module. You better edit it! -And the Test1.xs file should look something like this: +And the mytest.xs file should look something like this: + #ifdef __cplusplus + extern "C" { + #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" + #ifdef __cplusplus + } + #endif - MODULE = Test1 PACKAGE = Test1 + MODULE = mytest PACKAGE = mytest Let's edit the .xs file by adding this to the end of the file: void hello() - CODE: printf("Hello, world!\n"); -Now we'll run C<perl Makefile.PL>. This will create a real Makefile, +Now we'll run "perl Makefile.PL". This will create a real Makefile, which make needs. Its output looks something like: % perl Makefile.PL Checking if your kit is complete...
Looks good - Writing Makefile for Test1 + Writing Makefile for mytest % -Now, running make will produce output that looks something like this: +Now, running make will produce output that looks something like this +(some long lines shortened for clarity): % make - mkdir ./blib - mkdir ./blib/auto - mkdir ./blib/auto/Test1 - perl xsubpp -typemap typemap Test1.xs >Test1.tc && mv Test1.tc Test1.c - cc -c Test1.c - Running Mkbootstrap for Test1 () - chmod 644 Test1.bs - LD_RUN_PATH="" ld -o ./blib/auto/Test1/Test1.sl -b Test1.o - chmod 755 ./blib/auto/Test1/Test1.sl - cp Test1.bs ./blib/auto/Test1/Test1.bs - chmod 644 ./blib/auto/Test1/Test1.bs - cp Test1.pm ./blib/Test1.pm - chmod 644 ./blib/Test1.pm - -Now we'll create a test script, test1.pl in the Test1 directory. It should + umask 0 && cp mytest.pm ./blib/mytest.pm + perl xsubpp -typemap typemap mytest.xs >mytest.tc && mv mytest.tc mytest.c + cc -c mytest.c + Running Mkbootstrap for mytest () + chmod 644 mytest.bs + LD_RUN_PATH="" ld -o ./blib/PA-RISC1.1/auto/mytest/mytest.sl -b mytest.o + chmod 755 ./blib/PA-RISC1.1/auto/mytest/mytest.sl + cp mytest.bs ./blib/PA-RISC1.1/auto/mytest/mytest.bs + chmod 644 ./blib/PA-RISC1.1/auto/mytest/mytest.bs + +Now, although there is already a test.pl template ready for us, for this +example only, we'll create a special test script. Create a file called hello +that looks like this: + +Now we'll create a test script, test1.pl in the mytest directory. It should look like this: - #! /usr/local/bin/perl + #! /opt/perl5/bin/perl - BEGIN { unshift(@INC, "./blib") } + use lib './blib'; - use Test1; + use mytest; - Test1::hello(); + mytest::hello(); Now we run the script and we should see the following output: - % perl test1.pl + % perl hello Hello, world! % -=head1 EXAMPLE 2 +=head2 EXAMPLE 2 -Now let's create a simple extension that will take a single argument and -return 0 if the argument is even, 1 if the argument is odd. 
+Now let's add to our extension a subroutine that will take a single argument +and return 1 if the argument is even, 0 if the argument is odd. -Run C<h2xs -A -n Test2>. This will create a Test2 directory with a file -Test2.xs underneath it. Add the following to the end of the XS file: +Add the following to the end of mytest.xs: int is_even(input) int input - CODE: - RETVAL = input % 2; - + RETVAL = (input % 2 == 0); OUTPUT: RETVAL -(Note that the line after the declaration of is_even is indented one tab -stop. Although there is a tab between "int" and "input", this can be any -amount of white space. Also notice that there is no semi-colon following -the "declaration" of the variable input) +There must be some white space at the start of the "int input" line, and +there must not be a semi-colon at the end of the line (as you'd expect in +a C program). -Now perform the same steps before, generating a Makefile from the -Makefile.PL file, and running make. - -Our test file test2.pl will now look like: +Any white space may be between the "int" and "input". It is also okay for +the four lines starting at the "CODE:" line to not be indented. However, +for readability purposes, it is suggested that you indent them 8 spaces +(or one normal tab stop). - BEGIN { unshift(@INC, "./blib"); } - - use Test2; - - $a = &Test2::is_even(2); - $b = &Test2::is_even(3); - - print "\$a is $a, \$b is $b\n"; +Now re-run make to rebuild our new shared library. -The output should look like: +Now perform the same steps as before, generating a Makefile from the +Makefile.PL file, and running make. - % perl test2.pl - $a is 0, $b is 1 +In order to test that our extension works, we now need to look at the +file test.pl. This file is set up to imitate the same kind of testing +structure that Perl itself has. Within the test script, you perform a +number of tests to confirm the behavior of the extension, printing "ok" +when the test is correct, "not ok" when it is not.
+ +Let's change the print statement in the BEGIN block to print "1..4" and +add the following code to the end of the file: + + print &mytest::is_even(0) == 1 ? "ok 2" : "not ok 2", "\n"; + print &mytest::is_even(1) == 0 ? "ok 3" : "not ok 3", "\n"; + print &mytest::is_even(2) == 1 ? "ok 4" : "not ok 4", "\n"; + +We will be calling the test script through the command "make test". You +should see output that looks something like this: + + % make test + PERL_DL_NONLAZY=1 /opt/perl5.002b2/bin/perl (lots of -I arguments) test.pl + 1..4 + ok 1 + ok 2 + ok 3 + ok 4 % -=head1 WHAT HAS GONE ON? +=head2 WHAT HAS GONE ON? The program h2xs is the starting point for creating extensions. In later -examples, we'll see how we can use h2xs to read header files and generate +examples we'll see how we can use h2xs to read header files and generate templates to connect to C routines. h2xs creates a number of files in the extension directory. The file @@ -178,11 +251,25 @@ the extension. We'll take a closer look at it later. The files <extension>.pm and <extension>.xs contain the meat of the extension. The .xs file holds the C routines that make up the extension. The .pm file -contains routines that tells Perl how to load your extension. +contains routines that tell Perl how to load your extension. -Generating the invoking the Makefile created a directory blib in the current -working directory. This directory will contain the shared library that we -will build. Once we have tested it, we can install it into its final location. +Generating and invoking the Makefile created a directory blib (which stands +for "build library") in the current working directory. This directory will +contain the shared library that we will build. Once we have tested it, we +can install it into its final location. + +Invoking the test script via "make test" did something very important. It +invoked perl with all those -I arguments so that it could find the various +files that are part of the extension. 
+ +It is I<very> important that while you are still testing extensions +you use "make test". If you try to run the test script all by itself, you +will get a fatal error. + +Another reason it is important to use "make test" to run your test script +is that if you are testing an upgrade to an already-existing version, using +"make test" ensures that you use your new extension, not the already-existing +version. Finally, our test scripts do two important things. First of all, they place the directory "blib" at the head of the @INC array. Placing this inside a @@ -191,58 +278,45 @@ before looking in the system directories. This could be important if you are upgrading an already-existing extension and do not want to disturb the system version until you are ready to install it. -Second, the test scripts tell Perl to C<use extension;>. When Perl sees this, -it searches for a .pm file of the same name in the various directories kept -in the @INC array. If it cannot be found, perl will die with an error that -will look something like: +When Perl sees a C<use extension;>, it searches for a file with the same name +as the use'd extension that has a .pm suffix. If that file cannot be found, +Perl dies with a fatal error. The default search path is contained in the +@INC array. - Can't locate Test2.pm in @INC at ./test2.pl line 5. - BEGIN failed--compilation aborted at ./test2.pl line 5. +In our case, mytest.pm tells perl that it will need the Exporter and Dynamic +Loader extensions. It then sets the @ISA and @EXPORT arrays and the $VERSION +scalar; finally it tells perl to bootstrap the module. Perl will call its +dynamic loader routine (if there is one) and load the shared library. -The .pm file tells perl that it will need the Exporter and Dynamic Loader -extensions. It then sets the @ISA array, which is used for looking up -methods that might not exist in the current package, and finally tells perl -to bootstrap the module.
Perl will call its dynamic loader routine and load -the shared library. +The two arrays that are set in the .pm file are very important. The @ISA +array contains a list of other packages in which to search for methods (or +subroutines) that do not exist in the current package. The @EXPORT array +tells Perl which of the extension's routines should be placed into the +calling package's namespace. -The @EXPORT array in the .pm file tells Perl which of the extension's -routines should be placed into the calling package's namespace. In our two -examples so far, we have not modified the @EXPORT array, so our test -scripts must call the routines by their complete name (e.g., Test1::hello). -If we placed the name of the routine in the @EXPORT array, so that the -.pm file looked like: +It's important to select what to export carefully. Do NOT export method names +and do NOT export anything else I<by default> without a good reason. - @EXPORT = qw( hello ); - -Then the hello routine would also be callable from the "main" package. -We could therefore change test1.pl to look like: - - #! /usr/local/bin/perl - - BEGIN { unshift(@INC, "./blib") } - - use Test1; - - hello(); +As a general rule, if the module is trying to be object-oriented then don't +export anything. If it's just a collection of functions then you can export +any of the functions via another array, called @EXPORT_OK. -And we would get the same output, "Hello, world!". +See L<perlmod> for more information. -Most of the time you do not want to export the names of your extension's -subroutines, because they might accidentally clash with other subroutines -from other extensions or from the calling program itself. +The $VERSION variable is used to ensure that the .pm file and the shared +library are "in sync" with each other. Any time you make changes to the +.pm or .xs files, you should increment the value of this variable. 
-=head1 EXAMPLE 3 +=head2 EXAMPLE 3 Our third extension will take one argument as its input, round off that -value, and set the argument to the rounded value. +value, and set the I<argument> to the rounded value. -Run C<h2xs -A -n Test3>. This will create a Test3 directory with a file -Test3.xs underneath it. Add the following to the end of the XS file: +Add the following to the end of mytest.xs: void round(arg) double arg - CODE: if (arg > 0.0) { arg = floor(arg + 0.5); @@ -254,36 +328,30 @@ Test3.xs underneath it. Add the following to the end of the XS file: OUTPUT: arg -Edit the file Makefile.PL so that the corresponding line looks like this: +Edit the Makefile.PL file so that the corresponding line looks like this: 'LIBS' => ['-lm'], # e.g., '-lm' -Generate the Makefile and run make. The test script test3.pl looks like: +Generate the Makefile and run make. Change the BEGIN block to print out +"1..9" and add the following to test.pl: - #! /usr/local/bin/perl - - BEGIN { unshift(@INC, "./blib"); } - - use Test3; - - foreach $i (-1.4, -0.5, 0.0, 0.4, 0.5) { - $j = $i; - &Test3::round($j); - print "Rounding $i results in $j\n"; - } - - print STDERR "Trying to round a constant -- "; - &Test3::round(2.0); + $i = -1.5; &mytest::round($i); print $i == -2.0 ? "ok 5" : "not ok 5", "\n"; + $i = -1.1; &mytest::round($i); print $i == -1.0 ? "ok 6" : "not ok 6", "\n"; + $i = 0.0; &mytest::round($i); print $i == 0.0 ? "ok 7" : "not ok 7", "\n"; + $i = 0.5; &mytest::round($i); print $i == 1.0 ? "ok 8" : "not ok 8", "\n"; + $i = 1.2; &mytest::round($i); print $i == 1.0 ? "ok 9" : "not ok 9", "\n"; + +Running "make test" should now print out that all nine tests are okay. -Notice the output from trying to send a constant in to the routine. Perl -reports: +You might be wondering if you can round a constant. To see what happens, add +the following line to test.pl temporarily: - Modification of a read-only value attempted at ./test3.pl line 15. 
+ &mytest::round(3); -Perl won't let you change the value of two to, say, three, unlike a FORTRAN -compiler from long, long ago! +Run "make test" and notice that Perl dies with a fatal error. Perl won't let +you change the value of constants! -=head1 WHAT'S NEW HERE? +=head2 WHAT'S NEW HERE? Two things are new here. First, we've made some changes to Makefile.PL. In this case, we've specified an extra library to link in, in this case the @@ -293,7 +361,7 @@ every routine in a library. Second, the value of the function is being passed back not as the function's return value, but through the same variable that was passed into the function. -=head1 INPUT AND OUTPUT PARAMETERS +=head2 INPUT AND OUTPUT PARAMETERS You specify the parameters that will be passed into the XSUB just after you declare the function return value and name. The list of parameters looks @@ -302,17 +370,17 @@ may not have an ending semi-colon. The list of output parameters occurs after the OUTPUT: directive. The use of RETVAL tells Perl that you wish to send this value back as the return -value of the XSUB function. Otherwise, you specify which variables used -in the XSUB function should be placed into the respective Perl variables -passed in. +value of the XSUB function. In Example 3, the value we wanted returned was +contained in the same variable we passed in, so we listed it (and not RETVAL) +in the OUTPUT: section. -=head1 THE XSUBPP COMPILER +=head2 THE XSUBPP COMPILER The compiler xsubpp takes the XS code in the .xs file and converts it into C code, placing it in a file whose suffix is .c. The C code created makes heavy use of the C functions within Perl. -=head1 THE TYPEMAP FILE +=head2 THE TYPEMAP FILE The xsubpp compiler uses rules to convert from Perl's data types (scalar, array, etc.) to C's data types (int, char *, etc.). These rules are stored @@ -325,50 +393,26 @@ C code which xsubpp uses for input parameters. The third part contains C code which xsubpp uses for output parameters. 
We'll talk more about the C code later. -Let's now take a look at the .c file created for the Test3 extension. +Let's now take a look at a portion of the .c file created for our extension. - /* - * This file was generated automatically by xsubpp version 1.9 from the - * contents of Test3.xs. Don't edit this file, edit Test3.xs instead. - * - * ANY CHANGES MADE HERE WILL BE LOST! - * - */ - - #include "EXTERN.h" - #include "perl.h" - #include "XSUB.h" - - - XS(XS_Test3_round) + XS(XS_mytest_round) { dXSARGS; - if (items != 1) { - croak("Usage: Test3::round(arg)"); - } + if (items != 1) + croak("Usage: mytest::round(arg)"); { - double arg = (double)SvNV(ST(0)); /* XXXXX */ - + double arg = (double)SvNV(ST(0)); /* XXXXX */ if (arg > 0.0) { arg = floor(arg + 0.5); } else if (arg < 0.0) { arg = ceil(arg - 0.5); + } else { + arg = 0.0; } - - sv_setnv(ST(0), (double)arg); /* XXXXX */ + sv_setnv(ST(0), (double)arg); /* XXXXX */ } XSRETURN(1); } - - XS(boot_Test3) - { - dXSARGS; - char* file = __FILE__; - - newXS("Test3::round", XS_Test3_round, file); - ST(0) = &sv_yes; - XSRETURN(1); - } Notice the two lines marked with "XXXXX". If you check the first section of the typemap file, you'll see that doubles are of type T_DOUBLE. In the @@ -377,153 +421,112 @@ arg by calling the routine SvNV on something, then casting it to double, then assigned to the variable arg. Similarly, in the OUTPUT section, once arg has its final value, it is passed to the sv_setnv function to be passed back to the calling subroutine. These two functions are explained -in perlguts; we'll talk more later about what that "ST(0)" means in the +in L<perlguts>; we'll talk more later about what that "ST(0)" means in the section on the argument stack. -=head1 WARNING +=head2 WARNING -In general, it's not agood idea to write extensions that modify their input +In general, it's not a good idea to write extensions that modify their input parameters, as in Example 3. 
However, in order to better accommodate calling pre-existing C routines, which often do modify their input parameters, -this behavior is tolerated. The next example will show to do this. - -=head1 EXAMPLE 4 +this behavior is tolerated. The next example will show how to do this. -We'll now show how we can call routines in libraries, such as the curses -screen handling package, or a DBM module like GDBM. Each of these libraries -has a header file from which we will generate an XS template that we'll then -fine-tune. +[Examples 4 and 5 have not been re-worked yet and are not included.] -Rather than attempt to find a library that exists on all systems, we'll -first create our own C library, then create an XSUB to it. +=head2 SPECIFYING ARGUMENTS TO XSUBPP -Let's create the files libtest4.h and libtest4.c as follows: +After completing Example 5, we now have an easy way to simulate some +real-life libraries whose interfaces may not be the cleanest in the world. +We shall now continue with a discussion of the arguments passed to the +xsubpp compiler. - /* libtest4.h */ +When you specify arguments in the .xs file, you are really passing three +pieces of information for each one listed. The first piece is the order +of that argument relative to the others (first, second, etc). The second +is the type of argument, and consists of the type declaration of the +argument (e.g., int, char*, etc). The third piece is the exact way in +which the argument should be used in the call to the library function +from this XSUB. This would mean whether or not to place a "&" before +the argument, meaning the argument expects to be passed the address +of the specified data type.
- #define TESTVAL 4 +There is a difference between the two arguments in this hypothetical function: - extern int test4(int, long, const char*); - - /* libtest4.c */ - - #include <stdlib.h> - #include "./libtest4.h" - int - test4(a, b, c) - int a; - long b; - const char * c; - { - return (a + b + atof(c) + TESTVAL); - } + foo(a,b) + char &a + char * b -Now let's compile it into a library. Since we'll be eventually using this -archive to create a shared library, be sure to use the correct flags to -generate position-independent code. In HP-UX, that's: +The first argument to this function would be treated as a char and assigned +to the variable a, and its address would be passed into the function foo. +The second argument would be treated as a string pointer and assigned to the +variable b. The I<value> of b would be passed into the function foo. The +actual call to the function foo that xsubpp generates would look like this: - % cc -Aa -D_HPUX_SOURCE -c +z libtest4.c - % ar cr libtest4.a libtest4.o + foo(&a, b); -Now let's move the libtest4.h and libtest.a files into a sub-directory under -/tmp, so we don't interfere with anything. - - % mkdir /tmp/test4 - % mkdir /tmp/test4/include - % mkdir /tmp/test4/lib - % cp libtest4.h /tmp/test4/include - % cp libtest4.a /tmp/test4/lib - -Okay, now that we have a header file and a library, let's begin actually -writing the extension. - -Run C<h2xs -n Test4 /tmp/test4/include/libtest4.h> (notice we are no longer -specifying B<-A> as an argument). This will create a Test4 directory with a file -F<Test4.xs> underneath it. If we look at it now, we'll see some interesting -things have been added to the various files. - -=over 2 - -=item * - -In the .xs file, there's now a #include declaration with the full path to -the libtest4.h header file. - -=item * - -There's now some new C code that's been added to the .xs file. 
The purpose -of the C<constant> routine is to make the values that are #define'd in the -header file available to the Perl script by calling C<&main::TESTVAL>. -There's also some XS code to allow calls to the C<constant> routine. - -=item * - -The .pm file has exported the name TESTVAL in the @EXPORT array. This -could lead to name clashes. A good rule of thumb is that if the #define -is only going to be used by the C routines themselves, and not by the user, -they should be removed from the @EXPORT array. Alternately, if you don't -mind using the "fully qualified name" of a variable, you could remove most -or all of the items in the @EXPORT array. - -=back - -Let's now add a definition for the routine in our library. Add the following -code to the end of the .xs file: - - int - test4(a,b,c) - int a - long b - const char * c +In other words, whatever is in the last column (or the variable name) is +what is passed into the C function. -Now we also need to create a typemap file because the default Perl doesn't -currently support the const char * type. Create a file called typemap and -place the following in it: +You should take great pains to try to pass the function the type of variable +it wants, when possible. It will save you a lot of trouble in the long run. - const char * T_PV +=head2 THE ARGUMENT STACK -Now we must tell our Makefile template where our new library is. Edit the -Makefile.PL and change the following line: +If we look at any of the C code generated by any of the examples except +example 1, you will notice a number of references to ST(n), where n is +usually 0. The "ST" is actually a macro that points to the n'th argument +on the argument stack. ST(0) is thus the first argument passed to the +XSUB, ST(1) is the second argument, and so on. 
- 'LIBS' => ['-ltest4 -L/tmp/test4'], # e.g., '-lm' +When you list the arguments to the XSUB in the .xs file, that tells xsubpp +which argument corresponds to which position on the argument stack (i.e., the first +one listed is the first argument, and so on). You invite disaster if you +do not list them in the same order as the function expects them. -This specifies that we want the library test4 linked into our XSUB, and that -it should also look in the directory /tmp/test4. +=head2 EXTENDING YOUR EXTENSION -Let's also change the following line in the Makefile.PL to this: +Sometimes you might want to provide some extra methods or subroutines +to assist in making the interface between Perl and your extension simpler +or easier to understand. These routines should live in the .pm file. +Whether they are automatically loaded when the extension itself is loaded +or only loaded when called depends on where in the .pm file the subroutine +definition is placed. - 'INC' => '-I/tmp/test/include', # e.g., '-I/usr/include/other' +=head2 DOCUMENTING YOUR EXTENSION -and also change the #include in test4.xs to be: +There is absolutely no excuse for not documenting your extension. +Documentation belongs in the .pm file. This file will be fed to pod2man, +and the documentation embedded within it converted to man page format, +then placed in the blib directory. It will be copied to Perl's man +page directory when the extension is installed. - #include <libtest4.h> +You may intersperse documentation and Perl code within the .pm file. +In fact, if you want to use method autoloading, you must do this, +as the comment inside the .pm file explains. -Now we don't have to specify the absolute path of the header file in the -.xs file, relying on the Makefile to tell the compiler where to find the -header files. This is generally considered a Good Thing. +See L<perlpod> for more information about the pod format. -Okay, let's create the Makefile, and run make.
You can ignore a message that -may look like: +=head2 INSTALLING YOUR EXTENSION - Warning (non-fatal): No library found for -ltest4 +Once your extension is complete and passes all its tests, installing it +is quite simple: you simply run "make install". You will either need +to have write permission into the directories where Perl is installed, +or ask your system administrator to run the make for you. -If you forgot to create the typemap file, you might see output that looks -like this: +=head2 SEE ALSO - Error: 'const char *' not in typemap in test4.xs, line 102 +For more information, consult L<perlguts>, L<perlxs>, L<perlmod>, +and L<perlpod>. -This error means that you have used a C datatype that xsubpp doesn't know -how to convert between Perl and C. You'll have to create a typemap file to -tell xsubpp how to do the conversions. +=head2 Author -=head1 Author +Jeff Okamoto <okamoto@corp.hp.com> -Jeff Okamoto +Reviewed and assisted by Dean Roehrich, Ilya Zakharevich, Andreas Koenig, +and Tim Bunce. -=head1 Last Changed +=head2 Last Changed -1995/11/20 +1996/1/19 -Jeff Okamoto -F<E<lt>okamoto@hpcc123.corp.hp.comE<gt>> diff --git a/pod/pod2text.PL b/pod/pod2text.PL index caa6ec4b51..1402f0dbe9 100644 --- a/pod/pod2text.PL +++ b/pod/pod2text.PL @@ -1,359 +1,50 @@ #!/usr/local/bin/perl -$SCREEN = ($ARGV[0] =~ /^-(\d+)/ && (shift, $1)) - || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] - || $ENV{COLUMNS} - || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0] - || 72; +use Config; +use File::Basename qw(&basename &dirname); -$/ = ""; +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. -$FANCY = 0; +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. 
+chdir(dirname($0)); +($file = basename($0)) =~ s/\.PL$//; +$file =~ s/\.pl$// + if ($Config{'osname'} eq 'VMS' or + $Config{'osname'} eq 'OS2'); # "case-forgiving" -$cutting = 1; -$DEF_INDENT = 4; -$indent = $DEF_INDENT; -$needspace = 0; +open OUT,">$file" or die "Can't create $file: $!"; -POD_DIRECTIVE: while (<>) { - if ($cutting) { - next unless /^=/; - $cutting = 0; - } - 1 while s{^(.*?)(\t+)(.*)$}{ - $1 - . (' ' x (length($2) * 8 - length($1) % 8)) - . $3 - }me; - # Translate verbatim paragraph - if (/^\s/) { - $needspace = 1; - output($_); - next; - } +print "Extracting $file (with variable substitutions)\n"; -sub prepare_for_output { +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. - s/\s*$/\n/; - &init_noremap; +print OUT <<"!GROK!THIS!"; +$Config{'startperl'} + eval 'exec perl -S \$0 "\$@"' + if 0; +!GROK!THIS! - # need to hide E<> first; they're processed in clear_noremap - s/(E<[^<>]+>)/noremap($1)/ge; - $maxnest = 10; - while ($maxnest-- && /[A-Z]</) { - unless ($FANCY) { - s/C<(.*?)>/`$1'/g; - } else { - s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge; - } - # s/[IF]<(.*?)>/italic($1)/ge; - s/I<(.*?)>/*$1*/g; - # s/[CB]<(.*?)>/bold($1)/ge; - s/X<.*?>//g; - # LREF: a manpage(3f) - s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; - # LREF: an =item on another manpage - s{ - L< - ([^/]+) - / - ( - [:\w]+ - (\(\))? - ) - > - } {the "$2" entry in the $1 manpage}gx; +# In the following, perl variables are not expanded during extraction. - # LREF: an =item on this manpage - s{ - ((?: - L< - / - ( - [:\w]+ - (\(\))? - ) - > - (,?\s+(and\s+)?)? - )+) - } { internal_lrefs($1) }gex; +print OUT <<'!NO!SUBS!'; - # LREF: a =head2 (head1?), maybe on a manpage, maybe right here - # the "func" can disambiguate - s{ - L< - (?: - ([a-zA-Z]\S+?) / - )? - "?(.*?)"? - > - }{ - do { - $1 # if no $1, assume it means on this page. - ? 
"the section on \"$2\" in the $1 manpage" - : "the section on \"$2\"" - } - }gex; +use Pod::Text; - s/[A-Z]<(.*?)>/$1/g; - } - clear_noremap(1); +if(@ARGV) { + pod2text($ARGV[0]); +} else { + pod2text("<&STDIN"); } - &prepare_for_output; - - if (s/^=//) { - # $needspace = 0; # Assume this. - # s/\n/ /g; - ($Cmd, $_) = split(' ', $_, 2); - # clear_noremap(1); - if ($Cmd eq 'cut') { - $cutting = 1; - } - elsif ($Cmd eq 'head1') { - makespace(); - print; - #print uc($_); - } - elsif ($Cmd eq 'head2') { - makespace(); - # s/(\w+)/\u\L$1/g; - #print ' ' x $DEF_INDENT, $_; - # print "\xA7"; - s/(\w)/\xA7 $1/ if $FANCY; - print ' ' x ($DEF_INDENT/2), $_, "\n"; - } - elsif ($Cmd eq 'over') { - push(@indent,$indent); - $indent += ($_ + 0) || $DEF_INDENT; - } - elsif ($Cmd eq 'back') { - $indent = pop(@indent); - warn "Unmatched =back\n" unless defined $indent; - $needspace = 1; - } - elsif ($Cmd eq 'item') { - makespace(); - # s/\A(\s*)\*/$1\xb7/ if $FANCY; - # s/^(\s*\*\s+)/$1 /; - { - if (length() + 3 < $indent) { - my $paratag = $_; - $_ = <>; - if (/^=/) { # tricked! 
- local($indent) = $indent[$#index - 1] || $DEF_INDENT; - output($paratag); - redo POD_DIRECTIVE; - } - &prepare_for_output; - IP_output($paratag, $_); - } else { - local($indent) = $indent[$#index - 1] || $DEF_INDENT; - output($_); - } - } - } - else { - warn "Unrecognized directive: $Cmd\n"; - } - } - else { - # clear_noremap(1); - makespace(); - output($_, 1); - } -} - -######################################################################### - -sub makespace { - if ($needspace) { - print "\n"; - $needspace = 0; - } -} - -sub bold { - my $line = shift; - $line =~ s/(.)/$1\b$1/g; - return $line; -} - -sub italic { - my $line = shift; - $line =~ s/(.)/_\b$1/g; - return $line; -} - -sub IP_output { - local($tag, $_) = @_; - local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT; - $tag_cols = $SCREEN - $tag_indent; - $cols = $SCREEN - $indent; - $tag =~ s/\s*$//; - s/\s+/ /g; - s/^ //; - $str = "format STDOUT = \n" - . (" " x ($tag_indent)) - . '@' . ('<' x ($indent - $tag_indent - 1)) - . "^" . ("<" x ($cols - 1)) . "\n" - . '$tag, $_' - . "\n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . "\n\n.\n1"; - #warn $str; warn "tag is $tag, _ is $_"; - eval $str || die; - write; -} - -sub output { - local($_, $reformat) = @_; - if ($reformat) { - $cols = $SCREEN - $indent; - s/\s+/ /g; - s/^ //; - $str = "format STDOUT = \n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . 
"\n\n.\n1"; - eval $str || die; - write; - } else { - s/^/' ' x $indent/gem; - s/^\s+\n$/\n/gm; - print; - } -} - -sub noremap { - local($thing_to_hide) = shift; - $thing_to_hide =~ tr/\000-\177/\200-\377/; - return $thing_to_hide; -} - -sub init_noremap { - die "unmatched init" if $mapready++; - if ( /[\200-\377]/ ) { - warn "hit bit char in input stream"; - } -} - -sub clear_noremap { - my $ready_to_print = $_[0]; - die "unmatched clear" unless $mapready--; - tr/\200-\377/\000-\177/; - # now for the E<>s, which have been hidden until now - # otherwise the interative \w<> processing would have - # been hosed by the E<gt> - s { - E< - ( [A-Za-z]+ ) - > - } { - do { - defined $HTML_Escapes{$1} - ? do { $HTML_Escapes{$1} } - : do { - warn "Unknown escape: $& in $_"; - "E<$1>"; - } - } - }egx if $ready_to_print; -} - -sub internal_lrefs { - local($_) = shift; - s{L</([^>]+)>}{$1}g; - my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); - my $retstr = "the "; - my $i; - for ($i = 0; $i <= $#items; $i++) { - $retstr .= "C<$items[$i]>"; - $retstr .= ", " if @items > 2 && $i != $#items; - $retstr .= " and " if $i+2 == @items; - } - - $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) - . 
" elsewhere in this document "; - - return $retstr; - -} - -BEGIN { - -%HTML_Escapes = ( - 'amp' => '&', # ampersand - 'lt' => '<', # left chevron, less-than - 'gt' => '>', # right chevron, greater-than - 'quot' => '"', # double quote - - "Aacute" => "\xC1", # capital A, acute accent - "aacute" => "\xE1", # small a, acute accent - "Acirc" => "\xC2", # capital A, circumflex accent - "acirc" => "\xE2", # small a, circumflex accent - "AElig" => "\xC6", # capital AE diphthong (ligature) - "aelig" => "\xE6", # small ae diphthong (ligature) - "Agrave" => "\xC0", # capital A, grave accent - "agrave" => "\xE0", # small a, grave accent - "Aring" => "\xC5", # capital A, ring - "aring" => "\xE5", # small a, ring - "Atilde" => "\xC3", # capital A, tilde - "atilde" => "\xE3", # small a, tilde - "Auml" => "\xC4", # capital A, dieresis or umlaut mark - "auml" => "\xE4", # small a, dieresis or umlaut mark - "Ccedil" => "\xC7", # capital C, cedilla - "ccedil" => "\xE7", # small c, cedilla - "Eacute" => "\xC9", # capital E, acute accent - "eacute" => "\xE9", # small e, acute accent - "Ecirc" => "\xCA", # capital E, circumflex accent - "ecirc" => "\xEA", # small e, circumflex accent - "Egrave" => "\xC8", # capital E, grave accent - "egrave" => "\xE8", # small e, grave accent - "ETH" => "\xD0", # capital Eth, Icelandic - "eth" => "\xF0", # small eth, Icelandic - "Euml" => "\xCB", # capital E, dieresis or umlaut mark - "euml" => "\xEB", # small e, dieresis or umlaut mark - "Iacute" => "\xCD", # capital I, acute accent - "iacute" => "\xED", # small i, acute accent - "Icirc" => "\xCE", # capital I, circumflex accent - "icirc" => "\xEE", # small i, circumflex accent - "Igrave" => "\xCD", # capital I, grave accent - "igrave" => "\xED", # small i, grave accent - "Iuml" => "\xCF", # capital I, dieresis or umlaut mark - "iuml" => "\xEF", # small i, dieresis or umlaut mark - "Ntilde" => "\xD1", # capital N, tilde - "ntilde" => "\xF1", # small n, tilde - "Oacute" => "\xD3", # capital O, acute 
accent - "oacute" => "\xF3", # small o, acute accent - "Ocirc" => "\xD4", # capital O, circumflex accent - "ocirc" => "\xF4", # small o, circumflex accent - "Ograve" => "\xD2", # capital O, grave accent - "ograve" => "\xF2", # small o, grave accent - "Oslash" => "\xD8", # capital O, slash - "oslash" => "\xF8", # small o, slash - "Otilde" => "\xD5", # capital O, tilde - "otilde" => "\xF5", # small o, tilde - "Ouml" => "\xD6", # capital O, dieresis or umlaut mark - "ouml" => "\xF6", # small o, dieresis or umlaut mark - "szlig" => "\xDF", # small sharp s, German (sz ligature) - "THORN" => "\xDE", # capital THORN, Icelandic - "thorn" => "\xFE", # small thorn, Icelandic - "Uacute" => "\xDA", # capital U, acute accent - "uacute" => "\xFA", # small u, acute accent - "Ucirc" => "\xDB", # capital U, circumflex accent - "ucirc" => "\xFB", # small u, circumflex accent - "Ugrave" => "\xD9", # capital U, grave accent - "ugrave" => "\xF9", # small u, grave accent - "Uuml" => "\xDC", # capital U, dieresis or umlaut mark - "uuml" => "\xFC", # small u, dieresis or umlaut mark - "Yacute" => "\xDD", # capital Y, acute accent - "yacute" => "\xFD", # small y, acute accent - "yuml" => "\xFF", # small y, dieresis or umlaut mark - - "lchevron" => "\xAB", # left chevron (double less than) - "rchevron" => "\xBB", # right chevron (double greater than) -); -} +!NO!SUBS! +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; diff --git a/pod/splitpod b/pod/splitpod index fd327d80da..fd327d80da 100644..100755 --- a/pod/splitpod +++ b/pod/splitpod |