summary refs log tree commit diff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r-- ext/Pod-Html/lib/Pod/Html.pm 2218
-rw-r--r-- ext/Pod-Html/t/crossref.pod 41
-rw-r--r-- ext/Pod-Html/t/crossref.t 104
-rw-r--r-- ext/Pod-Html/t/feature.pod 21
-rw-r--r-- ext/Pod-Html/t/feature.t 70
-rw-r--r-- ext/Pod-Html/t/feature2.pod 21
-rw-r--r-- ext/Pod-Html/t/feature2.t 71
-rw-r--r-- ext/Pod-Html/t/htmldir1.pod 15
-rw-r--r-- ext/Pod-Html/t/htmldir1.t 86
-rw-r--r-- ext/Pod-Html/t/htmldir2.pod 15
-rw-r--r-- ext/Pod-Html/t/htmldir2.t 74
-rw-r--r-- ext/Pod-Html/t/htmldir3.pod 15
-rw-r--r-- ext/Pod-Html/t/htmldir3.t 83
-rw-r--r-- ext/Pod-Html/t/htmldir4.pod 15
-rw-r--r-- ext/Pod-Html/t/htmldir4.t 70
-rw-r--r-- ext/Pod-Html/t/htmldir5.pod 15
-rw-r--r-- ext/Pod-Html/t/htmldir5.t 74
-rw-r--r-- ext/Pod-Html/t/htmlescp.t 40
-rw-r--r-- ext/Pod-Html/t/htmllink.t 176
-rw-r--r-- ext/Pod-Html/t/htmlview.pod 36
-rw-r--r-- ext/Pod-Html/t/htmlview.t 250
-rw-r--r-- ext/Pod-Html/t/pod2html-lib.pl 44
-rw-r--r-- ext/Pod-Html/t/poderr.pod 19
-rw-r--r-- ext/Pod-Html/t/poderr.t 68
-rw-r--r-- ext/Pod-Html/t/podnoerr.pod 19
-rw-r--r-- ext/Pod-Html/t/podnoerr.t 55
-rw-r--r-- ext/Pod-Html/testdir/perlpodspec-copy.pod 1899
-rw-r--r-- ext/Pod-Html/testdir/perlvar-copy.pod 1737
28 files changed, 5242 insertions, 2109 deletions
diff --git a/ext/Pod-Html/lib/Pod/Html.pm b/ext/Pod-Html/lib/Pod/Html.pm
index b945419c97..5ae242b96f 100644
--- a/ext/Pod-Html/lib/Pod/Html.pm
+++ b/ext/Pod-Html/lib/Pod/Html.pm
@@ -3,7 +3,7 @@ use strict;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
-$VERSION = 1.11;
+$VERSION = 1.12;
@ISA = qw(Exporter);
@EXPORT = qw(pod2html htmlify);
@EXPORT_OK = qw(anchorify);
@@ -11,11 +11,13 @@ $VERSION = 1.11;
use Carp;
use Config;
use Cwd;
+use File::Basename;
use File::Spec;
use File::Spec::Unix;
use Getopt::Long;
+use Pod::Simple::Search;
-use locale; # make \w work right in non-ASCII lands
+use locale; # make \w work right in non-ASCII lands
=head1 NAME
@@ -29,8 +31,7 @@ Pod::Html - module to convert pod files to HTML
=head1 DESCRIPTION
Converts files from pod format (see L<perlpod>) to HTML format. It
-can automatically generate indexes and cross-references, and it keeps
-a cache of things it knows how to cross-reference.
+can automatically generate indexes and cross-references.
=head1 FUNCTIONS
@@ -40,7 +41,6 @@ a cache of things it knows how to cross-reference.
"--podpath=lib:ext:pod:vms",
"--podroot=/usr/src/perl",
"--htmlroot=/perl/nmanual",
- "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
"--recurse",
"--infile=foo.pod",
"--outfile=/perl/nmanual/foo.html");
@@ -51,16 +51,10 @@ pod2html takes the following arguments:
=item backlink
- --backlink="Back to Top"
+ --backlink
-Adds "Back to Top" links in front of every C<head1> heading (except for
-the first). By default, no backlinks are generated.
-
-=item cachedir
-
- --cachedir=name
-
-Creates the item and directory caches in the given directory.
+Turns every C<head1> heading into a link back to the top of the page.
+By default, no backlinks are generated.
=item css
@@ -69,12 +63,6 @@ Creates the item and directory caches in the given directory.
Specify the URL of a cascading style sheet. Also disables all HTML/CSS
C<style> attributes that are output by default (to avoid conflicts).
-=item flush
-
- --flush
-
-Flushes the item and directory caches.
-
=item header
--header
@@ -89,20 +77,6 @@ section. By default, no headers are generated.
Displays the usage message.
-=item hiddendirs
-
- --hiddendirs
- --nohiddendirs
-
-Include hidden directories in the search for POD's in podpath if recurse
-is set.
-The default is not to traverse any directory whose name begins with C<.>.
-See L</"podpath"> and L</"recurse">.
-
-[This option is for backward compatibility only.
-It's hard to imagine that one would usefully create a module with a
-name component beginning with C<.>.]
-
=item htmldir
--htmldir=name
@@ -134,19 +108,6 @@ behaviour.
Specify the pod file to convert. Input is taken from STDIN if no
infile is specified.
-=item libpods
-
- --libpods=name:...:name
-
-List of page names (eg, "perlfunc") which contain linkable C<=item>s.
-
-=item netscape
-
- --netscape
- --nonetscape
-
-B<Deprecated>, has no effect. For backwards compatibility only.
-
=item outfile
--outfile=name
@@ -154,6 +115,14 @@ B<Deprecated>, has no effect. For backwards compatibility only.
Specify the HTML file to create. Output goes to STDOUT if no outfile
is specified.
+=item poderrors
+
+ --poderrors
+ --nopoderrors
+
+Include a "POD ERRORS" section in the outfile if there were any POD
+errors in the infile. This section is included by default.
+
=item podpath
--podpath=name:...:name
@@ -165,7 +134,8 @@ HTML converted forms can be linked to in cross references.
--podroot=name
-Specify the base directory for finding library pods.
+Specify the base directory for finding library pods. Default is the
+current working directory.
=item quiet
@@ -219,7 +189,9 @@ Uses C<$Config{pod2html}> to setup default options.
=head1 AUTHOR
-Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
+Marc Green, E<lt>marcgreen@cpan.orgE<gt>.
+
+Original version by Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
=head1 SEE ALSO
@@ -231,258 +203,136 @@ This program is distributed under the Artistic License.
=cut
-my($Cachedir);
-my($Dircache, $Itemcache);
-my @Begin_Stack;
-my @Libpods;
my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
my($Podfile, @Podpath, $Podroot);
+my $Poderrors;
my $Css;
my $Recurse;
my $Quiet;
-my $HiddenDirs;
my $Verbose;
my $Doindex;
my $Backlink;
-my($Listlevel, @Listtype);
-my $ListNewTerm;
-use vars qw($Ignore); # need to localize it later.
-my(%Items_Named, @Items_Seen);
my($Title, $Header);
-my $Top;
-my $Paragraph;
-
-my %Sections;
-
-# Caches
-my %Pages = (); # associative array used to find the location
- # of pages referenced by L<> links.
-my %Items = (); # associative array used to find the location
- # of =item directives referenced by C<> links
-
-my %Local_Items;
-my $Is83;
+my %Pages = (); # associative array used to find the location
+ # of pages referenced by L<> links.
my $Curdir = File::Spec->curdir;
init_globals();
sub init_globals {
- $Cachedir = "."; # The directory to which item and directory
- # caches will be written.
-
- $Dircache = "pod2htmd.tmp";
- $Itemcache = "pod2htmi.tmp";
-
- @Begin_Stack = (); # begin/end stack
-
- @Libpods = (); # files to search for links from C<> directives
- $Htmlroot = "/"; # http-server base directory from which all
- # relative paths in $podpath stem.
- $Htmldir = ""; # The directory to which the html pages
- # will (eventually) be written.
- $Htmlfile = ""; # write to stdout by default
- $Htmlfileurl = ""; # The url that other files would use to
- # refer to this file. This is only used
- # to make relative urls that point to
- # other files.
-
- $Podfile = ""; # read from stdin by default
- @Podpath = (); # list of directories containing library pods.
- $Podroot = $Curdir; # filesystem base directory from which all
- # relative paths in $podpath stem.
+ $Htmlroot = "/"; # http-server base directory from which all
+ # relative paths in $podpath stem.
+ $Htmldir = ""; # The directory to which the html pages
+ # will (eventually) be written.
+ $Htmlfile = ""; # write to stdout by default
+ $Htmlfileurl = ""; # The url that other files would use to
+ # refer to this file. This is only used
+ # to make relative urls that point to
+ # other files.
+
+ $Poderrors = 1;
+ $Podfile = ""; # read from stdin by default
+ @Podpath = (); # list of directories containing library pods.
+ $Podroot = $Curdir; # filesystem base directory from which all
+ # relative paths in $podpath stem.
$Css = ''; # Cascading style sheet
- $Recurse = 1; # recurse on subdirectories in $podpath.
- $Quiet = 0; # not quiet by default
- $Verbose = 0; # not verbose by default
- $Doindex = 1; # non-zero if we should generate an index
- $Backlink = ''; # text for "back to top" links
- $Listlevel = 0; # current list depth
- @Listtype = (); # list types for open lists
- $ListNewTerm = 0; # indicates new term in definition list; used
- # to correctly open/close <dd> tags
- $Ignore = 1; # whether or not to format text. we don't
- # format text until we hit our first pod
- # directive.
-
- @Items_Seen = (); # for multiples of the same item in perlfunc
- %Items_Named = ();
- $Header = 0; # produce block header/footer
- $Title = ''; # title to give the pod(s)
- $Top = 1; # true if we are at the top of the doc. used
- # to prevent the first <hr /> directive.
- $Paragraph = ''; # which paragraph we're processing (used
- # for error messages)
- %Sections = (); # sections within this page
-
- %Local_Items = ();
- $Is83 = $^O eq 'dos'; # Is it an 8.3 filesystem?
-}
-
-#
-# clean_data: global clean-up of pod data
-#
-sub clean_data($){
- my( $dataref ) = @_;
- for my $i ( 0..$#{$dataref} ) {
- ${$dataref}[$i] =~ s/\s+\Z//;
-
- # have a look for all-space lines
- if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
- my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
- splice( @$dataref, $i, 1, @chunks );
- }
- }
+ $Recurse = 1; # recurse on subdirectories in $podpath.
+ $Quiet = 0; # not quiet by default
+ $Verbose = 0; # not verbose by default
+ $Doindex = 1; # non-zero if we should generate an index
+ $Backlink = 0; # no backlinks added by default
+ $Header = 0; # produce block header/footer
+ $Title = ''; # title to give the pod(s)
}
-
sub pod2html {
local(@ARGV) = @_;
- local($/);
local $_;
init_globals();
-
- $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
-
- # cache of %Pages and %Items from last time we ran pod2html
-
- #undef $opt_help if defined $opt_help;
-
- # parse the command-line parameters
parse_command_line();
- # escape the backlink argument (same goes for title but is done later...)
- $Backlink = html_escape($Backlink) if defined $Backlink;
+ # prevent '//' in urls
+ $Htmlroot = "" if $Htmlroot eq "/";
+ $Htmldir =~ s#/\z##;
- # set some variables to their default values if necessary
- my $pod;
- unless (@ARGV && $ARGV[0]) {
- if ($Podfile and $Podfile ne '-') {
- open $pod, '<', $Podfile
- or die "$0: cannot open $Podfile file for input: $!\n";
- } else {
- open $pod, '-';
- }
- } else {
- $Podfile = $ARGV[0]; # XXX: might be more filenames
- $pod = *ARGV;
- }
- $Htmlfile = "-" unless $Htmlfile; # stdout
- $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
- $Htmldir =~ s#/\z## ; # so we don't get a //
if ( $Htmlroot eq ''
&& defined( $Htmldir )
&& $Htmldir ne ''
&& substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
- )
- {
- # Set the 'base' url for this file, so that we can use it
- # as the location from which to calculate relative links
- # to other files. If this is '', then absolute links will
- # be used throughout.
- $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
- }
-
- # read the pod a paragraph at a time
- warn "Scanning for sections in input file(s)\n" if $Verbose;
- $/ = "";
- my @poddata = <$pod>;
- close $pod;
-
- # be eol agnostic
- for (@poddata) {
- if (/\r/) {
- if (/\r\n/) {
- @poddata = map { s/\r\n/\n/g;
- /\n\n/ ?
- map { "$_\n\n" } split /\n\n/ :
- $_ } @poddata;
- } else {
- @poddata = map { s/\r/\n/g;
- /\n\n/ ?
- map { "$_\n\n" } split /\n\n/ :
- $_ } @poddata;
- }
- last;
- }
- }
-
- clean_data( \@poddata );
-
- # scan the pod for =head[1-6] directives and build an index
- my $index = scan_headings(\%Sections, @poddata);
-
- unless($index) {
- warn "No headings in $Podfile\n" if $Verbose;
- }
-
- # open the output file
- my $html;
- if($Htmlfile and $Htmlfile ne '-') {
- open $html, ">", $Htmlfile
- or die "$0: cannot open $Htmlfile file for output: $!\n";
- } else {
- open $html, ">-";
- }
-
- # put a title in the HTML file if one wasn't specified
- if ($Title eq '') {
- TITLE_SEARCH: {
- for (my $i = 0; $i < @poddata; $i++) {
- if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
- for my $para ( @poddata[$i, $i+1] ) {
- last TITLE_SEARCH
- if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
- }
- }
-
- }
- }
- }
- if (!$Title and $Podfile =~ /\.pod\z/) {
- # probably a split pod so take first =head[12] as title
- for (my $i = 0; $i < @poddata; $i++) {
- last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
- }
- warn "adopted '$Title' as title for $Podfile\n"
- if $Verbose and $Title;
- }
- if ($Title) {
- $Title =~ s/\s*\(.*\)//;
- } else {
- warn "$0: no title for $Podfile.\n" unless $Quiet;
- $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
- $Title = ($Podfile eq "-" ? 'No Title' : $1);
- warn "using $Title" if $Verbose;
- }
+ ) {
+ # Set the 'base' url for this file, so that we can use it
+ # as the location from which to calculate relative links
+ # to other files. If this is '', then absolute links will
+ # be used throughout.
+ #$Htmlfileurl = "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
+ # Is the above not just "$Htmlfileurl = $Htmlfile"?
+ $Htmlfileurl = Unixify::unixify($Htmlfile);
+
+ }
+
+ my $pwd = getcwd();
+ chdir($Podroot) || die "$0: error changing to directory $Podroot: $!\n";
+
+ # find all pod modules/pages in podpath, store in %Pages
+ # - callback used to remove Podroot and extension from each file
+ # - laborious to allow '.' in dirnames (e.g., /usr/share/perl/5.14.1)
+ Pod::Simple::Search->new->inc(0)->verbose($Verbose)->laborious(1)
+ ->callback(\&_save_page)->recurse($Recurse)->survey(@Podpath);
+
+ chdir($pwd) || die "$0: error changing to directory $pwd: $!\n";
+
+ # set options for the parser
+ my $parser = Pod::Simple::XHTML::LocalPodLinks->new();
+ $parser->anchor_items(1); # the old Pod::Html always did
+ $parser->backlink($Backlink); # linkify =head1 directives
+ $parser->htmldir($Htmldir);
+ $parser->htmlfileurl($Htmlfileurl);
+ $parser->htmlroot($Htmlroot);
+ $parser->index($Doindex);
+ $parser->no_errata_section(!$Poderrors); # note the inverse
+ $parser->output_string(\my $output); # written to file later
+ $parser->pages(\%Pages);
+ $parser->quiet($Quiet);
+ $parser->verbose($Verbose);
+
+ # XXX: implement a default title generator in Pod::Simple::XHTML,
+ # copying the way the old Pod::Html did it
$Title = html_escape($Title);
+ # We need to add this ourselves because we use our own header, not
+ # ::XHTML's header. We need to set $parser->backlink to linkify
+ # the =head1 directives
+ my $bodyid = $Backlink ? ' id="_podtop_"' : '';
+
my $csslink = '';
my $bodystyle = ' style="background-color: white"';
my $tdstyle = ' style="background-color: #cccccc"';
if ($Css) {
- $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
- $csslink =~ s,\\,/,g;
- $csslink =~ s,(/.):,$1|,;
- $bodystyle = '';
- $tdstyle = '';
+ $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
+ $csslink =~ s,\\,/,g;
+ $csslink =~ s,(/.):,$1|,;
+ $bodystyle = '';
+ $tdstyle= '';
}
- my $block = $Header ? <<END_OF_BLOCK : '';
+ # header/footer block
+ my $block = $Header ? <<END_OF_BLOCK : '';
<table border="0" width="100%" cellspacing="0" cellpadding="3">
-<tr><td class="block"$tdstyle valign="middle">
-<big><strong><span class="block">&nbsp;$Title</span></strong></big>
+<tr><td class="_podblock_"$tdstyle valign="middle">
+<big><strong><span class="_podblock_">&nbsp;$Title</span></strong></big>
</td></tr>
</table>
END_OF_BLOCK
- print $html <<END_OF_HEAD;
+ # create own header/footer because of --header
+ $parser->html_header(<<"HTMLHEAD");
<?xml version="1.0" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
@@ -492,147 +342,43 @@ END_OF_BLOCK
<link rev="made" href="mailto:$Config{perladmin}" />
</head>
-<body$bodystyle>
+<body$bodyid$bodystyle>
$block
-END_OF_HEAD
-
- # load/reload/validate/cache %Pages and %Items
- get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
-
- # scan the pod for =item directives
- scan_items( \%Local_Items, "", @poddata);
-
- # put an index at the top of the file. note, if $Doindex is 0 we
- # still generate an index, but surround it with an html comment.
- # that way some other program can extract it if desired.
- $index =~ s/--+/-/g;
-
- my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : "";
-
- unless ($Doindex)
- {
- $index = qq(<!--\n$index\n-->\n);
- }
+HTMLHEAD
- print $html <<"END_OF_INDEX";
-
-<!-- INDEX BEGIN -->
-<div name="index">
-<p><a name=\"__index__\"></a></p>
-$index
-$hr
-</div>
-<!-- INDEX END -->
-
-END_OF_INDEX
-
- # now convert this file
- my $after_item; # set to true after an =item
- warn "Converting input file $Podfile\n" if $Verbose;
- foreach my $i (0..$#poddata){
- $_ = $poddata[$i];
- $Paragraph = $i+1;
- if (/^(=.*)/s) { # is it a pod directive?
- $Ignore = 0;
- $after_item = 0;
- $_ = $1;
- if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
- process_begin($html, $1, $2);
- } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
- process_end($1, $2);
- } elsif (/^=cut/) { # =cut
- process_cut();
- } elsif (/^=pod/) { # =pod
- process_pod();
- } else {
- next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
-
- if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
- process_head( $html, $1, $2, $Doindex && $index );
- } elsif (/^=item\s*(.*\S)?/sm) { # =item text
- process_item( $html, $1 );
- $after_item = 1;
- } elsif (/^=over\s*(.*)/) { # =over N
- process_over();
- } elsif (/^=back/) { # =back
- process_back( $html );
- } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
- process_for( $html, $1, $2 );
- } else {
- /^=(\S*)\s*/;
- warn "$0: $Podfile: unknown pod directive '$1' in "
- . "paragraph $Paragraph. ignoring.\n" unless $Quiet;
- }
- }
- $Top = 0;
- }
- else {
- next if $Ignore;
- if (@Begin_Stack) {
- print $html $_ if $Begin_Stack[-1] eq 'html';
- next;
- }
- my $text = $_;
-
- # Open tag for definition list as we have something to put in it
- if( $ListNewTerm ){
- print $html "<dd>\n";
- $ListNewTerm = 0;
- }
-
- if( $text =~ /\A\s+/ ){
- process_pre( \$text );
- print $html "<pre>\n$text</pre>\n";
-
- } else {
- process_text( \$text );
-
- # experimental: check for a paragraph where all lines
- # have some ...\t...\t...\n pattern
- if( $text =~ /\t/ ){
- my @lines = split( "\n", $text );
- if( @lines > 1 ){
- my $all = 2;
- foreach my $line ( @lines ){
- if( $line =~ /\S/ && $line !~ /\t/ ){
- $all--;
- last if $all == 0;
- }
- }
- if( $all > 0 ){
- $text =~ s/\t+/<td>/g;
- $text =~ s/^/<tr><td>/gm;
- $text = '<table cellspacing="0" cellpadding="0">' .
- $text . '</table>';
- }
- }
- }
- ## end of experimental
-
- print $html "<p>$text</p>\n";
- }
- $after_item = 0;
- }
- }
-
- # finish off any pending directives
- finish_list( $html );
-
- # link to page index
- print $html "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
- if $Doindex and $index and $Backlink;
-
- print $html <<END_OF_TAIL;
+ $parser->html_footer(<<"HTMLFOOT");
$block
</body>
</html>
-END_OF_TAIL
+HTMLFOOT
- # close the html file
- close $html or die "Failed to close $Htmlfile: $!";
+ my $input;
+ unless (@ARGV && $ARGV[0]) {
+ if ($Podfile and $Podfile ne '-') {
+ $input = $Podfile;
+ } else {
+ $input = '-'; # XXX: make a test case for this
+ }
+ } else {
+ $Podfile = $ARGV[0];
+ $input = *ARGV;
+ }
+
+ warn "Converting input file $Podfile\n" if $Verbose;
+ $parser->parse_file($input);
- warn "Finished\n" if $Verbose;
+ # Write output to file
+ $Htmlfile = "-" unless $Htmlfile; # stdout
+ my $fhout;
+ if($Htmlfile and $Htmlfile ne '-') {
+ open $fhout, ">", $Htmlfile
+ or die "$0: cannot open $Htmlfile file for output: $!\n";
+ } else {
+ open $fhout, ">-";
+ }
+ print $fhout $output;
+ close $fhout or die "Failed to close $Htmlfile: $!";
}
##############################################################################
@@ -643,1167 +389,84 @@ sub usage {
die <<END_OF_USAGE;
Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
--podpath=<name>:...:<name> --podroot=<name>
- --libpods=<name>:...:<name> --recurse --verbose --index
- --netscape --norecurse --noindex --cachedir=<name>
-
- --backlink - set text for "back to top" links (default: none).
- --cachedir - directory for the item and directory cache files.
- --css - stylesheet URL
- --flush - flushes the item and directory caches.
- --[no]header - produce block header/footer (default is no headers).
- --help - prints this message.
- --hiddendirs - search hidden directories in podpath
- --htmldir - directory for resulting HTML files.
- --htmlroot - http-server base directory from which all relative paths
- in podpath stem (default is /).
- --[no]index - generate an index at the top of the resulting html
- (default behaviour).
- --infile - filename for the pod to convert (input taken from stdin
- by default).
- --libpods - colon-separated list of pages to search for =item pod
- directives in as targets of C<> and implicit links (empty
- by default). note, these are not filenames, but rather
- page names like those that appear in L<> links.
- --outfile - filename for the resulting html file (output sent to
- stdout by default).
- --podpath - colon-separated list of directories containing library
- pods (empty by default).
- --podroot - filesystem base directory from which all relative paths
- in podpath stem (default is .).
- --[no]quiet - suppress some benign warning messages (default is off).
- --[no]recurse - recurse on those subdirectories listed in podpath
- (default behaviour).
- --title - title that will appear in resulting html file.
- --[no]verbose - self-explanatory (off by default).
- --[no]netscape - deprecated, has no effect. for backwards compatibility only.
+ --recurse --verbose --index --norecurse --noindex
+
+ --[no]backlink - turn =head1 directives into links pointing to the top of
+ the page (off by default).
+ --css - stylesheet URL
+ --[no]header - produce block header/footer (default is no headers).
+ --help - prints this message.
+ --htmldir - directory for resulting HTML files.
+ --htmlroot - http-server base directory from which all relative paths
+ in podpath stem (default is /).
+ --[no]index - generate an index at the top of the resulting html
+ (default behaviour).
+ --infile - filename for the pod to convert (input taken from stdin
+ by default).
+ --outfile - filename for the resulting html file (output sent to
+ stdout by default).
+ --[no]poderrors - include a POD ERRORS section in the output if there were
+ any POD errors in the input (default behavior).
+ --podpath - colon-separated list of directories containing library
+ pods (empty by default).
+ --podroot - filesystem base directory from which all relative paths
+ in podpath stem (default is .).
+ --[no]quiet - suppress some benign warning messages (default is off).
+ --[no]recurse - recurse on those subdirectories listed in podpath
+ (default behaviour).
+ --title - title that will appear in resulting html file.
+ --[no]verbose - self-explanatory (off by default).
END_OF_USAGE
}
sub parse_command_line {
- my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
- $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
- $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
- $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
+ my ($opt_backlink,$opt_css,$opt_header,$opt_help,
+ $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,
+ $opt_outfile,$opt_poderrors,$opt_podpath,$opt_podroot,
+ $opt_quiet,$opt_recurse,$opt_title,$opt_verbose);
unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
my $result = GetOptions(
- 'backlink=s' => \$opt_backlink,
- 'cachedir=s' => \$opt_cachedir,
- 'css=s' => \$opt_css,
- 'flush' => \$opt_flush,
- 'header!' => \$opt_header,
- 'help' => \$opt_help,
- 'hiddendirs!'=> \$opt_hiddendirs,
- 'htmldir=s' => \$opt_htmldir,
- 'htmlroot=s' => \$opt_htmlroot,
- 'index!' => \$opt_index,
- 'infile=s' => \$opt_infile,
- 'libpods=s' => \$opt_libpods,
- 'netscape!' => \$opt_netscape,
- 'outfile=s' => \$opt_outfile,
- 'podpath=s' => \$opt_podpath,
- 'podroot=s' => \$opt_podroot,
- 'quiet!' => \$opt_quiet,
- 'recurse!' => \$opt_recurse,
- 'title=s' => \$opt_title,
- 'verbose!' => \$opt_verbose,
- );
+ 'backlink!' => \$opt_backlink,
+ 'css=s' => \$opt_css,
+ 'help' => \$opt_help,
+ 'header!' => \$opt_header,
+ 'htmldir=s' => \$opt_htmldir,
+ 'htmlroot=s' => \$opt_htmlroot,
+ 'index!' => \$opt_index,
+ 'infile=s' => \$opt_infile,
+ 'outfile=s' => \$opt_outfile,
+ 'poderrors!' => \$opt_poderrors,
+ 'podpath=s' => \$opt_podpath,
+ 'podroot=s' => \$opt_podroot,
+ 'quiet!' => \$opt_quiet,
+ 'recurse!' => \$opt_recurse,
+ 'title=s' => \$opt_title,
+ 'verbose!' => \$opt_verbose,
+ );
usage("-", "invalid parameters") if not $result;
- usage("-") if defined $opt_help; # see if the user asked for help
- $opt_help = ""; # just to make -w shut-up.
+ usage("-") if defined $opt_help; # see if the user asked for help
+ $opt_help = ""; # just to make -w shut-up.
@Podpath = split(":", $opt_podpath) if defined $opt_podpath;
- @Libpods = split(":", $opt_libpods) if defined $opt_libpods;
-
- $Backlink = $opt_backlink if defined $opt_backlink;
- $Cachedir = $opt_cachedir if defined $opt_cachedir;
- $Css = $opt_css if defined $opt_css;
- $Header = $opt_header if defined $opt_header;
- $Htmldir = $opt_htmldir if defined $opt_htmldir;
- $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
- $Doindex = $opt_index if defined $opt_index;
- $Podfile = $opt_infile if defined $opt_infile;
- $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
- $Htmlfile = $opt_outfile if defined $opt_outfile;
- $Podroot = $opt_podroot if defined $opt_podroot;
- $Quiet = $opt_quiet if defined $opt_quiet;
- $Recurse = $opt_recurse if defined $opt_recurse;
- $Title = $opt_title if defined $opt_title;
- $Verbose = $opt_verbose if defined $opt_verbose;
-
- warn "Flushing item and directory caches\n"
- if $opt_verbose && defined $opt_flush;
- $Dircache = "$Cachedir/pod2htmd.tmp";
- $Itemcache = "$Cachedir/pod2htmi.tmp";
- if (defined $opt_flush) {
- 1 while unlink($Dircache, $Itemcache);
- }
-}
-
-
-my $Saved_Cache_Key;
-
-sub get_cache {
- my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
- my @cache_key_args = @_;
- # A first-level cache:
- # Don't bother reading the cache files if they still apply
- # and haven't changed since we last read them.
-
- my $this_cache_key = cache_key(@cache_key_args);
-
- return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
-
- # load the cache of %Pages and %Items if possible. $tests will be
- # non-zero if successful.
- my $tests = 0;
- if (-f $dircache && -f $itemcache) {
- warn "scanning for item cache\n" if $Verbose;
- $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
- }
-
- # if we didn't succeed in loading the cache then we must (re)build
- # %Pages and %Items.
- if (!$tests) {
- warn "scanning directories in pod-path\n" if $Verbose;
- scan_podpath($podroot, $recurse, 0);
- }
- $Saved_Cache_Key = cache_key(@cache_key_args);
-}
-
-sub cache_key {
- my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
- return join('!', $dircache, $itemcache, $recurse,
- @$podpath, $podroot, stat($dircache), stat($itemcache));
-}
-
-#
-# load_cache - tries to find if the caches stored in $dircache and $itemcache
-# are valid caches of %Pages and %Items. if they are valid then it loads
-# them and returns a non-zero value.
-#
-sub load_cache {
- my($dircache, $itemcache, $podpath, $podroot) = @_;
- my($tests);
- local $_;
-
- $tests = 0;
-
- open(CACHE, "<$itemcache") ||
- die "$0: error opening $itemcache for reading: $!\n";
- $/ = "\n";
-
- # is it the same podpath?
- $_ = <CACHE>;
- chomp($_);
- $tests++ if (join(":", @$podpath) eq $_);
-
- # is it the same podroot?
- $_ = <CACHE>;
- chomp($_);
- $tests++ if ($podroot eq $_);
-
- # load the cache if its good
- if ($tests != 2) {
- close(CACHE);
- return 0;
- }
-
- warn "loading item cache\n" if $Verbose;
- while (<CACHE>) {
- /(.*?) (.*)$/;
- $Items{$1} = $2;
- }
- close(CACHE);
-
- warn "scanning for directory cache\n" if $Verbose;
- open(CACHE, "<$dircache") ||
- die "$0: error opening $dircache for reading: $!\n";
- $/ = "\n";
- $tests = 0;
-
- # is it the same podpath?
- $_ = <CACHE>;
- chomp($_);
- $tests++ if (join(":", @$podpath) eq $_);
-
- # is it the same podroot?
- $_ = <CACHE>;
- chomp($_);
- $tests++ if ($podroot eq $_);
-
- # load the cache if its good
- if ($tests != 2) {
- close(CACHE);
- return 0;
- }
-
- warn "loading directory cache\n" if $Verbose;
- while (<CACHE>) {
- /(.*?) (.*)$/;
- $Pages{$1} = $2;
- }
-
- close(CACHE);
-
- return 1;
-}
-
-#
-# scan_podpath - scans the directories specified in @podpath for directories,
-# .pod files, and .pm files. it also scans the pod files specified in
-# @Libpods for =item directives.
-#
-sub scan_podpath {
- my($podroot, $recurse, $append) = @_;
- my($pwd, $dir);
- my($libpod, $dirname, $pod, @files, @poddata);
-
- unless($append) {
- %Items = ();
- %Pages = ();
- }
-
- # scan each directory listed in @Podpath
- $pwd = getcwd();
- chdir($podroot)
- || die "$0: error changing to directory $podroot: $!\n";
- foreach $dir (@Podpath) {
- scan_dir($dir, $recurse);
- }
-
- # scan the pods listed in @Libpods for =item directives
- foreach $libpod (@Libpods) {
- # if the page isn't defined then we won't know where to find it
- # on the system.
- next unless defined $Pages{$libpod} && $Pages{$libpod};
-
- # if there is a directory then use the .pod and .pm files within it.
- # NOTE: Only finds the first so-named directory in the tree.
-# if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
- if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
- # find all the .pod and .pm files within the directory
- $dirname = $1;
- opendir(DIR, $dirname) ||
- die "$0: error opening directory $dirname: $!\n";
- @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
- closedir(DIR);
-
- # scan each .pod and .pm file for =item directives
- foreach $pod (@files) {
- open my $fh, '<', "$dirname/$pod"
- or die "$0: error opening $dirname/$pod for input: $!\n";
- @poddata = <$fh>;
- close $fh;
- clean_data( \@poddata );
-
- scan_items( \%Items, "$dirname/$pod", @poddata);
- }
-
- # use the names of files as =item directives too.
-### Don't think this should be done this way - confuses issues.(WL)
-### foreach $pod (@files) {
-### $pod =~ /^(.*)(\.pod|\.pm)$/;
-### $Items{$1} = "$dirname/$1.html" if $1;
-### }
- } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
- $Pages{$libpod} =~ /([^:]*\.pm):/) {
- # scan the .pod or .pm file for =item directives
- $pod = $1;
- open my $fh, '<', $pod
- or die "$0: error opening $pod for input: $!\n";
- @poddata = <$fh>;
- close $fh;
- clean_data( \@poddata );
-
- scan_items( \%Items, "$pod", @poddata);
- } else {
- warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
- }
- }
- @poddata = (); # clean-up a bit
-
- chdir($pwd)
- || die "$0: error changing to directory $pwd: $!\n";
-
- # cache the item list for later use
- warn "caching items for later use\n" if $Verbose;
- open my $cache, '>', $Itemcache
- or die "$0: error open $Itemcache for writing: $!\n";
-
- print $cache join(":", @Podpath) . "\n$podroot\n";
- foreach my $key (keys %Items) {
- print $cache "$key $Items{$key}\n";
- }
-
- close $cache or die "error closing $Itemcache: $!";
-
- # cache the directory list for later use
- warn "caching directories for later use\n" if $Verbose;
- open $cache, '>', $Dircache
- or die "$0: error open $Dircache for writing: $!\n";
-
- print $cache join(":", @Podpath) . "\n$podroot\n";
- foreach my $key (keys %Pages) {
- print $cache "$key $Pages{$key}\n";
- }
-
- close $cache or die "error closing $Dircache: $!";
-}
-
-#
-# scan_dir - scans the directory specified in $dir for subdirectories, .pod
-# files, and .pm files. notes those that it finds. this information will
-# be used later in order to figure out where the pages specified in L<>
-# links are on the filesystem.
-#
-sub scan_dir {
- my($dir, $recurse) = @_;
- my($t, @subdirs, @pods, $pod, $dirname, @dirs);
- local $_;
-
- @subdirs = ();
- @pods = ();
-
- opendir(DIR, $dir) ||
- die "$0: error opening directory $dir: $!\n";
- while (defined($_ = readdir(DIR))) {
- if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
- && ($HiddenDirs || !/^\./)
- ) { # directory
- $Pages{$_} = "" unless defined $Pages{$_};
- $Pages{$_} .= "$dir/$_:";
- push(@subdirs, $_);
- } elsif (/\.pod\z/) { # .pod
- s/\.pod\z//;
- $Pages{$_} = "" unless defined $Pages{$_};
- $Pages{$_} .= "$dir/$_.pod:";
- push(@pods, "$dir/$_.pod");
- } elsif (/\.html\z/) { # .html
- s/\.html\z//;
- $Pages{$_} = "" unless defined $Pages{$_};
- $Pages{$_} .= "$dir/$_.pod:";
- } elsif (/\.pm\z/) { # .pm
- s/\.pm\z//;
- $Pages{$_} = "" unless defined $Pages{$_};
- $Pages{$_} .= "$dir/$_.pm:";
- push(@pods, "$dir/$_.pm");
- } elsif (-T "$dir/$_") { # script(?)
- local *F;
- if (open(F, "$dir/$_")) {
- my $line;
- while (defined($line = <F>)) {
- if ($line =~ /^=(?:pod|head1)/) {
- $Pages{$_} = "" unless defined $Pages{$_};
- $Pages{$_} .= "$dir/$_.pod:";
- last;
- }
- }
- close(F);
- }
- }
- }
- closedir(DIR);
-
- # recurse on the subdirectories if necessary
- if ($recurse) {
- foreach my $subdir (@subdirs) {
- scan_dir("$dir/$subdir", $recurse);
- }
- }
-}
-
-#
-# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
-# build an index.
-#
-sub scan_headings {
- my($sections, @data) = @_;
- my($tag, $which_head, $otitle, $listdepth, $index);
-
- local $Ignore = 0;
-
- $listdepth = 0;
- $index = "";
-
- # scan for =head directives, note their name, and build an index
- # pointing to each of them.
- foreach my $line (@data) {
- if ($line =~ /^=(head)([1-6])\s+(.*)/) {
- ($tag, $which_head, $otitle) = ($1,$2,$3);
-
- my $title = depod( $otitle );
- my $name = anchorify( $title );
- $$sections{$name} = 1;
- $title = process_text( \$otitle );
-
- while ($which_head != $listdepth) {
- if ($which_head > $listdepth) {
- $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
- $listdepth++;
- } elsif ($which_head < $listdepth) {
- $listdepth--;
- $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
- }
- }
-
- $index .= "\n" . ("\t" x $listdepth) . "<li>" .
- "<a href=\"#" . $name . "\">" .
- $title . "</a></li>";
- }
- }
-
- # finish off the lists
- while ($listdepth--) {
- $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
- }
-
- # get rid of bogus lists
- $index =~ s,\t*<ul>\s*</ul>\n,,g;
-
- return $index;
-}
-
-#
-# scan_items - scans the pod specified by $pod for =item directives. we
-# will use this information later on in resolving C<> links.
-#
-sub scan_items {
- my( $itemref, $pod, @poddata ) = @_;
- my($i, $item);
- local $_;
-
- $pod =~ s/\.pod\z//;
- $pod .= ".html" if $pod;
-
- foreach $i (0..$#poddata) {
- my $txt = depod( $poddata[$i] );
-
- # figure out what kind of item it is.
- # Build string for referencing this item.
- if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list
- next unless $1;
- $item = $1;
- } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
- $item = $1;
- } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list
- $item = $1;
- } else {
- next;
- }
- my $fid = fragment_id( $item );
- $$itemref{$fid} = "$pod" if $fid;
- }
-}
-
-#
-# process_head - convert a pod head[1-6] tag and convert it to HTML format.
-#
-sub process_head {
- my($fh, $tag, $heading, $hasindex) = @_;
-
- # figure out the level of the =head
- $tag =~ /head([1-6])/;
- my $level = $1;
-
- finish_list( $fh );
-
- print $fh "<p>\n";
- if( $level == 1 && ! $Top ){
- print $fh "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
- if $hasindex and $Backlink;
- print $fh "</p>\n<hr />\n"
- } else {
- print $fh "</p>\n";
- }
-
- my $name = anchorify( depod( $heading ) );
- my $convert = process_text( \$heading );
- print $fh "<h$level><a name=\"$name\">$convert</a></h$level>\n";
-}
-
-
-#
-# emit_item_tag - print an =item's text
-# Note: The global $EmittedItem is used for inhibiting self-references.
-#
-my $EmittedItem;
-
-sub emit_item_tag {
- my( $fh, $otext, $text, $compact ) = @_;
- my $item = fragment_id( depod($text) , -generate);
- Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile")
- if !defined $item;
- $EmittedItem = $item;
- ### print STDERR "emit_item_tag=$item ($text)\n";
-
- print $fh '<strong>';
- if ($Items_Named{$item}++) {
- print $fh process_text( \$otext );
- } else {
- my $name = $item;
- $name = anchorify($name);
- print $fh qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>';
- }
- print $fh "</strong>";
- undef( $EmittedItem );
-}
-
-sub new_listitem {
- my ($fh, $tag) = @_;
- # Open tag for definition list as we have something to put in it
- if( ($tag ne 'dl') && ($ListNewTerm) ){
- print $fh "<dd>\n";
- $ListNewTerm = 0;
- }
-
- if( $Items_Seen[$Listlevel]++ == 0 ){
- # start of new list
- push( @Listtype, "$tag" );
- print $fh "<$tag>\n";
- } else {
- # if this is not the first item, close the previous one
- if ( $tag eq 'dl' ){
- print $fh "</dd>\n" unless $ListNewTerm;
- } else {
- print $fh "</li>\n";
- }
- }
- my $opentag = $tag eq 'dl' ? 'dt' : 'li';
- print $fh "<$opentag>";
-}
-
-#
-# process_item - convert a pod item tag and convert it to HTML format.
-#
-sub process_item {
- my ($fh, $otext) = @_;
-
- # lots of documents start a list without doing an =over. this is
- # bad! but, the proper thing to do seems to be to just assume
- # they did do an =over. so warn them once and then continue.
- if( $Listlevel == 0 ){
- warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph. ignoring.\n" unless $Quiet;
- process_over();
- }
-
- # remove formatting instructions from the text
- my $text = depod( $otext );
-
- # all the list variants:
- if( $text =~ /\A\*/ ){ # bullet
- new_listitem( $fh, 'ul' );
- if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text
- my $tag = $1;
- $otext =~ s/\A\*\s+//;
- emit_item_tag( $fh, $otext, $tag, 1 );
- print $fh "\n";
- }
-
- } elsif( $text =~ /\A\d+/ ){ # numbered list
- new_listitem( $fh, 'ol' );
- if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text
- my $tag = $1;
- $otext =~ s/\A\d+\.?\s*//;
- emit_item_tag( $fh, $otext, $tag, 1 );
- print $fh "\n";
- }
-
- } else { # definition list
- # new_listitem takes care of opening the <dt> tag
- new_listitem( $fh, 'dl' );
- if ($text =~ /\A(.+)\Z/s ){ # should have text
- emit_item_tag( $fh, $otext, $text, 1 );
- # write the definition term and close <dt> tag
- print $fh "</dt>\n";
- }
- # trigger opening a <dd> tag for the actual definition; will not
- # happen if next paragraph is also a definition term (=item)
- $ListNewTerm = 1;
- }
- print $fh "\n";
-}
-
-#
-# process_over - process a pod over tag and start a corresponding HTML list.
-#
-sub process_over {
- # start a new list
- $Listlevel++;
- push( @Items_Seen, 0 );
-}
-
-#
-# process_back - process a pod back tag and convert it to HTML format.
-#
-sub process_back {
- my $fh = shift;
- if( $Listlevel == 0 ){
- warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph. ignoring.\n" unless $Quiet;
- return;
- }
-
- # close off the list. note, I check to see if $Listtype[$Listlevel] is
- # defined because an =item directive may have never appeared and thus
- # $Listtype[$Listlevel] may have never been initialized.
- $Listlevel--;
- if( defined $Listtype[$Listlevel] ){
- if ( $Listtype[$Listlevel] eq 'dl' ){
- print $fh "</dd>\n" unless $ListNewTerm;
- } else {
- print $fh "</li>\n";
- }
- print $fh "</$Listtype[$Listlevel]>\n";
- pop( @Listtype );
- $ListNewTerm = 0;
- }
-
- # clean up item count
- pop( @Items_Seen );
-}
-
-#
-# process_cut - process a pod cut tag, thus start ignoring pod directives.
-#
-sub process_cut {
- $Ignore = 1;
-}
-
-#
-# process_pod - process a pod tag, thus stop ignoring pod directives
-# until we see a corresponding cut.
-#
-sub process_pod {
- # no need to set $Ignore to 0 cause the main loop did it
-}
-
-#
-# process_for - process a =for pod tag. if it's for html, spit
-# it out verbatim, if illustration, center it, otherwise ignore it.
-#
-sub process_for {
- my ($fh, $whom, $text) = @_;
- if ( $whom =~ /^(pod2)?html$/i) {
- print $fh $text;
- } elsif ($whom =~ /^illustration$/i) {
- 1 while chomp $text;
- for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
- $text .= $ext, last if -r "$text$ext";
- }
- print $fh qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
- }
-}
-
-#
-# process_begin - process a =begin pod tag. this pushes
-# whom we're beginning on the begin stack. if there's a
-# begin stack, we only print if it us.
-#
-sub process_begin {
- my ($fh, $whom, $text) = @_;
- $whom = lc($whom);
- push (@Begin_Stack, $whom);
- if ( $whom =~ /^(pod2)?html$/) {
- print $fh $text if $text;
- }
-}
-
-#
-# process_end - process a =end pod tag. pop the
-# begin stack. die if we're mismatched.
-#
-sub process_end {
- my($whom, $text) = @_;
- $whom = lc($whom);
- if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) {
- Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n")
- }
- pop( @Begin_Stack );
-}
-
-#
-# process_pre - indented paragraph, made into <pre></pre>
-#
-sub process_pre {
- my( $text ) = @_;
- my( $rest );
- return if $Ignore;
-
- $rest = $$text;
-
- # insert spaces in place of tabs
- $rest =~ s#(.+)#
- my $line = $1;
- 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
- $line;
- #eg;
-
- # convert some special chars to HTML escapes
- $rest = html_escape($rest);
-
- # try and create links for all occurrences of perl.* within
- # the preformatted text.
- $rest =~ s{
- (\s*)(perl\w+)
- }{
- if ( defined $Pages{$2} ){ # is a link
- qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
- } elsif (defined $Pages{dosify($2)}) { # is a link
- qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
- } else {
- "$1$2";
- }
- }xeg;
- $rest =~ s{
- (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
- }{
- my $url ;
- if ( $Htmlfileurl ne '' ){
- # Here, we take advantage of the knowledge
- # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
- # Since $Htmlroot eq '', we need to prepend $Htmldir
- # on the fron of the link to get the absolute path
- # of the link's target. We check for a leading '/'
- # to avoid corrupting links that are #, file:, etc.
- my $old_url = $3 ;
- $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
- $url = relativize_url( "$old_url.html", $Htmlfileurl );
- } else {
- $url = "$3.html" ;
- }
- "$1$url" ;
- }xeg;
-
- # Look for embedded URLs and make them into links. We don't
- # relativize them since they are best left as the author intended.
-
- my $urls = '(' . join ('|', qw{
- http
- telnet
- mailto
- news
- gopher
- file
- wais
- ftp
- } )
- . ')';
-
- my $ltrs = '\w';
- my $gunk = '/#~:.?+=&%@!\-';
- my $punc = '.:!?\-;';
- my $any = "${ltrs}${gunk}${punc}";
-
- $rest =~ s{
- \b # start at word boundary
- ( # begin $1 {
- $urls : # need resource and a colon
- (?!:) # Ignore File::, among others.
- [$any] +? # followed by one or more of any valid
- # character, but be conservative and
- # take only what you need to....
- ) # end $1 }
- (?=
- &quot; &gt; # maybe pre-quoted '<a href="...">'
- | # or:
- [$punc]* # 0 or more punctuation
- (?: # followed
- [^$any] # by a non-url char
- | # or
- $ # end of the string
- ) #
- | # or else
- $ # then end of the string
- )
- }{<a href="$1">$1</a>}igox;
-
- # text should be as it is (verbatim)
- $$text = $rest;
-}
-
-
-#
-# pure text processing
-#
-# pure_text/inIS_text: differ with respect to automatic C<> recognition.
-# we don't want this to happen within IS
-#
-sub pure_text($){
- my $text = shift();
- process_puretext( $text, 1 );
-}
-
-sub inIS_text($){
- my $text = shift();
- process_puretext( $text, 0 );
-}
-
-#
-# process_puretext - process pure text (without pod-escapes) converting
-# double-quotes and handling implicit C<> links.
-#
-sub process_puretext {
- my($text, $notinIS) = @_;
-
- ## Guessing at func() or [\$\@%&]*var references in plain text is destined
- ## to produce some strange looking ref's. uncomment to disable:
- ## $notinIS = 0;
-
- my(@words, $lead, $trail);
-
- # keep track of leading and trailing white-space
- $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
- $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
-
- # split at space/non-space boundaries
- @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
-
- # process each word individually
- foreach my $word (@words) {
- # skip space runs
- next if $word =~ /^\s*$/;
- # see if we can infer a link or a function call
- #
- # NOTE: This is a word based search, it won't automatically
- # mark "substr($var, 1, 2)" because the 1st word would be "substr($var"
- # User has to enclose those with proper C<>
-
- if( $notinIS && $word =~
- m/
- ^([a-z_]{2,}) # The function name
- \(
- ([0-9][a-z]* # Manual page(1) or page(1M)
- |[^)]*[\$\@\%][^)]+ # ($foo), (1, @foo), (%hash)
- | # ()
- )
- \)
- ([.,;]?)$ # a possible punctuation follows
- /xi
- ) {
- # has parenthesis so should have been a C<> ref
- ## try for a pagename (perlXXX(1))?
- my( $func, $args, $rest ) = ( $1, $2, $3 || '' );
- if( $args =~ /^\d+$/ ){
- my $url = page_sect( $word, '' );
- if( defined $url ){
- $word = qq(<a href="$url" class="man">the $word manpage</a>$rest);
- next;
- }
- }
- ## try function name for a link, append tt'ed argument list
- $word = emit_C( $func, '', "($args)") . $rest;
-
-#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
-## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
-## # perl variables, should be a C<> ref
-## $word = emit_C( $word );
-
- } elsif ($word =~ m,^\w+://\w,) {
- # looks like a URL
- # Don't relativize it: leave it as the author intended
- $word = qq(<a href="$word">$word</a>);
- } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
- # looks like an e-mail address
- my ($w1, $w2, $w3) = ("", $word, "");
- ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
- ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
- $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
- } else {
- $word = html_escape($word) if $word =~ /["&<>]/;
- }
- }
-
- # put everything back together
- return $lead . join( '', @words ) . $trail;
-}
-
-
-#
-# process_text - handles plaintext that appears in the input pod file.
-# there may be pod commands embedded within the text so those must be
-# converted to html commands.
-#
-
-sub process_text1($$;$$);
-sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' }
-sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 }
-
-sub process_text {
- return if $Ignore;
- my( $tref ) = @_;
- my $res = process_text1( 0, $tref );
- $res =~ s/\s+$//s;
- $$tref = $res;
-}
-
-sub process_text_rfc_links {
- my $text = shift;
-
- # For every "RFCnnnn" or "RFC nnn", link it to the authoritative
- # ource. Do not use the /i modifier here. Require "RFC" to be written in
- # in capital letters.
-
- $text =~ s{
- (?<=[^<>[:alpha:]]) # Make sure this is not an URL already
- (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits
- }
- {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx;
-
- $text;
-}
-
-sub process_text1($$;$$){
- my( $lev, $rstr, $func, $closing ) = @_;
- my $res = '';
-
- unless (defined $func) {
- $func = '';
- $lev++;
- }
-
- if( $func eq 'B' ){
- # B<text> - boldface
- $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
-
- } elsif( $func eq 'C' ){
- # C<code> - can be a ref or <code></code>
- # need to extract text
- my $par = go_ahead( $rstr, 'C', $closing );
-
- ## clean-up of the link target
- my $text = depod( $par );
-
- ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
- ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
-
- $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
-
- } elsif( $func eq 'E' ){
- # E<x> - convert to character
- $$rstr =~ s/^([^>]*)>//;
- my $escape = $1;
- $escape =~ s/^0?x([\dA-F]+)$/#x$1/i
- or $escape =~ s/^0([0-7]+)$/'#'.oct($1)/ei
- or $escape =~ s/^(\d+)$/#$1/;
- $res = "&$escape;";
-
- } elsif( $func eq 'F' ){
- # F<filename> - italicize
- $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>';
-
- } elsif( $func eq 'I' ){
- # I<text> - italicize
- $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
-
- } elsif( $func eq 'L' ){
- # L<link> - link
- ## L<text|cross-ref> => produce text, use cross-ref for linking
- ## L<cross-ref> => make text from cross-ref
- ## need to extract text
- my $par = go_ahead( $rstr, 'L', $closing );
-
- # some L<>'s that shouldn't be:
- # a) full-blown URL's are emitted as-is
- if( $par =~ m{^\w+://}s ){
- return make_URL_href( $par );
- }
- # b) C<...> is stripped and treated as C<>
- if( $par =~ /^C<(.*)>$/ ){
- my $text = depod( $1 );
- return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
- }
-
- # analyze the contents
- $par =~ s/\n/ /g; # undo word-wrapped tags
- my $opar = $par;
- my $linktext;
- if( $par =~ s{^([^|]+)\|}{} ){
- $linktext = $1;
- }
-
- # make sure sections start with a /
- $par =~ s{^"}{/"};
-
- my( $page, $section, $ident );
-
- # check for link patterns
- if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
- # we've got a name/ident (no quotes)
- if (length $2) {
- ( $page, $ident ) = ( $1, $2 );
- } else {
- ( $page, $section ) = ( $1, $2 );
- }
- ### print STDERR "--> L<$par> to page $page, ident $ident\n";
-
- } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
- # even though this should be a "section", we go for ident first
- ( $page, $ident ) = ( $1, $2 );
- ### print STDERR "--> L<$par> to page $page, section $section\n";
-
- } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
- ( $page, $section ) = ( '', $par );
- ### print STDERR "--> L<$par> to void page, section $section\n";
-
- } else {
- ( $page, $section ) = ( $par, '' );
- ### print STDERR "--> L<$par> to page $par, void section\n";
- }
-
- # now, either $section or $ident is defined. the convoluted logic
- # below tries to resolve L<> according to what the user specified.
- # failing this, we try to find the next best thing...
- my( $url, $ltext, $fid );
-
- RESOLVE: {
- if( defined $ident ){
- ## try to resolve $ident as an item
- ( $url, $fid ) = coderef( $page, $ident );
- if( $url ){
- if( ! defined( $linktext ) ){
- $linktext = $ident;
- $linktext .= " in " if $ident && $page;
- $linktext .= "the $page manpage" if $page;
- }
- ### print STDERR "got coderef url=$url\n";
- last RESOLVE;
- }
- ## no luck: go for a section (auto-quoting!)
- $section = $ident;
- }
- ## now go for a section
- my $htmlsection = htmlify( $section );
- $url = page_sect( $page, $htmlsection );
- if( $url ){
- if( ! defined( $linktext ) ){
- $linktext = $section;
- $linktext .= " in " if $section && $page;
- $linktext .= "the $page manpage" if $page;
- }
- ### print STDERR "got page/section url=$url\n";
- last RESOLVE;
- }
- ## no luck: go for an ident
- if( $section ){
- $ident = $section;
- } else {
- $ident = $page;
- $page = undef();
- }
- ( $url, $fid ) = coderef( $page, $ident );
- if( $url ){
- if( ! defined( $linktext ) ){
- $linktext = $ident;
- $linktext .= " in " if $ident && $page;
- $linktext .= "the $page manpage" if $page;
- }
- ### print STDERR "got section=>coderef url=$url\n";
- last RESOLVE;
- }
-
- # warning; show some text.
- $linktext = $opar unless defined $linktext;
- warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
- }
-
- # now we have a URL or just plain code
- $$rstr = $linktext . '>' . $$rstr;
- if( defined( $url ) ){
- $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
- } else {
- $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
- }
-
- } elsif( $func eq 'S' ){
- # S<text> - non-breaking spaces
- $res = process_text1( $lev, $rstr );
- $res =~ s/ /&nbsp;/g;
-
- } elsif( $func eq 'X' ){
- # X<> - ignore
- warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
- unless $$rstr =~ s/^[^>]*>// or $Quiet;
- } elsif( $func eq 'Z' ){
- # Z<> - empty
- warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n"
- unless $$rstr =~ s/^>// or $Quiet;
-
- } else {
- my $term = pattern $closing;
- while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
- # all others: either recurse into new function or
- # terminate at closing angle bracket(s)
- my $pt = $1;
- $pt .= $2 if !$3 && $lev == 1;
- $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
- return $res if !$3 && $lev > 1;
- if( $3 ){
- $res .= process_text1( $lev, $rstr, $3, closing $4 );
- }
- }
- if( $lev == 1 ){
- $res .= pure_text( $$rstr );
- } elsif( ! $Quiet ) {
- my $snippet = substr($$rstr,0,60);
- warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n"
-
- }
- $res = process_text_rfc_links($res);
- }
- return $res;
-}
-
-#
-# go_ahead: extract text of an IS (can be nested)
-#
-sub go_ahead($$$){
- my( $rstr, $func, $closing ) = @_;
- my $res = '';
- my @closing = ($closing);
- while( $$rstr =~
- s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){
- $res .= $1;
- unless( $3 ){
- shift @closing;
- return $res unless @closing;
- } else {
- unshift @closing, closing $4;
- }
- $res .= $2;
- }
- unless ($Quiet) {
- my $snippet = substr($$rstr,0,60);
- warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n"
- }
- return $res;
-}
-
-#
-# emit_C - output result of C<text>
-# $text is the depod-ed text
-#
-sub emit_C($;$$){
- my( $text, $nocode, $args ) = @_;
- $args = '' unless defined $args;
- my $res;
- my( $url, $fid ) = coderef( undef(), $text );
-
- # need HTML-safe text
- my $linktext = html_escape( "$text$args" );
-
- if( defined( $url ) &&
- (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
- $res = "<a href=\"$url\"><code>$linktext</code></a>";
- } elsif( 0 && $nocode ){
- $res = $linktext;
- } else {
- $res = "<code>$linktext</code>";
- }
- return $res;
+ $Backlink = $opt_backlink if defined $opt_backlink;
+ $Css = $opt_css if defined $opt_css;
+ $Header = $opt_header if defined $opt_header;
+ $Htmldir = $opt_htmldir if defined $opt_htmldir;
+ $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
+ $Doindex = $opt_index if defined $opt_index;
+ $Podfile = $opt_infile if defined $opt_infile;
+ $Htmlfile = $opt_outfile if defined $opt_outfile;
+ $Poderrors = $opt_poderrors if defined $opt_poderrors;
+ $Podroot = $opt_podroot if defined $opt_podroot;
+ $Quiet = $opt_quiet if defined $opt_quiet;
+ $Recurse = $opt_recurse if defined $opt_recurse;
+ $Title = $opt_title if defined $opt_title;
+ $Verbose = $opt_verbose if defined $opt_verbose;
}
#
@@ -1820,240 +483,6 @@ sub html_escape {
return $rest;
}
-
-#
-# dosify - convert filenames to 8.3
-#
-sub dosify {
- my($str) = @_;
- return lc($str) if $^O eq 'VMS'; # VMS just needs casing
- if ($Is83) {
- $str = lc $str;
- $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
- $str =~ s/(\w+)/substr ($1,0,8)/ge;
- }
- return $str;
-}
-
-#
-# page_sect - make a URL from the text of a L<>
-#
-sub page_sect($$) {
- my( $page, $section ) = @_;
- my( $linktext, $page83, $link); # work strings
-
- # check if we know that this is a section in this page
- if (!defined $Pages{$page} && defined $Sections{$page}) {
- $section = $page;
- $page = "";
- ### print STDERR "reset page='', section=$section\n";
- }
-
- $page83=dosify($page);
- $page=$page83 if (defined $Pages{$page83});
- if ($page eq "") {
- $link = "#" . anchorify( $section );
- } elsif ( $page =~ /::/ ) {
- $page =~ s,::,/,g;
- # Search page cache for an entry keyed under the html page name,
- # then look to see what directory that page might be in. NOTE:
- # this will only find one page. A better solution might be to produce
- # an intermediate page that is an index to all such pages.
- my $page_name = $page ;
- $page_name =~ s,^.*/,,s ;
- if ( defined( $Pages{ $page_name } ) &&
- $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
- ) {
- $page = $1 ;
- }
- else {
- # NOTE: This branch assumes that all A::B pages are located in
- # $Htmlroot/A/B.html . This is often incorrect, since they are
- # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
- # analyze the contents of %Pages and figure out where any
- # cousins of A::B are, then assume that. So, if A::B isn't found,
- # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
- # lib/A/B.pm. This is also limited, but it's an improvement.
- # Maybe a hints file so that the links point to the correct places
- # nonetheless?
-
- }
- $link = "$Htmlroot/$page.html";
- $link .= "#" . anchorify( $section ) if ($section);
- } elsif (!defined $Pages{$page}) {
- $link = "";
- } else {
- $section = anchorify( $section ) if $section ne "";
- ### print STDERR "...section=$section\n";
-
- # if there is a directory by the name of the page, then assume that an
- # appropriate section will exist in the subdirectory
-# if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
- if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
- $link = "$Htmlroot/$1/$section.html";
- ### print STDERR "...link=$link\n";
-
- # since there is no directory by the name of the page, the section will
- # have to exist within a .html of the same name. thus, make sure there
- # is a .pod or .pm that might become that .html
- } else {
- $section = "#$section" if $section;
- ### print STDERR "...section=$section\n";
-
- # check if there is a .pod with the page name.
- # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
- if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
- $link = "$Htmlroot/$1.html$section";
- } else {
- $link = "";
- }
- }
- }
-
- if ($link) {
- # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
- # implies $Htmlroot eq ''. This means that the link in question
- # needs a prefix of $Htmldir if it begins with '/'. The test for
- # the initial '/' is done to avoid '#'-only links, and to allow
- # for other kinds of links, like file:, ftp:, etc.
- my $url ;
- if ( $Htmlfileurl ne '' ) {
- $link = "$Htmldir$link" if $link =~ m{^/}s;
- $url = relativize_url( $link, $Htmlfileurl );
-# print( " b: [$link,$Htmlfileurl,$url]\n" );
- }
- else {
- $url = $link ;
- }
- return $url;
-
- } else {
- return undef();
- }
-}
-
-#
-# relativize_url - convert an absolute URL to one relative to a base URL.
-# Assumes both end in a filename.
-#
-sub relativize_url {
- my ($dest,$source) = @_ ;
-
- my ($dest_volume,$dest_directory,$dest_file) =
- File::Spec::Unix->splitpath( $dest ) ;
- $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
-
- my ($source_volume,$source_directory,$source_file) =
- File::Spec::Unix->splitpath( $source ) ;
- $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
-
- my $rel_path = '' ;
- if ( $dest ne '' ) {
- $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
- }
-
- if ( $rel_path ne '' &&
- substr( $rel_path, -1 ) ne '/' &&
- substr( $dest_file, 0, 1 ) ne '#'
- ) {
- $rel_path .= "/$dest_file" ;
- }
- else {
- $rel_path .= "$dest_file" ;
- }
-
- return $rel_path ;
-}
-
-
-#
-# coderef - make URL from the text of a C<>
-#
-sub coderef($$){
- my( $page, $item ) = @_;
- my( $url );
-
- my $fid = fragment_id( $item );
-
- if( defined( $page ) && $page ne "" ){
- # we have been given a $page...
- $page =~ s{::}{/}g;
-
- Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile")
- if !defined $fid;
- # Do we take it? Item could be a section!
- my $base = $Items{$fid} || "";
- $base =~ s{[^/]*/}{};
- if( $base ne "$page.html" ){
- ### print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
- $page = undef();
- }
-
- } else {
- # no page - local items precede cached items
- if( defined( $fid ) ){
- if( exists $Local_Items{$fid} ){
- $page = $Local_Items{$fid};
- } else {
- $page = $Items{$fid};
- }
- }
- }
-
- # if there was a pod file that we found earlier with an appropriate
- # =item directive, then create a link to that page.
- if( defined $page ){
- if( $page ){
- if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
- $page = $1 . '.html';
- }
- my $link = "$Htmlroot/$page#" . anchorify($fid);
-
- # Here, we take advantage of the knowledge that $Htmlfileurl
- # ne '' implies $Htmlroot eq ''.
- if ( $Htmlfileurl ne '' ) {
- $link = "$Htmldir$link" ;
- $url = relativize_url( $link, $Htmlfileurl ) ;
- } else {
- $url = $link ;
- }
- } else {
- $url = "#" . anchorify($fid);
- }
-
- confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
- }
- return( $url, $fid );
-}
-
-
-
-#
-# Adapted from Nick Ing-Simmons' PodToHtml package.
-sub relative_url {
- my $source_file = shift ;
- my $destination_file = shift;
-
- my $source = URI::file->new_abs($source_file);
- my $uo = URI::file->new($destination_file,$source)->abs;
- return $uo->rel->as_string;
-}
-
-
-#
-# finish_list - finish off any pending HTML lists. this should be called
-# after the entire pod file has been read and converted.
-#
-sub finish_list {
- my $fh = shift;
- if( $Listlevel ){
- warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph. ignoring.\n" unless $Quiet;
- while( $Listlevel ){
- process_back( $fh );
- }
- }
-}
-
#
# htmlify - converts a pod section specification to a suitable section
# specification for HTML. Note that we keep spaces and special characters
@@ -2082,166 +511,153 @@ sub anchorify {
}
#
-# depod - convert text by eliminating all interior sequences
-# Note: can be called with copy or modify semantics
+# store POD files in %Pages
#
-my %E2c;
-$E2c{lt} = '<';
-$E2c{gt} = '>';
-$E2c{sol} = '/';
-$E2c{verbar} = '|';
-$E2c{amp} = '&'; # in Tk's pods
-
-sub depod1($;$$);
-
-sub depod($){
- my $string;
- if( ref( $_[0] ) ){
- $string = ${$_[0]};
- ${$_[0]} = depod1( \$string );
- } else {
- $string = $_[0];
- depod1( \$string );
- }
-}
+sub _save_page {
+ my ($modspec, $modname) = @_;
-sub depod1($;$$){
- my( $rstr, $func, $closing ) = @_;
- my $res = '';
- return $res unless defined $$rstr;
- if( ! defined( $func ) ){
- # skip to next begin of an interior sequence
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
- # recurse into its text
- $res .= $1 . depod1( $rstr, $2, closing $3);
- }
- $res .= $$rstr;
- } elsif( $func eq 'E' ){
- # E<x> - convert to character
- $$rstr =~ s/^([^>]*)>//;
- $res .= $E2c{$1} || "";
- } elsif( $func eq 'X' ){
- # X<> - ignore
- $$rstr =~ s/^[^>]*>//;
- } elsif( $func eq 'Z' ){
- # Z<> - empty
- $$rstr =~ s/^>//;
- } else {
- # all others: either recurse into new function or
- # terminate at closing angle bracket
- my $term = pattern $closing;
- while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
- $res .= $1;
- last unless $3;
- $res .= depod1( $rstr, $3, closing $4 );
- }
- ## If we're here and $2 ne '>': undelimited interior sequence.
- ## Ignored, as this is called without proper indication of where we are.
- ## Rely on process_text to produce diagnostics.
- }
- return $res;
-}
+ # Remove Podroot from path
+ foreach my $podpath (@Podpath) {
+ my $beg_path = File::Spec->catdir($Podroot, $podpath);
+ if ($beg_path eq substr($modspec, 0, length($beg_path))) {
+ # Replace $Podroot/$podpath with $podpath
+ substr($modspec, 0, length($beg_path), $podpath);
+ last;
+ }
+ }
-{
- my %seen; # static fragment record hash
+ # Convert path to unix style path
+ $modspec = Unixify::unixify($modspec);
-sub fragment_id_readable {
- my $text = shift;
- my $generate = shift; # optional flag
+ my ($file, $dir) = fileparse($modspec, qr/\.[^.]*/); # strip .ext
+ $Pages{$modname} = $dir.$file;
+}
- my $orig = $text;
+1;
- # leave the words for the fragment identifier,
- # change everything else to underbars.
- $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency.
- $text =~ s/_{2,}/_/g;
- $text =~ s/\A_//;
- $text =~ s/_\Z//;
+package Pod::Simple::XHTML::LocalPodLinks;
+use strict;
+use warnings;
+use base 'Pod::Simple::XHTML';
- unless ($text)
- {
- # Nothing left after removing punctuation, so leave it as is
- # E.g. if option is named: "=item -#"
+use File::Spec;
+use File::Spec::Unix;
- $text = $orig;
- }
+__PACKAGE__->_accessorize(
+ 'htmldir',
+ 'htmlfileurl',
+ 'htmlroot',
+ 'pages', # Page name => relative/path/to/page from root POD dir
+ 'quiet',
+ 'verbose',
+);
+
+sub resolve_pod_page_link {
+ my ($self, $to, $section) = @_;
+
+ return undef unless defined $to || defined $section;
+ if (defined $section) {
+ $section = '#' . $self->idify($section, 1);
+ return $section unless defined $to;
+ } else {
+ $section = '';
+ }
+
+ my $path; # path to $to according to %Pages
+ unless (exists $self->pages->{$to}) {
+ # Try to find a POD that ends with $to and use that.
+ # e.g., given L<XHTML>, if there is no $Podpath/XHTML in %Pages,
+ # look for $Podpath/*/XHTML in %Pages, with * being any path,
+ # as a substitute (e.g., $Podpath/Pod/Simple/XHTML)
+ my @matches;
+ foreach my $modname (keys %{$self->pages}) {
+ push @matches, $modname if $modname =~ /::$to\z/;
+ }
- if ($generate) {
- if ( exists $seen{$text} ) {
- # This already exists, make it unique
- $seen{$text}++;
- $text = $text . $seen{$text};
+ if ($#matches == -1) {
+ warn "Cannot find \"$to\" in podpath: " .
+ "cannot find suitable replacement path, cannot resolve link\n"
+ unless $self->quiet;
+ return '';
+ } elsif ($#matches == 0) {
+ warn "Cannot find \"$to\" in podpath: " .
+ "using $matches[0] as replacement path to $to\n"
+ unless $self->quiet;
+ $path = $self->pages->{$matches[0]};
} else {
- $seen{$text} = 1; # first time seen this fragment
+ warn "Cannot find \"$to\" in podpath: " .
+ "more than one possible replacement path to $to, " .
+ "using $matches[-1]\n" unless $self->quiet;
+ # Use [-1] so newer (higher numbered) perl PODs are used
+ $path = $self->pages->{$matches[-1]};
}
+ } else {
+ $path = $self->pages->{$to};
}
- $text;
-}}
-
-my @HC;
-sub fragment_id_obfuscated { # This was the old "_2d_2d__"
- my $text = shift;
- my $generate = shift; # optional flag
-
- # text? Normalize by obfuscating the fragment id to make it unique
- $text =~ s/\s+/_/sg;
-
- $text =~ s{(\W)}{
- defined( $HC[ord($1)] ) ? $HC[ord($1)]
- : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
- $text = substr( $text, 0, 50 );
+ # The use of catdir here (instead of catfile) ensures there will be one
+ # '/' between htmlroot and $path; not zero (if htmlroot == ''), not two
+ # (if htmlroot =~ m#/\z# and $path =~ m#\A/#), just one.
+ my $url = File::Spec::Unix->catdir( Unixify::unixify($self->htmlroot),
+ $path);
+ if ($self->htmlfileurl ne '') {
+ # then $self->htmlroot eq '' (by definition of htmlfileurl) so
+ # $self->htmldir needs to be prepended to link to get the absolute path
+ # that will be relativized
+ $url = relativize_url(
+ File::Spec::Unix->catdir( Unixify::unixify($self->htmldir), $url),
+ $self->htmlfileurl # already unixified
+ );
+ }
- $text;
+ return $url . ".html$section";
}
#
-# fragment_id - construct a fragment identifier from:
-# a) =item text
-# b) contents of C<...>
+# relativize_url - convert an absolute URL to one relative to a base URL.
+# Assumes both end in a filename.
#
+sub relativize_url {
+ my ($dest, $source) = @_;
-sub fragment_id {
- my $text = shift;
- my $generate = shift; # optional flag
-
- $text =~ s/\s+\Z//s;
- if( $text ){
- # a method or function?
- return $1 if $text =~ /(\w+)\s*\(/;
- return $1 if $text =~ /->\s*(\w+)\s*\(?/;
-
- # a variable name?
- return $1 if $text =~ /^([\$\@%*]\S+)/;
-
- # some pattern matching operator?
- return $1 if $text =~ m|^(\w+/).*/\w*$|;
+ # Remove each file from its path
+ my ($dest_volume, $dest_directory, $dest_file) =
+ File::Spec::Unix->splitpath( $dest );
+ $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' );
- # fancy stuff... like "do { }"
- return $1 if $text =~ m|^(\w+)\s*{.*}$|;
+ my ($source_volume, $source_directory, $source_file) =
+ File::Spec::Unix->splitpath( $source );
+ $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' );
- # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
- # and some funnies with ... Module ...
- return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
- return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
+ my $rel_path = '';
+ if ($dest ne '') {
+ $rel_path = File::Spec::Unix->abs2rel( $dest, $source );
+ }
- return fragment_id_readable($text, $generate);
+ if ($rel_path ne '' && substr( $rel_path, -1 ) ne '/') {
+ $rel_path .= "/$dest_file";
} else {
- return;
+ $rel_path .= "$dest_file";
}
+
+ return $rel_path;
}
-#
-# make_URL_href - generate HTML href from URL
-# Special treatment for CGI queries.
-#
-sub make_URL_href($){
- my( $url ) = @_;
- if( $url !~
- s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
- $url = "<a href=\"$url\">$url</a>";
- }
- return $url;
+1;
+
+package Unixify;
+use warnings;
+use strict;
+
+use File::Spec;
+use File::Spec::Unix;
+
+sub unixify {
+ my $full_path = shift;
+ return '' unless $full_path;
+
+ return File::Spec::Unix->catfile( # convert native directory separators (e.g. '\') to '/'
+ File::Spec->splitdir($full_path));
}
1;
diff --git a/ext/Pod-Html/t/crossref.pod b/ext/Pod-Html/t/crossref.pod
new file mode 100644
index 0000000000..f6dddf1f58
--- /dev/null
+++ b/ext/Pod-Html/t/crossref.pod
@@ -0,0 +1,41 @@
+=head1 NAME
+
+htmlcrossref - Test HTML cross reference links
+
+=head1 LINKS
+
+L</"section1">
+
+L<htmllink/section 2>
+
+L</"item1">
+
+L</"non existant section">
+
+L<var-copy>
+
+L<var-copy/$">
+
+C<var-copy>
+
+C<var-copy/$">
+
+L<podspec-copy/First:>
+
+C<podspec-copy/First:>
+
+L<notperldoc>
+
+=head1 TARGETS
+
+=head2 section1
+
+This is section one.
+
+=over 4
+
+=item item1 X<item> X<one>
+
+This is item one.
+
+=back
diff --git a/ext/Pod-Html/t/crossref.t b/ext/Pod-Html/t/crossref.t
new file mode 100644
index 0000000000..ec178e0024
--- /dev/null
+++ b/ext/Pod-Html/t/crossref.t
@@ -0,0 +1,104 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+END {
+ rem_test_dir();
+}
+
+use strict;
+use Cwd;
+use File::Spec;
+use File::Spec::Functions;
+use Test::More tests => 1;
+
+SKIP: {
+ my $output = make_test_dir();
+ skip "$output", 1 if $output;
+
+ my ($v, $d) = splitpath(cwd(), 1);
+ my $relcwd = substr($d, length(File::Spec->rootdir()));
+
+ convert_n_test("crossref", "cross references",
+ "--podpath=". catdir($relcwd, 't') . ":" . catdir($relcwd, 'testdir/test.lib'),
+ "--podroot=$v". File::Spec->rootdir,
+ "--quiet",
+ );
+}
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+ <li><a href="#TARGETS">TARGETS</a>
+ <ul>
+ <li><a href="#section1">section1</a></li>
+ </ul>
+ </li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmlcrossref - Test HTML cross reference links</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p><a href="#section1">&quot;section1&quot;</a></p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/htmllink.html#section-2">&quot;section 2&quot; in htmllink</a></p>
+
+<p><a href="#item1">&quot;item1&quot;</a></p>
+
+<p><a href="#non-existant-section">&quot;non existant section&quot;</a></p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a></p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html#pod-">&quot;$&quot;&quot; in var-copy</a></p>
+
+<p><code>var-copy</code></p>
+
+<p><code>var-copy/$&quot;</code></p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/podspec-copy.html#First:">&quot;First:&quot; in podspec-copy</a></p>
+
+<p><code>podspec-copy/First:</code></p>
+
+<p><a>notperldoc</a></p>
+
+<h1 id="TARGETS">TARGETS</h1>
+
+<h2 id="section1">section1</h2>
+
+<p>This is section one.</p>
+
+<dl>
+
+<dt id="item1">item1 </dt>
+<dd>
+
+<p>This is item one.</p>
+
+</dd>
+</dl>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/feature.pod b/ext/Pod-Html/t/feature.pod
new file mode 100644
index 0000000000..4de4fb10cb
--- /dev/null
+++ b/ext/Pod-Html/t/feature.pod
@@ -0,0 +1,21 @@
+=head1 Head 1
+
+A paragraph
+
+=for html some html
+
+=begin image
+
+|--|
+| |
+|--|
+
+=end image
+
+Another paragraph
+
+=head1 Another Head 1
+
+some text and a link L<crossref>
+
+=cut
diff --git a/ext/Pod-Html/t/feature.t b/ext/Pod-Html/t/feature.t
new file mode 100644
index 0000000000..5f27454327
--- /dev/null
+++ b/ext/Pod-Html/t/feature.t
@@ -0,0 +1,70 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Cwd;
+use File::Spec::Functions;
+use Test::More tests => 1;
+
+my $cwd = cwd();
+
+convert_n_test("feature", "misc pod-html features",
+ "--backlink",
+ "--css=style.css",
+ "--header", # no styling b/c of --css
+ "--htmldir=". catdir($cwd, 't'),
+ "--noindex",
+ "--podpath=t",
+ "--podroot=$cwd",
+ "--title=a title",
+
+ );
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>a title</title>
+<link rel="stylesheet" href="style.css" type="text/css" />
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body id="_podtop_">
+<table border="0" width="100%" cellspacing="0" cellpadding="3">
+<tr><td class="_podblock_" valign="middle">
+<big><strong><span class="_podblock_">&nbsp;a title</span></strong></big>
+</td></tr>
+</table>
+
+
+
+<a href="#_podtop_"><h1 id="Head-1">Head 1</h1></a>
+
+<p>A paragraph</p>
+
+
+
+some html
+
+<p>Another paragraph</p>
+
+<a href="#_podtop_"><h1 id="Another-Head-1">Another Head 1</h1></a>
+
+<p>some text and a link <a href="t/crossref.html">crossref</a></p>
+
+<table border="0" width="100%" cellspacing="0" cellpadding="3">
+<tr><td class="_podblock_" valign="middle">
+<big><strong><span class="_podblock_">&nbsp;a title</span></strong></big>
+</td></tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/feature2.pod b/ext/Pod-Html/t/feature2.pod
new file mode 100644
index 0000000000..4de4fb10cb
--- /dev/null
+++ b/ext/Pod-Html/t/feature2.pod
@@ -0,0 +1,21 @@
+=head1 Head 1
+
+A paragraph
+
+=for html some html
+
+=begin image
+
+|--|
+| |
+|--|
+
+=end image
+
+Another paragraph
+
+=head1 Another Head 1
+
+some text and a link L<crossref>
+
+=cut
diff --git a/ext/Pod-Html/t/feature2.t b/ext/Pod-Html/t/feature2.t
new file mode 100644
index 0000000000..feeb84f910
--- /dev/null
+++ b/ext/Pod-Html/t/feature2.t
@@ -0,0 +1,71 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Cwd;
+use Test::More tests => 1;
+
+my $cwd = cwd();
+
+convert_n_test("feature2", "misc pod-html features 2",
+ "--backlink",
+ "--header",
+ "--podpath=.",
+ "--podroot=$cwd",
+ "--norecurse",
+ "--verbose",
+ );
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body id="_podtop_" style="background-color: white">
+<table border="0" width="100%" cellspacing="0" cellpadding="3">
+<tr><td class="_podblock_" style="background-color: #cccccc" valign="middle">
+<big><strong><span class="_podblock_">&nbsp;</span></strong></big>
+</td></tr>
+</table>
+
+
+
+<ul id="index">
+ <li><a href="#Head-1">Head 1</a></li>
+ <li><a href="#Another-Head-1">Another Head 1</a></li>
+</ul>
+
+<a href="#_podtop_"><h1 id="Head-1">Head 1</h1></a>
+
+<p>A paragraph</p>
+
+
+
+some html
+
+<p>Another paragraph</p>
+
+<a href="#_podtop_"><h1 id="Another-Head-1">Another Head 1</h1></a>
+
+<p>some text and a link <a>crossref</a></p>
+
+<table border="0" width="100%" cellspacing="0" cellpadding="3">
+<tr><td class="_podblock_" style="background-color: #cccccc" valign="middle">
+<big><strong><span class="_podblock_">&nbsp;</span></strong></big>
+</td></tr>
+</table>
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmldir1.pod b/ext/Pod-Html/t/htmldir1.pod
new file mode 100644
index 0000000000..e505caaa18
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir1.pod
@@ -0,0 +1,15 @@
+=head1 NAME
+
+htmldir - Test --htmldir feature
+
+=head1 LINKS
+
+Normal text, a L<link> to nowhere,
+
+a link to L<var-copy>,
+
+L<htmlescp>,
+
+L<feature/Another Head 1>,
+
+and another L<feature/"Another Head 1">.
diff --git a/ext/Pod-Html/t/htmldir1.t b/ext/Pod-Html/t/htmldir1.t
new file mode 100644
index 0000000000..a0e747ffe5
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir1.t
@@ -0,0 +1,86 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+END {
+ rem_test_dir();
+}
+
+use strict;
+use Cwd;
+use File::Spec;
+use File::Spec::Functions;
+use Test::More tests => 2;
+
+# XXX Separate tests that rely on test.lib from the others so they are the only
+# ones skipped (instead of all of them). This applies to htmldir{1,3,5}.t, and
+# crossref.t (as of 10/29/11).
+SKIP: {
+ my $output = make_test_dir();
+ skip "$output", 2 if $output;
+
+ my ($v, $d) = splitpath(cwd(), 1);
+ my $relcwd = substr($d, length(File::Spec->rootdir()));
+
+ my $data_pos = tell DATA; # to read <DATA> twice
+
+
+ convert_n_test("htmldir1", "test --htmldir and --htmlroot 1a",
+ "--podpath=". catdir($relcwd, 't') . ":" . catfile($relcwd, 'testdir/test.lib'),
+ "--podroot=$v". File::Spec->rootdir,
+ "--htmldir=t",
+ );
+
+ seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same)
+
+ convert_n_test("htmldir1", "test --htmldir and --htmlroot 1b",
+ "--podpath=$relcwd",
+ "--podroot=$v". File::Spec->rootdir,
+ "--htmldir=". catfile $relcwd, 't',
+ "--htmlroot=/",
+ );
+}
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmldir - Test --htmldir feature</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p>Normal text, a <a>link</a> to nowhere,</p>
+
+<p>a link to <a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a>,</p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p>
+
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>,</p>
+
+<p>and another <a href="/[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>.</p>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmldir2.pod b/ext/Pod-Html/t/htmldir2.pod
new file mode 100644
index 0000000000..1c9e97b3e1
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir2.pod
@@ -0,0 +1,15 @@
+=head1 NAME
+
+htmldir - Test --htmldir feature
+
+=head1 LINKS
+
+Normal text, a L<link> to nowhere,
+
+a link to L<perlvar-copy>,
+
+L<htmlescp>,
+
+L<feature/Another Head 1>,
+
+and another L<feature/"Another Head 1">.
diff --git a/ext/Pod-Html/t/htmldir2.t b/ext/Pod-Html/t/htmldir2.t
new file mode 100644
index 0000000000..b5f5b48105
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir2.t
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Cwd;
+use Test::More tests => 3;
+
+my $cwd = cwd();
+my $data_pos = tell DATA; # to read <DATA> twice
+
+convert_n_test("htmldir2", "test --htmldir and --htmlroot 2a",
+ "--podpath=t",
+ "--htmldir=t",
+);
+
+seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same)
+
+convert_n_test("htmldir2", "test --htmldir and --htmlroot 2b",
+ "--podpath=t",
+);
+
+seek DATA, $data_pos, 0; # to read <DATA> thrice (expected output is the same)
+
+# this test makes sure paths are absolute unless --htmldir is specified
+convert_n_test("htmldir2", "test --htmldir and --htmlroot 2c",
+ "--podpath=t",
+ "--podroot=$cwd",
+ "--norecurse", # testing --norecurse, too
+);
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmldir - Test --htmldir feature</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p>Normal text, a <a>link</a> to nowhere,</p>
+
+<p>a link to <a>perlvar-copy</a>,</p>
+
+<p><a href="/t/htmlescp.html">htmlescp</a>,</p>
+
+<p><a href="/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>,</p>
+
+<p>and another <a href="/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>.</p>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmldir3.pod b/ext/Pod-Html/t/htmldir3.pod
new file mode 100644
index 0000000000..e505caaa18
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir3.pod
@@ -0,0 +1,15 @@
+=head1 NAME
+
+htmldir - Test --htmldir feature
+
+=head1 LINKS
+
+Normal text, a L<link> to nowhere,
+
+a link to L<var-copy>,
+
+L<htmlescp>,
+
+L<feature/Another Head 1>,
+
+and another L<feature/"Another Head 1">.
diff --git a/ext/Pod-Html/t/htmldir3.t b/ext/Pod-Html/t/htmldir3.t
new file mode 100644
index 0000000000..805c9d9d41
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir3.t
@@ -0,0 +1,83 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+END {
+ rem_test_dir();
+}
+
+use strict;
+use Cwd;
+use File::Spec;
+use File::Spec::Functions;
+use Test::More tests => 2;
+
+SKIP: {
+ my $output = make_test_dir();
+ skip "$output", 2 if $output;
+
+ my $cwd = cwd();
+ my ($v, $d) = splitpath($cwd, 1);
+ my $relcwd = substr($d, length(File::Spec->rootdir()));
+
+ my $data_pos = tell DATA; # to read <DATA> twice
+
+ convert_n_test("htmldir3", "test --htmldir and --htmlroot 3a",
+ "--podpath=$relcwd",
+ "--podroot=$v". File::Spec->rootdir,
+ "--htmldir=". catdir($cwd, 't', ''), # test removal of trailing slash
+ );
+
+ seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same)
+
+ convert_n_test("htmldir3", "test --htmldir and --htmlroot 3b",
+ "--podpath=". catdir($relcwd, 't'),
+ "--podroot=$v". File::Spec->rootdir,
+ "--htmldir=t",
+ "--outfile=t/htmldir3.html",
+ );
+}
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmldir - Test --htmldir feature</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p>Normal text, a <a>link</a> to nowhere,</p>
+
+<p>a link to <a href="[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a>,</p>
+
+<p><a href="[RELCURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p>
+
+<p><a href="[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>,</p>
+
+<p>and another <a href="[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>.</p>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmldir4.pod b/ext/Pod-Html/t/htmldir4.pod
new file mode 100644
index 0000000000..1c9e97b3e1
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir4.pod
@@ -0,0 +1,15 @@
+=head1 NAME
+
+htmldir - Test --htmldir feature
+
+=head1 LINKS
+
+Normal text, a L<link> to nowhere,
+
+a link to L<perlvar-copy>,
+
+L<htmlescp>,
+
+L<feature/Another Head 1>,
+
+and another L<feature/"Another Head 1">.
diff --git a/ext/Pod-Html/t/htmldir4.t b/ext/Pod-Html/t/htmldir4.t
new file mode 100644
index 0000000000..034fffe65f
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir4.t
@@ -0,0 +1,70 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Cwd;
+use File::Spec::Functions ':ALL';
+use Test::More tests => 2;
+
+my $cwd = cwd();
+my $data_pos = tell DATA; # to read <DATA> twice
+
+convert_n_test("htmldir4", "test --htmldir and --htmlroot 4a",
+ "--podpath=t",
+ "--htmldir=t",
+ "--outfile=". catfile('t', 'htmldir4.html'),
+);
+
+seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same)
+
+convert_n_test("htmldir4", "test --htmldir and --htmlroot 4b",
+ "--podpath=t",
+ "--podroot=$cwd",
+ "--htmldir=". catdir($cwd, 't'),
+ "--norecurse",
+);
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmldir - Test --htmldir feature</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p>Normal text, a <a>link</a> to nowhere,</p>
+
+<p>a link to <a>perlvar-copy</a>,</p>
+
+<p><a href="t/htmlescp.html">htmlescp</a>,</p>
+
+<p><a href="t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>,</p>
+
+<p>and another <a href="t/feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>.</p>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmldir5.pod b/ext/Pod-Html/t/htmldir5.pod
new file mode 100644
index 0000000000..e505caaa18
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir5.pod
@@ -0,0 +1,15 @@
+=head1 NAME
+
+htmldir - Test --htmldir feature
+
+=head1 LINKS
+
+Normal text, a L<link> to nowhere,
+
+a link to L<var-copy>,
+
+L<htmlescp>,
+
+L<feature/Another Head 1>,
+
+and another L<feature/"Another Head 1">.
diff --git a/ext/Pod-Html/t/htmldir5.t b/ext/Pod-Html/t/htmldir5.t
new file mode 100644
index 0000000000..15a3901036
--- /dev/null
+++ b/ext/Pod-Html/t/htmldir5.t
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+END {
+ rem_test_dir();
+}
+
+use strict;
+use Cwd;
+use File::Spec::Functions;
+use Test::More tests => 1;
+
+SKIP: {
+ my $output = make_test_dir();
+ skip "$output", 1 if $output;
+
+
+ my $cwd = catdir cwd(); # catdir converts path separators to that of the OS
+ # running the test
+ # XXX but why don't the other tests complain about
+ # this?
+
+ convert_n_test("htmldir5", "test --htmldir and --htmlroot 5",
+ "--podpath=t:testdir/test.lib",
+ "--podroot=$cwd",
+ "--htmldir=$cwd",
+ "--htmlroot=/",
+ );
+}
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>htmldir - Test --htmldir feature</p>
+
+<h1 id="LINKS">LINKS</h1>
+
+<p>Normal text, a <a>link</a> to nowhere,</p>
+
+<p>a link to <a href="../testdir/test.lib/var-copy.html">var-copy</a>,</p>
+
+<p><a href="./htmlescp.html">htmlescp</a>,</p>
+
+<p><a href="./feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>,</p>
+
+<p>and another <a href="./feature.html#Another-Head-1">&quot;Another Head 1&quot; in feature</a>.</p>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/htmlescp.t b/ext/Pod-Html/t/htmlescp.t
index 3314829dfd..30c75b4a32 100644
--- a/ext/Pod-Html/t/htmlescp.t
+++ b/ext/Pod-Html/t/htmlescp.t
@@ -14,7 +14,7 @@ __DATA__
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
-<title>NAME</title>
+<title></title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<link rev="made" href="mailto:[PERLADMIN]" />
</head>
@@ -22,35 +22,27 @@ __DATA__
<body style="background-color: white">
-<!-- INDEX BEGIN -->
-<div name="index">
-<p><a name="__index__"></a></p>
-<ul>
-
- <li><a href="#name">NAME</a></li>
- <li><a href="#description">DESCRIPTION</a></li>
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#DESCRIPTION">DESCRIPTION</a></li>
</ul>
-<hr name="index" />
-</div>
-<!-- INDEX END -->
+<h1 id="NAME">NAME</h1>
-<p>
-</p>
-<h1><a name="name">NAME</a></h1>
<p>Escape Sequences Test</p>
-<p>
-</p>
-<hr />
-<h1><a name="description">DESCRIPTION</a></h1>
-<p>I am a stupid fool who puts naked &lt; &amp; &gt; characters in my POD
-instead of escaping them as &lt; and &gt;.</p>
-<p>Here is some <strong>bold</strong> text, some <em>italic</em> plus <em class="file">/etc/fstab</em>
-file and something that looks like an &lt;html&gt; tag.
-This is some <code>$code($arg1)</code>.</p>
-<p>Some numeric escapes: &#80; &#x65; &#x72; &#108;</p>
+
+<h1 id="DESCRIPTION">DESCRIPTION</h1>
+
+<p>I am a stupid fool who puts naked &lt; &amp; &gt; characters in my POD instead of escaping them as &lt; and &gt;.</p>
+
+<p>Here is some <b>bold</b> text, some <i>italic</i> plus <i>/etc/fstab</i> file and something that looks like an &lt;html&gt; tag. This is some <code>$code($arg1)</code>.</p>
+
+<p>Some numeric escapes: P e r l</p>
+
</body>
</html>
+
+
diff --git a/ext/Pod-Html/t/htmllink.t b/ext/Pod-Html/t/htmllink.t
index 592fef3262..9c26dbfcf2 100644
--- a/ext/Pod-Html/t/htmllink.t
+++ b/ext/Pod-Html/t/htmllink.t
@@ -14,7 +14,7 @@ __DATA__
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
-<title>htmllink - Test HTML links</title>
+<title></title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<link rev="made" href="mailto:[PERLADMIN]" />
</head>
@@ -22,106 +22,136 @@ __DATA__
<body style="background-color: white">
-<!-- INDEX BEGIN -->
-<div name="index">
-<p><a name="__index__"></a></p>
-<ul>
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#LINKS">LINKS</a></li>
+ <li><a href="#TARGETS">TARGETS</a>
+ <ul>
+ <li><a href="#section1">section1</a></li>
+ <li><a href="#section-2">section 2</a></li>
+ <li><a href="#section-three">section three</a></li>
+ </ul>
+ </li>
+</ul>
- <li><a href="#name">NAME</a></li>
- <li><a href="#links">LINKS</a></li>
- <li><a href="#targets">TARGETS</a></li>
- <ul>
+<h1 id="NAME">NAME</h1>
- <li><a href="#section1">section1</a></li>
- <li><a href="#section_2">section 2</a></li>
- <li><a href="#section_three">section three</a></li>
- </ul>
+<p>htmllink - Test HTML links</p>
-</ul>
+<h1 id="LINKS">LINKS</h1>
-<hr name="index" />
-</div>
-<!-- INDEX END -->
+<p><a href="#section1">&quot;section1&quot;</a></p>
+
+<p><a href="#section-2">&quot;section 2&quot;</a></p>
+
+<p><a href="#section-three">&quot;section three&quot;</a></p>
+
+<p><a href="#item1">&quot;item1&quot;</a></p>
+
+<p><a href="#item-2">&quot;item 2&quot;</a></p>
+
+<p><a href="#item-three">&quot;item three&quot;</a></p>
+
+<p><a href="#section1">&quot;section1&quot;</a></p>
+
+<p><a href="#section-2">&quot;section 2&quot;</a></p>
+
+<p><a href="#section-three">&quot;section three&quot;</a></p>
+
+<p><a href="#item1">&quot;item1&quot;</a></p>
+
+<p><a href="#item-2">&quot;item 2&quot;</a></p>
+
+<p><a href="#item-three">&quot;item three&quot;</a></p>
+
+<p><a href="#section1">&quot;section1&quot;</a></p>
+
+<p><a href="#section-2">&quot;section 2&quot;</a></p>
+
+<p><a href="#section-three">&quot;section three&quot;</a></p>
+
+<p><a href="#item1">&quot;item1&quot;</a></p>
+
+<p><a href="#item-2">&quot;item 2&quot;</a></p>
+
+<p><a href="#item-three">&quot;item three&quot;</a></p>
-<p>
-</p>
-<h1><a name="name">NAME</a></h1>
-<p>htmllink - Test HTML links</p>
-<p>
-</p>
-<hr />
-<h1><a name="links">LINKS</a></h1>
-<p><a href="#section1">section1</a></p>
-<p><a href="#section_2">section 2</a></p>
-<p><a href="#section_three">section three</a></p>
-<p><a href="#item1">item1</a></p>
-<p><a href="#item_2">item 2</a></p>
-<p><a href="#item_three">item three</a></p>
-<p><a href="#section1">section1</a></p>
-<p><a href="#section_2">section 2</a></p>
-<p><a href="#section_three">section three</a></p>
-<p><a href="#item1">item1</a></p>
-<p><a href="#item_2">item 2</a></p>
-<p><a href="#item_three">item three</a></p>
-<p><a href="#section1">section1</a></p>
-<p><a href="#section_2">section 2</a></p>
-<p><a href="#section_three">section three</a></p>
-<p><a href="#item1">item1</a></p>
-<p><a href="#item_2">item 2</a></p>
-<p><a href="#item_three">item three</a></p>
<p><a href="#section1">text</a></p>
-<p><a href="#section_2">text</a></p>
-<p><a href="#section_three">text</a></p>
+
+<p><a href="#section-2">text</a></p>
+
+<p><a href="#section-three">text</a></p>
+
<p><a href="#item1">text</a></p>
-<p><a href="#item_2">text</a></p>
-<p><a href="#item_three">text</a></p>
+
+<p><a href="#item-2">text</a></p>
+
+<p><a href="#item-three">text</a></p>
+
<p><a href="#section1">text</a></p>
-<p><a href="#section_2">text</a></p>
-<p><a href="#section_three">text</a></p>
+
+<p><a href="#section-2">text</a></p>
+
+<p><a href="#section-three">text</a></p>
+
<p><a href="#item1">text</a></p>
-<p><a href="#item_2">text</a></p>
-<p><a href="#item_three">text</a></p>
+
+<p><a href="#item-2">text</a></p>
+
+<p><a href="#item-three">text</a></p>
+
<p><a href="#section1">text</a></p>
-<p><a href="#section_2">text</a></p>
-<p><a href="#section_three">text</a></p>
+
+<p><a href="#section-2">text</a></p>
+
+<p><a href="#section-three">text</a></p>
+
<p><a href="#item1">text</a></p>
-<p><a href="#item_2">text</a></p>
-<p><a href="#item_three">text</a></p>
-<p>
-</p>
-<hr />
-<h1><a name="targets">TARGETS</a></h1>
-<p>
-</p>
-<h2><a name="section1">section1</a></h2>
+
+<p><a href="#item-2">text</a></p>
+
+<p><a href="#item-three">text</a></p>
+
+<h1 id="TARGETS">TARGETS</h1>
+
+<h2 id="section1">section1</h2>
+
<p>This is section one.</p>
-<p>
-</p>
-<h2><a name="section_2">section 2</a></h2>
+
+<h2 id="section-2">section 2</h2>
+
<p>This is section two.</p>
-<p>
-</p>
-<h2><a name="section_three">section three</a></h2>
+
+<h2 id="section-three">section three</h2>
+
<p>This is section three.</p>
+
<dl>
-<dt><strong><a name="item1" class="item">item1</a></strong></dt>
+<dt id="item1">item1 </dt>
<dd>
+
<p>This is item one.</p>
-</dd>
-<dt><strong><a name="item_2" class="item">item 2</a></strong></dt>
+</dd>
+<dt id="item-2">item 2 </dt>
<dd>
+
<p>This is item two.</p>
-</dd>
-<dt><strong><a name="item_three" class="item">item three</a></strong></dt>
+</dd>
+<dt id="item-three">item three </dt>
<dd>
+
<p>This is item three.</p>
+
</dd>
</dl>
+
</body>
</html>
+
+
diff --git a/ext/Pod-Html/t/htmlview.pod b/ext/Pod-Html/t/htmlview.pod
index 2ac15d36fe..05272c46cc 100644
--- a/ext/Pod-Html/t/htmlview.pod
+++ b/ext/Pod-Html/t/htmlview.pod
@@ -17,10 +17,10 @@ This is the description.
This is some more regular text.
Here is some B<bold> text, some I<italic> and something that looks
-like an E<lt>htmlE<gt> tag. This is some C<$code($arg1)>.
+like an E<lt>htmlE<gt> tag. This is some C<$code($arg1)>.
-This C<text contains embedded B<bold> and I<italic> tags>. These can
-be nested, allowing B<bold and I<bold E<amp> italic> text>. The module also
+This C<text contains embedded B<bold> and I<italic> tags>. These can
+be nested, allowing B<bold and I<bold E<amp> italic> text>. The module also
supports the extended B<< syntax >> and permits I<< nested tags E<amp>
other B<<< cool >>> stuff >>
@@ -30,7 +30,7 @@ Here is a list of methods
=head2 new()
-Constructor method. Accepts the following config options:
+Constructor method. Accepts the following config options:
=over 4
@@ -60,6 +60,16 @@ The waz item.
The baz item.
+=over 4
+
+=item *
+
+A correct list within a list
+
+=item * Boomerang
+
+=back
+
=back
Title on the same line as the =item + * bullets
@@ -78,6 +88,24 @@ Title on the same line as the =item + numerical bullets
=over
+=item 1
+
+Cat
+
+=item 2
+
+Sat
+
+=item 3
+
+Mat
+
+=back
+
+Numbered list with text on the same line
+
+=over
+
=item 1 Cat
=item 2 Sat
diff --git a/ext/Pod-Html/t/htmlview.t b/ext/Pod-Html/t/htmlview.t
index dc15c6b194..97e0536524 100644
--- a/ext/Pod-Html/t/htmlview.t
+++ b/ext/Pod-Html/t/htmlview.t
@@ -14,7 +14,7 @@ __DATA__
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
-<title>NAME</title>
+<title></title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<link rev="made" href="mailto:[PERLADMIN]" />
</head>
@@ -22,162 +22,214 @@ __DATA__
<body style="background-color: white">
-<!-- INDEX BEGIN -->
-<div name="index">
-<p><a name="__index__"></a></p>
-<ul>
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#SYNOPSIS">SYNOPSIS</a></li>
+ <li><a href="#DESCRIPTION">DESCRIPTION</a></li>
+ <li><a href="#METHODS-OTHER-STUFF">METHODS =&gt; OTHER STUFF</a>
+ <ul>
+ <li><a href="#new-">new()</a></li>
+ <li><a href="#old-">old()</a></li>
+ </ul>
+ </li>
+ <li><a href="#TESTING-FOR-AND-BEGIN">TESTING FOR AND BEGIN</a></li>
+ <li><a href="#TESTING-URLs-hyperlinking">TESTING URLs hyperlinking</a></li>
+ <li><a href="#SEE-ALSO">SEE ALSO</a></li>
+ <li><a href="#POD-ERRORS">POD ERRORS</a></li>
+</ul>
- <li><a href="#name">NAME</a></li>
- <li><a href="#synopsis">SYNOPSIS</a></li>
- <li><a href="#description">DESCRIPTION</a></li>
- <li><a href="#methods____other_stuff">METHODS =&gt; OTHER STUFF</a></li>
- <ul>
+<h1 id="NAME">NAME</h1>
- <li><a href="#new__"><code>new()</code></a></li>
- <li><a href="#old__"><code>old()</code></a></li>
- </ul>
+<p>Test HTML Rendering</p>
- <li><a href="#testing_for_and_begin">TESTING FOR AND BEGIN</a></li>
- <li><a href="#testing_urls_hyperlinking">TESTING URLs hyperlinking</a></li>
- <li><a href="#see_also">SEE ALSO</a></li>
-</ul>
+<h1 id="SYNOPSIS">SYNOPSIS</h1>
-<hr name="index" />
-</div>
-<!-- INDEX END -->
+<pre><code> use My::Module;
+
+ my $module = My::Module-&gt;new();</code></pre>
+
+<h1 id="DESCRIPTION">DESCRIPTION</h1>
-<p>
-</p>
-<h1><a name="name">NAME</a></h1>
-<p>Test HTML Rendering</p>
-<p>
-</p>
-<hr />
-<h1><a name="synopsis">SYNOPSIS</a></h1>
-<pre>
- use My::Module;</pre>
-<pre>
- my $module = My::Module-&gt;new();</pre>
-<p>
-</p>
-<hr />
-<h1><a name="description">DESCRIPTION</a></h1>
<p>This is the description.</p>
-<pre>
- Here is a verbatim section.</pre>
+
+<pre><code> Here is a verbatim section.</code></pre>
+
<p>This is some more regular text.</p>
-<p>Here is some <strong>bold</strong> text, some <em>italic</em> and something that looks
-like an &lt;html&gt; tag. This is some <code>$code($arg1)</code>.</p>
-<p>This <code>text contains embedded bold and italic tags</code>. These can
-be nested, allowing <strong>bold and <em>bold &amp; italic</em> text</strong>. The module also
-supports the extended <strong>syntax </strong>&gt; and permits <em>nested tags &amp;
-other <strong>cool </strong></em>&gt; stuff &gt;&gt;</p>
-<p>
-</p>
-<hr />
-<h1><a name="methods____other_stuff">METHODS =&gt; OTHER STUFF</a></h1>
+
+<p>Here is some <b>bold</b> text, some <i>italic</i> and something that looks like an &lt;html&gt; tag. This is some <code>$code($arg1)</code>.</p>
+
+<p>This <code>text contains embedded <b>bold</b> and <i>italic</i> tags</code>. These can be nested, allowing <b>bold and <i>bold &amp; italic</i> text</b>. The module also supports the extended <b>syntax</b> and permits <i>nested tags &amp; other <b>cool</b> stuff</i></p>
+
+<h1 id="METHODS-OTHER-STUFF">METHODS =&gt; OTHER STUFF</h1>
+
<p>Here is a list of methods</p>
-<p>
-</p>
-<h2><a name="new__"><code>new()</code></a></h2>
-<p>Constructor method. Accepts the following config options:</p>
+
+<h2 id="new-">new()</h2>
+
+<p>Constructor method. Accepts the following config options:</p>
+
<dl>
-<dt><strong><a name="foo" class="item">foo</a></strong></dt>
+<dt id="foo">foo</dt>
<dd>
+
<p>The foo item.</p>
-</dd>
-<dt><strong><a name="bar" class="item">bar</a></strong></dt>
+</dd>
+<dt id="bar">bar</dt>
<dd>
+
<p>The bar item.</p>
-<p>This is a list within a list</p>
+
<ul>
-<li>
+
+<p>This is a list within a list</p>
+
+<p>*</p>
+
<p>The wiz item.</p>
-</li>
-<li>
+
+<p>*</p>
+
<p>The waz item.</p>
-</li>
+
</ul>
-</dd>
-<dt><strong><a name="baz" class="item">baz</a></strong></dt>
+</dd>
+<dt id="baz">baz</dt>
<dd>
+
<p>The baz item.</p>
+
+<ul>
+
+<li><p>A correct list within a list</p>
+
+</li>
+<li><p>Boomerang</p>
+
+</li>
+</ul>
+
</dd>
</dl>
+
<p>Title on the same line as the =item + * bullets</p>
+
<ul>
-<li><strong><a name="black_cat" class="item"><code>Black</code> Cat</a></strong>
+
+<li><p><code>Black</code> Cat</p>
</li>
-<li><strong><a name="sat_on_the" class="item">Sat <em>on</em>&nbsp;the</a></strong>
+<li><p>Sat <span style="white-space: nowrap;"><i>on</i> the</span></p>
</li>
-<li><strong><a name="mat" class="item">Mat&lt;!&gt;</a></strong>
+<li><p>Mat&lt;!&gt;</p>
</li>
</ul>
+
<p>Title on the same line as the =item + numerical bullets</p>
+
<ol>
-<li><strong><a name="cat" class="item">Cat</a></strong>
+
+<li><p>Cat</p>
</li>
-<li><strong><a name="sat" class="item">Sat</a></strong>
+<li><p>Sat</p>
</li>
-<li><strong><a name="mat2" class="item">Mat</a></strong>
+<li><p>Mat</p>
</li>
</ol>
-<p>No bullets, no title</p>
+
+<p>Numbered list with text on the same line</p>
+
<dl>
-<dt>
+
+<dt id="Cat">1 Cat</dt>
<dd>
-<p>Cat</p>
+
</dd>
-<dt>
+<dt id="Sat">2 Sat</dt>
<dd>
-<p>Sat</p>
+
</dd>
-<dt>
+<dt id="Mat">3 Mat</dt>
<dd>
-<p>Mat</p>
+
</dd>
</dl>
-<p>
-</p>
-<h2><a name="old__"><code>old()</code></a></h2>
+
+<p>No bullets, no title</p>
+
+<ul>
+
+<li><p>Cat</p>
+
+</li>
+<li><p>Sat</p>
+
+</li>
+<li><p>Mat</p>
+
+</li>
+</ul>
+
+<h2 id="old-">old()</h2>
+
<p>Destructor method</p>
-<p>
-</p>
-<hr />
-<h1><a name="testing_for_and_begin">TESTING FOR AND BEGIN</a></h1>
+
+<h1 id="TESTING-FOR-AND-BEGIN">TESTING FOR AND BEGIN</h1>
+
+
+
<br />
<p>
blah blah
-</p><p>intermediate text</p>
+</p>
+
+<p>intermediate text</p>
+
+
+
<more>
HTML
-</more>some text<p>
-</p>
-<hr />
-<h1><a name="testing_urls_hyperlinking">TESTING URLs hyperlinking</a></h1>
-<p>This is an href link1: <a href="http://example.com">http://example.com</a></p>
-<p>This is an href link2: <a href="http://example.com/foo/bar.html">http://example.com/foo/bar.html</a></p>
-<p>This is an email link: <a href="mailto:mailto:foo@bar.com">mailto:foo@bar.com</a></p>
-<pre>
- This is a link in a verbatim block &lt;a href=&quot;<a href="http://perl.org">http://perl.org</a>&quot;&gt; Perl &lt;/a&gt;</pre>
-<p>
-</p>
-<hr />
-<h1><a name="see_also">SEE ALSO</a></h1>
-<p>See also <a href="/t/htmlescp.html">Test Page 2</a>, the <a href="/Your/Module.html">the Your::Module manpage</a> and <a href="/Their/Module.html">the Their::Module manpage</a>
-manpages and the other interesting file <em class="file">/usr/local/my/module/rocks</em>
-as well.</p>
+</more>some text
+
+<h1 id="TESTING-URLs-hyperlinking">TESTING URLs hyperlinking</h1>
+
+<p>This is an href link1: http://example.com</p>
+
+<p>This is an href link2: http://example.com/foo/bar.html</p>
+
+<p>This is an email link: mailto:foo@bar.com</p>
+
+<pre><code> This is a link in a verbatim block &lt;a href=&quot;http://perl.org&quot;&gt; Perl &lt;/a&gt;</code></pre>
+
+<h1 id="SEE-ALSO">SEE ALSO</h1>
+
+<p>See also <a href="/t/htmlescp.html">Test Page 2</a>, the <a>Your::Module</a> and <a>Their::Module</a> manpages and the other interesting file <i>/usr/local/my/module/rocks</i> as well.</p>
+
+<h1 id="POD-ERRORS">POD ERRORS</h1>
+
+<p>Hey! <b>The above document had some coding errors, which are explained below:</b></p>
+
+<dl>
+
+<dt id="Around-line-45:">Around line 45:</dt>
+<dd>
+
+<p>You can&#39;t have =items (as at line 49) unless the first thing after the =over is an =item</p>
+
+</dd>
+</dl>
+
</body>
</html>
+
+
diff --git a/ext/Pod-Html/t/pod2html-lib.pl b/ext/Pod-Html/t/pod2html-lib.pl
index 2259d4528e..7a71e4c5b7 100644
--- a/ext/Pod-Html/t/pod2html-lib.pl
+++ b/ext/Pod-Html/t/pod2html-lib.pl
@@ -1,22 +1,51 @@
require Cwd;
require Pod::Html;
require Config;
-use File::Spec::Functions;
+use File::Spec::Functions ':ALL';
+use File::Path 'remove_tree';
+use File::Copy;
+
+# make_test_dir and rem_test_dir dynamically create and remove testdir/test.lib.
+# The directory is created at run time to pass t/filenames.t, which does not
+# allow '.'s in filenames because '.' is the directory separator on VMS. All tests
+# that require testdir/test.lib to be present are skipped if it cannot be created.
+sub make_test_dir {
+ if (-d 'testdir/test.lib') {
+ warn "Directory 'test.lib' exists (it shouldn't yet) - removing it";
+ rem_test_dir();
+ }
+ mkdir('testdir/test.lib') or return "Could not make test.lib directory: $!\n";
+ copy('testdir/perlpodspec-copy.pod', 'testdir/test.lib/podspec-copy.pod')
+ or return "Could not copy perlpodspec-copy: $!";
+ copy('testdir/perlvar-copy.pod', 'testdir/test.lib/var-copy.pod')
+ or return "Could not copy perlvar-copy: $!";
+ return 0;
+}
+
+sub rem_test_dir {
+ remove_tree('testdir/test.lib')
+ or warn "Error removing temporary directory 'testdir/test.lib'";
+}
sub convert_n_test {
- my($podfile, $testname) = @_;
+ my($podfile, $testname, @p2h_args) = @_;
my $cwd = Cwd::cwd();
+ my ($vol, $dir) = splitpath($cwd, 1);
+ my $relcwd = substr($dir, length(File::Spec->rootdir()));
+
my $new_dir = catdir $cwd, "t";
my $infile = catfile $new_dir, "$podfile.pod";
my $outfile = catfile $new_dir, "$podfile.html";
-
+
+ # To add/modify args to p2h, use @p2h_args
Pod::Html::pod2html(
+ "--infile=$infile",
+ "--outfile=$outfile",
"--podpath=t",
- "--podroot=$cwd",
"--htmlroot=/",
- "--infile=$infile",
- "--outfile=$outfile"
+ "--podroot=$cwd",
+ @p2h_args,
);
@@ -26,6 +55,7 @@ sub convert_n_test {
# expected
$expect = <DATA>;
$expect =~ s/\[PERLADMIN\]/$Config::Config{perladmin}/;
+ $expect =~ s/\[RELCURRENTWORKINGDIRECTORY\]/$relcwd/g;
if (ord("A") == 193) { # EBCDIC.
$expect =~ s/item_mat_3c_21_3e/item_mat_4c_5a_6e/;
}
@@ -54,8 +84,6 @@ sub convert_n_test {
# pod2html creates these
1 while unlink $outfile;
- 1 while unlink "pod2htmd.tmp";
- 1 while unlink "pod2htmi.tmp";
}
1;
diff --git a/ext/Pod-Html/t/poderr.pod b/ext/Pod-Html/t/poderr.pod
new file mode 100644
index 0000000000..f54ab49283
--- /dev/null
+++ b/ext/Pod-Html/t/poderr.pod
@@ -0,0 +1,19 @@
+=head1 NAME
+
+Test POD ERROR section
+
+=over 4
+
+This text is not allowed
+
+=item *
+
+The wiz item.
+
+=item *
+
+The waz item.
+
+=back
+
+=cut
diff --git a/ext/Pod-Html/t/poderr.t b/ext/Pod-Html/t/poderr.t
new file mode 100644
index 0000000000..270d948af4
--- /dev/null
+++ b/ext/Pod-Html/t/poderr.t
@@ -0,0 +1,68 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Test::More tests => 1;
+
+convert_n_test("poderr", "pod error section");
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+ <li><a href="#POD-ERRORS">POD ERRORS</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>Test POD ERROR section</p>
+
+<ul>
+
+<p>This text is not allowed</p>
+
+<p>*</p>
+
+<p>The wiz item.</p>
+
+<p>*</p>
+
+<p>The waz item.</p>
+
+</ul>
+
+<h1 id="POD-ERRORS">POD ERRORS</h1>
+
+<p>Hey! <b>The above document had some coding errors, which are explained below:</b></p>
+
+<dl>
+
+<dt id="Around-line-5:">Around line 5:</dt>
+<dd>
+
+<p>You can&#39;t have =items (as at line 9) unless the first thing after the =over is an =item</p>
+
+</dd>
+</dl>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/t/podnoerr.pod b/ext/Pod-Html/t/podnoerr.pod
new file mode 100644
index 0000000000..f54ab49283
--- /dev/null
+++ b/ext/Pod-Html/t/podnoerr.pod
@@ -0,0 +1,19 @@
+=head1 NAME
+
+Test POD ERROR section
+
+=over 4
+
+This text is not allowed
+
+=item *
+
+The wiz item.
+
+=item *
+
+The waz item.
+
+=back
+
+=cut
diff --git a/ext/Pod-Html/t/podnoerr.t b/ext/Pod-Html/t/podnoerr.t
new file mode 100644
index 0000000000..cb74812db2
--- /dev/null
+++ b/ext/Pod-Html/t/podnoerr.t
@@ -0,0 +1,55 @@
+#!/usr/bin/perl -w # -*- perl -*-
+
+BEGIN {
+ require "t/pod2html-lib.pl";
+}
+
+use strict;
+use Test::More tests => 1;
+
+convert_n_test("podnoerr", "pod error section",
+ "--nopoderrors",
+);
+
+__DATA__
+<?xml version="1.0" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title></title>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<link rev="made" href="mailto:[PERLADMIN]" />
+</head>
+
+<body style="background-color: white">
+
+
+
+<ul id="index">
+ <li><a href="#NAME">NAME</a></li>
+</ul>
+
+<h1 id="NAME">NAME</h1>
+
+<p>Test POD ERROR section</p>
+
+<ul>
+
+<p>This text is not allowed</p>
+
+<p>*</p>
+
+<p>The wiz item.</p>
+
+<p>*</p>
+
+<p>The waz item.</p>
+
+</ul>
+
+
+</body>
+
+</html>
+
+
diff --git a/ext/Pod-Html/testdir/perlpodspec-copy.pod b/ext/Pod-Html/testdir/perlpodspec-copy.pod
new file mode 100644
index 0000000000..4f914ef0e4
--- /dev/null
+++ b/ext/Pod-Html/testdir/perlpodspec-copy.pod
@@ -0,0 +1,1899 @@
+
+=head1 NAME
+
+perlpodspeccopy - Plain Old Documentation: format specification and notes
+
+=head1 DESCRIPTION
+
+This document is detailed notes on the Pod markup language. Most
+people will only have to read L<perlpod|perlpod> to know how to write
+in Pod, but this document may answer some incidental questions to do
+with parsing and rendering Pod.
+
+In this document, "must" / "must not", "should" /
+"should not", and "may" have their conventional (cf. RFC 2119)
+meanings: "X must do Y" means that if X doesn't do Y, it's against
+this specification, and should really be fixed. "X should do Y"
+means that it's recommended, but X may fail to do Y, if there's a
+good reason. "X may do Y" is merely a note that X can do Y at
+will (although it is up to the reader to detect any connotation of
+"and I think it would be I<nice> if X did Y" versus "it wouldn't
+really I<bother> me if X did Y").
+
+Notably, when I say "the parser should do Y", the
+parser may fail to do Y, if the calling application explicitly
+requests that the parser I<not> do Y. I often phrase this as
+"the parser should, by default, do Y." This doesn't I<require>
+the parser to provide an option for turning off whatever
+feature Y is (like expanding tabs in verbatim paragraphs), although
+it implies that such an option I<may> be provided.
+
+=head1 Pod Definitions
+
+Pod is embedded in files, typically Perl source files -- although you
+can write a file that's nothing but Pod.
+
+A B<line> in a file consists of zero or more non-newline characters,
+terminated by either a newline or the end of the file.
+
+A B<newline sequence> is usually a platform-dependent concept, but
+Pod parsers should understand it to mean any of CR (ASCII 13), LF
+(ASCII 10), or a CRLF (ASCII 13 followed immediately by ASCII 10), in
+addition to any other system-specific meaning. The first CR/CRLF/LF
+sequence in the file may be used as the basis for identifying the
+newline sequence for parsing the rest of the file.
+
+A B<blank line> is a line consisting entirely of zero or more spaces
+(ASCII 32) or tabs (ASCII 9), and terminated by a newline or end-of-file.
+A B<non-blank line> is a line containing one or more characters other
+than space or tab (and terminated by a newline or end-of-file).
+
+(I<Note:> Many older Pod parsers did not accept a line consisting of
+spaces/tabs and then a newline as a blank line -- the only lines they
+considered blank were lines consisting of I<no characters at all>,
+terminated by a newline.)
+
+B<Whitespace> is used in this document as a blanket term for spaces,
+tabs, and newline sequences. (By itself, this term usually refers
+to literal whitespace. That is, sequences of whitespace characters
+in Pod source, as opposed to "EE<lt>32>", which is a formatting
+code that I<denotes> a whitespace character.)
+
+A B<Pod parser> is a module meant for parsing Pod (regardless of
+whether this involves calling callbacks or building a parse tree or
+directly formatting it). A B<Pod formatter> (or B<Pod translator>)
+is a module or program that converts Pod to some other format (HTML,
+plaintext, TeX, PostScript, RTF). A B<Pod processor> might be a
+formatter or translator, or might be a program that does something
+else with the Pod (like counting words, scanning for index points,
+etc.).
+
+Pod content is contained in B<Pod blocks>. A Pod block starts with a
+line that matches C<m/\A=[a-zA-Z]/>, and continues up to the next line
+that matches C<m/\A=cut/> -- or up to the end of the file, if there is
+no C<m/\A=cut/> line.
+
+=for comment
+ The current perlsyn says:
+ [beginquote]
+ Note that pod translators should look at only paragraphs beginning
+ with a pod directive (it makes parsing easier), whereas the compiler
+ actually knows to look for pod escapes even in the middle of a
+ paragraph. This means that the following secret stuff will be ignored
+ by both the compiler and the translators.
+ $a=3;
+ =secret stuff
+ warn "Neither POD nor CODE!?"
+ =cut back
+ print "got $a\n";
+ You probably shouldn't rely upon the warn() being podded out forever.
+ Not all pod translators are well-behaved in this regard, and perhaps
+ the compiler will become pickier.
+ [endquote]
+ I think that those paragraphs should just be removed; paragraph-based
+ parsing seems to have been largely abandoned, because of the hassle
+ with non-empty blank lines messing up what people meant by "paragraph".
+ Even if the "it makes parsing easier" bit were especially true,
+ it wouldn't be worth the confusion of having perl and pod2whatever
+ actually disagree on what can constitute a Pod block.
+
+Within a Pod block, there are B<Pod paragraphs>. A Pod paragraph
+consists of non-blank lines of text, separated by one or more blank
+lines.
+
+For purposes of Pod processing, there are four types of paragraphs in
+a Pod block:
+
+=over
+
+=item *
+
+A command paragraph (also called a "directive"). The first line of
+this paragraph must match C<m/\A=[a-zA-Z]/>. Command paragraphs are
+typically one line, as in:
+
+ =head1 NOTES
+
+ =item *
+
+But they may span several (non-blank) lines:
+
+ =for comment
+ Hm, I wonder what it would look like if
+ you tried to write a BNF for Pod from this.
+
+ =head3 Dr. Strangelove, or: How I Learned to
+ Stop Worrying and Love the Bomb
+
+I<Some> command paragraphs allow formatting codes in their content
+(i.e., after the part that matches C<m/\A=[a-zA-Z]\S*\s*/>), as in:
+
+ =head1 Did You Remember to C<use strict;>?
+
+In other words, the Pod processing handler for "head1" will apply the
+same processing to "Did You Remember to CE<lt>use strict;>?" that it
+would to an ordinary paragraph -- i.e., formatting codes (like
+"CE<lt>...>") are parsed and presumably formatted appropriately, and
+whitespace in the form of literal spaces and/or tabs is not
+significant.
+
+=item *
+
+A B<verbatim paragraph>. The first line of this paragraph must be a
+literal space or tab, and this paragraph must not be inside a "=begin
+I<identifier>", ... "=end I<identifier>" sequence unless
+"I<identifier>" begins with a colon (":"). That is, if a paragraph
+starts with a literal space or tab, but I<is> inside a
+"=begin I<identifier>", ... "=end I<identifier>" region, then it's
+a data paragraph, unless "I<identifier>" begins with a colon.
+
+Whitespace I<is> significant in verbatim paragraphs (although, in
+processing, tabs are probably expanded).
+
+=item *
+
+An B<ordinary paragraph>. A paragraph is an ordinary paragraph
+if its first line matches neither C<m/\A=[a-zA-Z]/> nor
+C<m/\A[ \t]/>, I<and> if it's not inside a "=begin I<identifier>",
+... "=end I<identifier>" sequence unless "I<identifier>" begins with
+a colon (":").
+
+=item *
+
+A B<data paragraph>. This is a paragraph that I<is> inside a "=begin
+I<identifier>" ... "=end I<identifier>" sequence where
+"I<identifier>" does I<not> begin with a literal colon (":"). In
+some sense, a data paragraph is not part of Pod at all (i.e.,
+effectively it's "out-of-band"), since it's not subject to most kinds
+of Pod parsing; but it is specified here, since Pod
+parsers need to be able to call an event for it, or store it in some
+form in a parse tree, or at least just parse I<around> it.
+
+=back
+
+For example: consider the following paragraphs:
+
+ # <- that's the 0th column
+
+ =head1 Foo
+
+ Stuff
+
+ $foo->bar
+
+ =cut
+
+Here, "=head1 Foo" and "=cut" are command paragraphs because the first
+line of each matches C<m/\A=[a-zA-Z]/>. "I<[space][space]>$foo->bar"
+is a verbatim paragraph, because its first line starts with a literal
+whitespace character (and there's no "=begin"..."=end" region around).
+
+The "=begin I<identifier>" ... "=end I<identifier>" commands stop
+paragraphs that they surround from being parsed as ordinary or verbatim
+paragraphs, if I<identifier> doesn't begin with a colon. This
+is discussed in detail in the section
+L</About Data Paragraphs and "=beginE<sol>=end" Regions>.
+
+=head1 Pod Commands
+
+This section is intended to supplement and clarify the discussion in
+L<perlpod/"Command Paragraph">. These are the currently recognized
+Pod commands:
+
+=over
+
+=item "=head1", "=head2", "=head3", "=head4"
+
+This command indicates that the text in the remainder of the paragraph
+is a heading. That text may contain formatting codes. Examples:
+
+ =head1 Object Attributes
+
+ =head3 What B<Not> to Do!
+
+=item "=pod"
+
+This command indicates that this paragraph begins a Pod block. (If we
+are already in the middle of a Pod block, this command has no effect at
+all.) If there is any text in this command paragraph after "=pod",
+it must be ignored. Examples:
+
+ =pod
+
+ This is a plain Pod paragraph.
+
+ =pod This text is ignored.
+
+=item "=cut"
+
+This command indicates that this line is the end of this previously
+started Pod block. If there is any text after "=cut" on the line, it must be
+ignored. Examples:
+
+ =cut
+
+ =cut The documentation ends here.
+
+ =cut
+ # This is the first line of program text.
+ sub foo { # This is the second.
+
+It is an error to try to I<start> a Pod block with a "=cut" command. In
+that case, the Pod processor must halt parsing of the input file, and
+must by default emit a warning.
+
+=item "=over"
+
+This command indicates that this is the start of a list/indent
+region. If there is any text following the "=over", it must consist
+of only a nonzero positive numeral. The semantics of this numeral is
+explained in the L</"About =over...=back Regions"> section, further
+below. Formatting codes are not expanded. Examples:
+
+ =over 3
+
+ =over 3.5
+
+ =over
+
+=item "=item"
+
+This command indicates that an item in a list begins here. Formatting
+codes are processed. The semantics of the (optional) text in the
+remainder of this paragraph are
+explained in the L</"About =over...=back Regions"> section, further
+below. Examples:
+
+ =item
+
+ =item *
+
+ =item *
+
+ =item 14
+
+ =item 3.
+
+ =item C<< $thing->stuff(I<dodad>) >>
+
+ =item For transporting us beyond seas to be tried for pretended
+ offenses
+
+ =item He is at this time transporting large armies of foreign
+ mercenaries to complete the works of death, desolation and
+ tyranny, already begun with circumstances of cruelty and perfidy
+ scarcely paralleled in the most barbarous ages, and totally
+ unworthy the head of a civilized nation.
+
+=item "=back"
+
+This command indicates that this is the end of the region begun
+by the most recent "=over" command. It permits no text after the
+"=back" command.
+
+=item "=begin formatname"
+
+This marks the following paragraphs (until the matching "=end
+formatname") as being for some special kind of processing. Unless
+"formatname" begins with a colon, the contained non-command
+paragraphs are data paragraphs. But if "formatname" I<does> begin
+with a colon, then non-command paragraphs are ordinary paragraphs
+or data paragraphs. This is discussed in detail in the section
+L</About Data Paragraphs and "=beginE<sol>=end" Regions>.
+
+It is advised that formatnames match the regexp
+C<m/\A:?[-a-zA-Z0-9_]+\z/>. Implementors should anticipate future
+expansion in the semantics and syntax of the first parameter
+to "=begin"/"=end"/"=for".
+
+=item "=end formatname"
+
+This marks the end of the region opened by the matching
+"=begin formatname" region. If "formatname" is not the formatname
+of the most recent open "=begin formatname" region, then this
+is an error, and must generate an error message. This
+is discussed in detail in the section
+L</About Data Paragraphs and "=beginE<sol>=end" Regions>.
+
+=item "=for formatname text..."
+
+This is synonymous with:
+
+ =begin formatname
+
+ text...
+
+ =end formatname
+
+That is, it creates a region consisting of a single paragraph; that
+paragraph is to be treated as a normal paragraph if "formatname"
+begins with a ":"; if "formatname" I<doesn't> begin with a colon,
+then "text..." will constitute a data paragraph. There is no way
+to use "=for formatname text..." to express "text..." as a verbatim
+paragraph.
+
+=item "=encoding encodingname"
+
+This command, which should occur early in the document (at least
+before any non-US-ASCII data!), declares that this document is
+encoded in the encoding I<encodingname>, which must be
+an encoding name that L<Encode> recognizes. (Encode's list
+of supported encodings, in L<Encode::Supported>, is useful here.)
+If the Pod parser cannot decode the declared encoding, it
+should emit a warning and may abort parsing the document
+altogether.
+
+A document having more than one "=encoding" line should be
+considered an error. Pod processors may silently tolerate this if
+the not-first "=encoding" lines are just duplicates of the
+first one (e.g., if there's a "=encoding utf8" line, and later on
+another "=encoding utf8" line). But Pod processors should complain if
+there are contradictory "=encoding" lines in the same document
+(e.g., if there is a "=encoding utf8" early in the document and
+"=encoding big5" later). Pod processors that recognize BOMs
+may also complain if they see an "=encoding" line
+that contradicts the BOM (e.g., if a document with a UTF-16LE
+BOM has an "=encoding shiftjis" line).
+
+=back
+
+If a Pod processor sees any command other than the ones listed
+above (like "=head", or "=haed1", or "=stuff", or "=cuttlefish",
+or "=w123"), that processor must by default treat this as an
+error. It must not process the paragraph beginning with that
+command, must by default warn of this as an error, and may
+abort the parse. A Pod parser may allow a way for particular
+applications to add to the above list of known commands, and to
+stipulate, for each additional command, whether formatting
+codes should be processed.
+
+Future versions of this specification may add additional
+commands.
+
+
+
+=head1 Pod Formatting Codes
+
+(Note that in previous drafts of this document and of perlpod,
+formatting codes were referred to as "interior sequences", and
+this term may still be found in the documentation for Pod parsers,
+and in error messages from Pod processors.)
+
+There are two syntaxes for formatting codes:
+
+=over
+
+=item *
+
+A formatting code starts with a capital letter (just US-ASCII [A-Z])
+followed by a "<", any number of characters, and ending with the first
+matching ">". Examples:
+
+ That's what I<you> think!
+
+ What's C<dump()> for?
+
+ X<C<chmod> and C<unlink()> Under Different Operating Systems>
+
+=item *
+
+A formatting code starts with a capital letter (just US-ASCII [A-Z])
+followed by two or more "<"'s, one or more whitespace characters,
+any number of characters, one or more whitespace characters,
+and ending with the first matching sequence of two or more ">"'s, where
+the number of ">"'s equals the number of "<"'s in the opening of this
+formatting code. Examples:
+
+ That's what I<< you >> think!
+
+ C<<< open(X, ">>thing.dat") || die $! >>>
+
+ B<< $foo->bar(); >>
+
+With this syntax, the whitespace character(s) after the "CE<lt><<"
+and before the ">>" (or whatever letter) are I<not> renderable -- they
+do not signify whitespace, but are merely part of the formatting codes
+themselves. That is, these are all synonymous:
+
+ C<thing>
+ C<< thing >>
+ C<< thing >>
+ C<<< thing >>>
+ C<<<<
+ thing
+ >>>>
+
+and so on.
+
+=back
+
+In parsing Pod, a notably tricky part is the correct parsing of
+(potentially nested!) formatting codes. Implementors should
+consult the code in the C<parse_text> routine in Pod::Parser as an
+example of a correct implementation.
+
+=over
+
+=item C<IE<lt>textE<gt>> -- italic text
+
+See the brief discussion in L<perlpod/"Formatting Codes">.
+
+=item C<BE<lt>textE<gt>> -- bold text
+
+See the brief discussion in L<perlpod/"Formatting Codes">.
+
+=item C<CE<lt>codeE<gt>> -- code text
+
+See the brief discussion in L<perlpod/"Formatting Codes">.
+
+=item C<FE<lt>filenameE<gt>> -- style for filenames
+
+See the brief discussion in L<perlpod/"Formatting Codes">.
+
+=item C<XE<lt>topic nameE<gt>> -- an index entry
+
+See the brief discussion in L<perlpod/"Formatting Codes">.
+
+This code is unusual in that most formatters completely discard
+this code and its content. Other formatters will render it with
+invisible codes that can be used in building an index of
+the current document.
+
+=item C<ZE<lt>E<gt>> -- a null (zero-effect) formatting code
+
+Discussed briefly in L<perlpod/"Formatting Codes">.
+
+This code is unusual in that it should have no content. That is,
+a processor may complain if it sees C<ZE<lt>potatoesE<gt>>. Whether
+or not it complains, the I<potatoes> text should be ignored.
+
+=item C<LE<lt>nameE<gt>> -- a hyperlink
+
+The complicated syntaxes of this code are discussed at length in
+L<perlpod/"Formatting Codes">, and implementation details are
+discussed below, in L</"About LE<lt>...E<gt> Codes">. Parsing the
+contents of LE<lt>content> is tricky. Notably, the content has to be
+checked for whether it looks like a URL, or whether it has to be split
+on literal "|" and/or "/" (in the right order!), and so on,
+I<before> EE<lt>...> codes are resolved.
+
+=item C<EE<lt>escapeE<gt>> -- a character escape
+
+See L<perlpod/"Formatting Codes">, and several points in
+L</Notes on Implementing Pod Processors>.
+
+=item C<SE<lt>textE<gt>> -- text contains non-breaking spaces
+
+This formatting code is syntactically simple, but semantically
+complex. What it means is that each space in the printable
+content of this code signifies a non-breaking space.
+
+Consider:
+
+ C<$x ? $y : $z>
+
+ S<C<$x ? $y : $z>>
+
+Both signify the monospace (c[ode] style) text consisting of
+"$x", one space, "?", one space, "$y", one space, ":", one space, "$z". The
+difference is that in the latter, with the S code, those spaces
+are not "normal" spaces, but instead are non-breaking spaces.
+
+=back
+
+
+If a Pod processor sees any formatting code other than the ones
+listed above (as in "NE<lt>...>", or "QE<lt>...>", etc.), that
+processor must by default treat this as an error.
+A Pod parser may allow a way for particular
+applications to add to the above list of known formatting codes;
+a Pod parser might even allow a way to stipulate, for each additional
+command, whether it requires some form of special processing, as
+LE<lt>...> does.
+
+Future versions of this specification may add additional
+formatting codes.
+
+Historical note: A few older Pod processors would not see a ">" as
+closing a "CE<lt>" code, if the ">" was immediately preceded by
+a "-". This was so that this:
+
+ C<$foo->bar>
+
+would parse as equivalent to this:
+
+ C<$foo-E<gt>bar>
+
+instead of as equivalent to a "C" formatting code containing
+only "$foo-", and then a "bar>" outside the "C" formatting code. This
+problem has since been solved by the addition of syntaxes like this:
+
+ C<< $foo->bar >>
+
+Compliant parsers must not treat "->" as special.
+
+Formatting codes absolutely cannot span paragraphs. If a code is
+opened in one paragraph, and no closing code is found by the end of
+that paragraph, the Pod parser must close that formatting code,
+and should complain (as in "Unterminated I code in the paragraph
+starting at line 123: 'Time objects are not...'"). So these
+two paragraphs:
+
+ I<I told you not to do this!
+
+ Don't make me say it again!>
+
+...must I<not> be parsed as two paragraphs in italics (with the I
+code starting in one paragraph and ending in another.) Instead,
+the first paragraph should generate a warning, but that aside, the
+above code must parse as if it were:
+
+ I<I told you not to do this!>
+
+ Don't make me say it again!E<gt>
+
+(In SGMLish jargon, all Pod commands are like block-level
+elements, whereas all Pod formatting codes are like inline-level
+elements.)
+
+
+
+=head1 Notes on Implementing Pod Processors
+
+The following is a long section of miscellaneous requirements
+and suggestions to do with Pod processing.
+
+=over
+
+=item *
+
+Pod formatters should tolerate lines in verbatim blocks that are of
+any length, even if that means having to break them (possibly several
+times, for very long lines) to avoid text running off the side of the
+page. Pod formatters may warn of such line-breaking. Such warnings
+are particularly appropriate for lines that are over 100 characters long, which
+are usually not intentional.
+
+=item *
+
+Pod parsers must recognize I<all> of the three well-known newline
+formats: CR, LF, and CRLF. See L<perlport|perlport>.
+
+=item *
+
+Pod parsers should accept input lines that are of any length.
+
+=item *
+
+Since Perl recognizes a Unicode Byte Order Mark at the start of files
+as signaling that the file is Unicode encoded as in UTF-16 (whether
+big-endian or little-endian) or UTF-8, Pod parsers should do the
+same. Otherwise, the character encoding should be understood as
+being UTF-8 if the first highbit byte sequence in the file seems
+valid as a UTF-8 sequence, or otherwise as Latin-1.
+
+Future versions of this specification may specify
+how Pod can accept other encodings. Presumably treatment of other
+encodings in Pod parsing would be as in XML parsing: whatever the
+encoding declared by a particular Pod file, content is to be
+stored in memory as Unicode characters.
+
+=item *
+
+The well known Unicode Byte Order Marks are as follows: if the
+file begins with the two literal byte values 0xFE 0xFF, this is
+the BOM for big-endian UTF-16. If the file begins with the two
+literal byte values 0xFF 0xFE, this is the BOM for little-endian
+UTF-16. If the file begins with the three literal byte values
+0xEF 0xBB 0xBF, this is the BOM for UTF-8.
+
+=for comment
+ use bytes; print map sprintf(" 0x%02X", ord $_), split '', "\x{feff}";
+ 0xEF 0xBB 0xBF
+
+=for comment
+ If toke.c is modified to support UTF-32, add mention of those here.
+
+=item *
+
+A naive but sufficient heuristic for testing the first highbit
+byte-sequence in a BOM-less file (whether in code or in Pod!), to see
+whether that sequence is valid as UTF-8 (RFC 2279) is to check whether
+the first byte in the sequence is in the range 0xC0 - 0xFD
+I<and> whether the next byte is in the range
+0x80 - 0xBF. If so, the parser may conclude that this file is in
+UTF-8, and all highbit sequences in the file should be assumed to
+be UTF-8. Otherwise the parser should treat the file as being
+in Latin-1. In the unlikely circumstance that the first highbit
+sequence in a truly non-UTF-8 file happens to appear to be UTF-8, one
+can cater to our heuristic (as well as any more intelligent heuristic)
+by prefacing that line with a comment line containing a highbit
+sequence that is clearly I<not> valid as UTF-8. A line consisting
+of simply "#", an e-acute, and any non-highbit byte,
+is sufficient to establish this file's encoding.
+
+=for comment
+ If/WHEN some brave soul makes these heuristics into a generic
+ text-file class (or PerlIO layer?), we can presumably delete
+ mention of these icky details from this file, and can instead
+ tell people to just use appropriate class/layer.
+ Auto-recognition of newline sequences would be another desirable
+ feature of such a class/layer.
+ HINT HINT HINT.
+
+=for comment
+ "The probability that a string of characters
+ in any other encoding appears as valid UTF-8 is low" - RFC2279
+
+=item *
+
+This document's requirements and suggestions about encodings
+do not apply to Pod processors running on non-ASCII platforms,
+notably EBCDIC platforms.
+
+=item *
+
+Pod processors must treat a "=for [label] [content...]" paragraph as
+meaning the same thing as a "=begin [label]" paragraph, content, and
+an "=end [label]" paragraph. (The parser may conflate these two
+constructs, or may leave them distinct, in the expectation that the
+formatter will nevertheless treat them the same.)
+
+=item *
+
+When rendering Pod to a format that allows comments (i.e., to nearly
+any format other than plaintext), a Pod formatter must insert comment
+text identifying its name and version number, and the name and
+version numbers of any modules it might be using to process the Pod.
+Minimal examples:
+
+ %% POD::Pod2PS v3.14159, using POD::Parser v1.92
+
+ <!-- Pod::HTML v3.14159, using POD::Parser v1.92 -->
+
+ {\doccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08}
+
+ .\" Pod::Man version 3.14159, using POD::Parser version 1.92
+
+Formatters may also insert additional comments, including: the
+release date of the Pod formatter program, the contact address for
+the author(s) of the formatter, the current time, the name of input
+file, the formatting options in effect, version of Perl used, etc.
+
+Formatters may also choose to note errors/warnings as comments,
+besides or instead of emitting them otherwise (as in messages to
+STDERR, or C<die>ing).
+
+=item *
+
+Pod parsers I<may> emit warnings or error messages ("Unknown E code
+EE<lt>zslig>!") to STDERR (whether through printing to STDERR, or
+C<warn>ing/C<carp>ing, or C<die>ing/C<croak>ing), but I<must> allow
+suppressing all such STDERR output, and instead allow an option for
+reporting errors/warnings
+in some other way, whether by triggering a callback, or noting errors
+in some attribute of the document object, or some similarly unobtrusive
+mechanism -- or even by appending a "Pod Errors" section to the end of
+the parsed form of the document.
+
+=item *
+
+In cases of exceptionally aberrant documents, Pod parsers may abort the
+parse. Even then, using C<die>ing/C<croak>ing is to be avoided; where
+possible, the parser library may simply close the input file
+and add text like "*** Formatting Aborted ***" to the end of the
+(partial) in-memory document.
+
+=item *
+
+In paragraphs where formatting codes (like EE<lt>...>, BE<lt>...>)
+are understood (i.e., I<not> verbatim paragraphs, but I<including>
+ordinary paragraphs, and command paragraphs that produce renderable
+text, like "=head1"), literal whitespace should generally be considered
+"insignificant", in that one literal space has the same meaning as any
+(nonzero) number of literal spaces, literal newlines, and literal tabs
+(as long as this produces no blank lines, since those would terminate
+the paragraph). Pod parsers should compact literal whitespace in each
+processed paragraph, but may provide an option for overriding this
+(since some processing tasks do not require it), or may follow
+additional special rules (for example, specially treating
+period-space-space or period-newline sequences).
+
+=item *
+
+Pod parsers should not, by default, try to coerce apostrophe (') and
+quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to
+turn backtick (`) into anything else but a single backtick character
+(distinct from an open quote character!), nor "--" into anything but
+two minus signs. They I<must never> do any of those things to text
+in CE<lt>...> formatting codes, and never I<ever> to text in verbatim
+paragraphs.
+
+=item *
+
+When rendering Pod to a format that has two kinds of hyphens (-), one
+that's a non-breaking hyphen, and another that's a breakable hyphen
+(as in "object-oriented", which can be split across lines as
+"object-", newline, "oriented"), formatters are encouraged to
+generally translate "-" to non-breaking hyphen, but may apply
+heuristics to convert some of these to breaking hyphens.
+
+=item *
+
+Pod formatters should make reasonable efforts to keep words of Perl
+code from being broken across lines. For example, "Foo::Bar" in some
+formatting systems is seen as eligible for being broken across lines
+as "Foo::" newline "Bar" or even "Foo::-" newline "Bar". This should
+be avoided where possible, either by disabling all line-breaking in
+mid-word, or by wrapping particular words with internal punctuation
+in "don't break this across lines" codes (which in some formats may
+not be a single code, but might be a matter of inserting non-breaking
+zero-width spaces between every pair of characters in a word.)
+
+=item *
+
+Pod parsers should, by default, expand tabs in verbatim paragraphs as
+they are processed, before passing them to the formatter or other
+processor. Parsers may also allow an option for overriding this.
+
+=item *
+
+Pod parsers should, by default, remove newlines from the end of
+ordinary and verbatim paragraphs before passing them to the
+formatter. For example, while the paragraph you're reading now
+could be considered, in Pod source, to end with (and contain)
+the newline(s) that end it, it should be processed as ending with
+(and containing) the period character that ends this sentence.
+
+=item *
+
+Pod parsers, when reporting errors, should make some effort to report
+an approximate line number ("Nested EE<lt>>'s in Paragraph #52, near
+line 633 of Thing/Foo.pm!"), instead of merely noting the paragraph
+number ("Nested EE<lt>>'s in Paragraph #52 of Thing/Foo.pm!"). Where
+this is problematic, the paragraph number should at least be
+accompanied by an excerpt from the paragraph ("Nested EE<lt>>'s in
+Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for
+the CE<lt>interest rate> attribute...'").
+
+=item *
+
+Pod parsers, when processing a series of verbatim paragraphs one
+after another, should consider them to be one large verbatim
+paragraph that happens to contain blank lines. I.e., these two
+lines, which have a blank line between them:
+
+ use Foo;
+
+ print Foo->VERSION
+
+should be unified into one paragraph ("\tuse Foo;\n\n\tprint
+Foo->VERSION") before being passed to the formatter or other
+processor. Parsers may also allow an option for overriding this.
+
+While this might be too cumbersome to implement in event-based Pod
+parsers, it is straightforward for parsers that return parse trees.
+
+=item *
+
+Pod formatters, where feasible, are advised to avoid splitting short
+verbatim paragraphs (under twelve lines, say) across pages.
+
+=item *
+
+Pod parsers must treat a line with only spaces and/or tabs on it as a
+"blank line" such as separates paragraphs. (Some older parsers
+recognized only two adjacent newlines as a "blank line" but would not
+recognize a newline, a space, and a newline, as a blank line. This
+is noncompliant behavior.)
+
+=item *
+
+Authors of Pod formatters/processors should make every effort to
+avoid writing their own Pod parser. There are already several in
+CPAN, with a wide range of interface styles -- and one of them,
+Pod::Parser, comes with modern versions of Perl.
+
+=item *
+
+Characters in Pod documents may be conveyed either as literals, or by
+number in EE<lt>n> codes, or by an equivalent mnemonic, as in
+EE<lt>eacute> which is exactly equivalent to EE<lt>233>.
+
+Characters in the range 32-126 refer to those well known US-ASCII
+characters (also defined there by Unicode, with the same meaning),
+which all Pod formatters must render faithfully. Characters
+in the ranges 0-31 and 127-159 should not be used (neither as
+literals, nor as EE<lt>number> codes), except for the
+literal byte-sequences for newline (13, 13 10, or 10), and tab (9).
+
+Characters in the range 160-255 refer to Latin-1 characters (also
+defined there by Unicode, with the same meaning). Characters above
+255 should be understood to refer to Unicode characters.
+
+=item *
+
+Be warned
+that some formatters cannot reliably render characters outside 32-126;
+and many are able to handle 32-126 and 160-255, but nothing above
+255.
+
+=item *
+
+Besides the well-known "EE<lt>lt>" and "EE<lt>gt>" codes for
+less-than and greater-than, Pod parsers must understand "EE<lt>sol>"
+for "/" (solidus, slash), and "EE<lt>verbar>" for "|" (vertical bar,
+pipe). Pod parsers should also understand "EE<lt>lchevron>" and
+"EE<lt>rchevron>" as legacy codes for characters 171 and 187, i.e.,
+"left-pointing double angle quotation mark" = "left pointing
+guillemet" and "right-pointing double angle quotation mark" = "right
+pointing guillemet". (These look like little "<<" and ">>", and they
+are now preferably expressed with the HTML/XHTML codes "EE<lt>laquo>"
+and "EE<lt>raquo>".)
+
+=item *
+
+Pod parsers should understand all "EE<lt>html>" codes as defined
+in the entity declarations in the most recent XHTML specification at
+C<www.W3.org>. Pod parsers must understand at least the entities
+that define characters in the range 160-255 (Latin-1). Pod parsers,
+when faced with some unknown "EE<lt>I<identifier>>" code,
+shouldn't simply replace it with nullstring (by default, at least),
+but may pass it through as a string consisting of the literal characters
+E, less-than, I<identifier>, greater-than. Or Pod parsers may offer the
+alternative option of processing such unknown
+"EE<lt>I<identifier>>" codes by firing an event especially
+for such codes, or by adding a special node-type to the in-memory
+document tree. Such "EE<lt>I<identifier>>" may have special meaning
+to some processors, or some processors may choose to add them to
+a special error report.
+
+=item *
+
+Pod parsers must also support the XHTML codes "EE<lt>quot>" for
+character 34 (doublequote, "), "EE<lt>amp>" for character 38
+(ampersand, &), and "EE<lt>apos>" for character 39 (apostrophe, ').
+
+=item *
+
+Note that in all cases of "EE<lt>whatever>", I<whatever> (whether
+an htmlname, or a number in any base) must consist only of
+alphanumeric characters -- that is, I<whatever> must match
+C<m/\A\w+\z/>. So "EE<lt> 0 1 2 3 >" is invalid, because
+it contains spaces, which aren't alphanumeric characters. This
+presumably does not I<need> special treatment by a Pod processor;
+" 0 1 2 3 " doesn't look like a number in any base, so it would
+presumably be looked up in the table of HTML-like names. Since
+there isn't (and cannot be) an HTML-like entity called " 0 1 2 3 ",
+this will be treated as an error. However, Pod processors may
+treat "EE<lt> 0 1 2 3 >" or "EE<lt>e-acute>" as I<syntactically>
+invalid, potentially earning a different error message than the
+error message (or warning, or event) generated by a merely unknown
+(but theoretically valid) htmlname, as in "EE<lt>qacute>"
+[sic]. However, Pod parsers are not required to make this
+distinction.
+
+=item *
+
+Note that EE<lt>number> I<must not> be interpreted as simply
+"codepoint I<number> in the current/native character set". It always
+means only "the character represented by codepoint I<number> in
+Unicode." (This is identical to the semantics of &#I<number>; in XML.)
+
+This will likely require many formatters to have tables mapping from
+treatable Unicode codepoints (such as the "\xE9" for the e-acute
+character) to the escape sequences or codes necessary for conveying
+such sequences in the target output format. A converter to *roff
+would, for example, know that "\xE9" (whether conveyed literally, or via
+a EE<lt>...> sequence) is to be conveyed as "e\\*'".
+Similarly, a program rendering Pod in a Mac OS application window, would
+presumably need to know that "\xE9" maps to codepoint 142 in MacRoman
+encoding that (at time of writing) is native for Mac OS. Such
+Unicode2whatever mappings are presumably already widely available for
+common output formats. (Such mappings may be incomplete! Implementers
+are not expected to bend over backwards in an attempt to render
+Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any
+of the other weird things that Unicode can encode.) And
+if a Pod document uses a character not found in such a mapping, the
+formatter should consider it an unrenderable character.
+
+=item *
+
+If, surprisingly, the implementor of a Pod formatter can't find a
+satisfactory pre-existing table mapping from Unicode characters to
+escapes in the target format (e.g., a decent table of Unicode
+characters to *roff escapes), it will be necessary to build such a
+table. If you are in this circumstance, you should begin with the
+characters in the range 0x00A0 - 0x00FF, which is mostly the heavily
+used accented characters. Then proceed (as patience permits and
+fastidiousness compels) through the characters that the (X)HTML
+standards groups judged important enough to merit mnemonics
+for. These are declared in the (X)HTML specifications at the
+www.W3.org site. At time of writing (September 2001), the most recent
+entity declaration files are:
+
+ http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
+ http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
+ http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
+
+Then you can progress through any remaining notable Unicode characters
+in the range 0x2000-0x204D (consult the character tables at
+www.unicode.org), and whatever else strikes your fancy. For example,
+in F<xhtml-symbol.ent>, there is the entry:
+
+ <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech -->
+
+While the mapping "infin" to the character "\x{221E}" will (hopefully)
+have been already handled by the Pod parser, the presence of the
+character in this file means that it's reasonably important enough to
+include in a formatter's table that maps from notable Unicode characters
+to the codes necessary for rendering them. So for a Unicode-to-*roff
+mapping, for example, this would merit the entry:
+
+ "\x{221E}" => '\(in',
+
+It is eagerly hoped that in the future, increasing numbers of formats
+(and formatters) will support Unicode characters directly (as (X)HTML
+does with C<&infin;>, C<&#8734;>, or C<&#x221E;>), reducing the need
+for idiosyncratic mappings of Unicode-to-I<my_escapes>.
+
+=item *
+
+It is up to individual Pod formatters to display good judgement when
+confronted with an unrenderable character (which is distinct from an
+unknown EE<lt>thing> sequence that the parser couldn't resolve to
+anything, renderable or not). It is good practice to map Latin letters
+with diacritics (like "EE<lt>eacute>"/"EE<lt>233>") to the corresponding
+unaccented US-ASCII letters (like a simple character 101, "e"), but
+clearly this is often not feasible, and an unrenderable character may
+be represented as "?", or the like. In attempting a sane fallback
+(as from EE<lt>233> to "e"), Pod formatters may use the
+%Latin1Code_to_fallback table in L<Pod::Escapes|Pod::Escapes>, or
+L<Text::Unidecode|Text::Unidecode>, if available.
+
+For example, this Pod text:
+
+ magic is enabled if you set C<$Currency> to 'E<euro>'.
+
+may be rendered as:
+"magic is enabled if you set C<$Currency> to 'I<?>'" or as
+"magic is enabled if you set C<$Currency> to 'B<[euro]>'", or as
+"magic is enabled if you set C<$Currency> to '[x20AC]'", etc.
+
+A Pod formatter may also note, in a comment or warning, a list of what
+unrenderable characters were encountered.
+
+=item *
+
+EE<lt>...> may freely appear in any formatting code (other than
+in another EE<lt>...> or in a ZE<lt>>). That is, "XE<lt>The
+EE<lt>euro>1,000,000 Solution>" is valid, as is "LE<lt>The
+EE<lt>euro>1,000,000 Solution|Million::Euros>".
+
+=item *
+
+Some Pod formatters output to formats that implement non-breaking
+spaces as an individual character (which I'll call "NBSP"), and
+others output to formats that implement non-breaking spaces just as
+spaces wrapped in a "don't break this across lines" code. Note that
+at the level of Pod, both sorts of codes can occur: Pod can contain a
+NBSP character (whether as a literal, or as a "EE<lt>160>" or
+"EE<lt>nbsp>" code); and Pod can contain "SE<lt>foo
+IE<lt>barE<gt> baz>" codes, where "mere spaces" (character 32) in
+such codes are taken to represent non-breaking spaces. Pod
+parsers should consider supporting the optional parsing of "SE<lt>foo
+IE<lt>barE<gt> baz>" as if it were
+"fooI<NBSP>IE<lt>barE<gt>I<NBSP>baz", and, going the other way, the
+optional parsing of groups of words joined by NBSP's as if each group
+were in a SE<lt>...> code, so that formatters may use the
+representation that maps best to what the output format demands.
+
+=item *
+
+Some processors may find that the C<SE<lt>...E<gt>> code is easiest to
+implement by replacing each space in the parse tree under the content
+of the S, with an NBSP. But note: the replacement should apply I<not> to
+spaces in I<all> text, but I<only> to spaces in I<printable> text. (This
+distinction may or may not be evident in the particular tree/event
+model implemented by the Pod parser.) For example, consider this
+unusual case:
+
+ S<L</Autoloaded Functions>>
+
+This means that the space in the middle of the visible link text must
+not be broken across lines. In other words, it's the same as this:
+
+ L<"AutoloadedE<160>Functions"/Autoloaded Functions>
+
+However, a misapplied space-to-NBSP replacement could (wrongly)
+produce something equivalent to this:
+
+ L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions>
+
+...which is almost definitely not going to work as a hyperlink (assuming
+this formatter outputs a format supporting hypertext).
+
+Formatters may choose to just not support the S format code,
+especially in cases where the output format simply has no NBSP
+character/code and no code for "don't break this stuff across lines".
+
+=item *
+
+Besides the NBSP character discussed above, implementors are reminded
+of the existence of the other "special" character in Latin-1, the
+"soft hyphen" character, also known as "discretionary hyphen",
+i.e. C<EE<lt>173E<gt>> = C<EE<lt>0xADE<gt>> =
+C<EE<lt>shyE<gt>>. This character expresses an optional hyphenation
+point. That is, it normally renders as nothing, but may render as a
+"-" if a formatter breaks the word at that point. Pod formatters
+should, as appropriate, do one of the following: 1) render this with
+a code with the same meaning (e.g., "\-" in RTF), 2) pass it through
+in the expectation that the formatter understands this character as
+such, or 3) delete it.
+
+For example:
+
+ sigE<shy>action
+ manuE<shy>script
+ JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi
+
+These signal to a formatter that if it is to hyphenate "sigaction"
+or "manuscript", then it should be done as
+"sig-I<[linebreak]>action" or "manu-I<[linebreak]>script"
+(and if it doesn't hyphenate it, then the C<EE<lt>shyE<gt>> doesn't
+show up at all). And if it is
+to hyphenate "Jarkko" and/or "Hietaniemi", it can do
+so only at the points where there is a C<EE<lt>shyE<gt>> code.
+
+In practice, it is anticipated that this character will not be used
+often, but formatters should either support it, or delete it.
+
+=item *
+
+If you think that you want to add a new command to Pod (like, say, a
+"=biblio" command), consider whether you could get the same
+effect with a for or begin/end sequence: "=for biblio ..." or "=begin
+biblio" ... "=end biblio". Pod processors that don't understand
+"=for biblio", etc, will simply ignore it, whereas they may complain
+loudly if they see "=biblio".
+
+=item *
+
+Throughout this document, "Pod" has been the preferred spelling for
+the name of the documentation format. One may also use "POD" or
+"pod". For the documentation that is (typically) in the Pod
+format, you may use "pod", or "Pod", or "POD". Understanding these
+distinctions is useful; but obsessing over how to spell them, usually
+is not.
+
+=back
+
+
+
+
+
+=head1 About LE<lt>...E<gt> Codes
+
+As you can tell from a glance at L<perlpod|perlpod>, the LE<lt>...>
+code is the most complex of the Pod formatting codes. The points below
+will hopefully clarify what it means and how processors should deal
+with it.
+
+=over
+
+=item *
+
+In parsing an LE<lt>...> code, Pod parsers must distinguish at least
+four attributes:
+
+=over
+
+=item First:
+
+The link-text. If there is none, this must be undef. (E.g., in
+"LE<lt>Perl Functions|perlfunc>", the link-text is "Perl Functions".
+In "LE<lt>Time::HiRes>" and even "LE<lt>|Time::HiRes>", there is no
+link text. Note that link text may contain formatting.)
+
+=item Second:
+
+The possibly inferred link-text -- i.e., if there was no real link
+text, then this is the text that we'll infer in its place. (E.g., for
+"LE<lt>Getopt::Std>", the inferred link text is "Getopt::Std".)
+
+=item Third:
+
+The name or URL, or undef if none. (E.g., in "LE<lt>Perl
+Functions|perlfunc>", the name -- also sometimes called the page --
+is "perlfunc". In "LE<lt>/CAVEATS>", the name is undef.)
+
+=item Fourth:
+
+The section (AKA "item" in older perlpods), or undef if none. E.g.,
+in "LE<lt>Getopt::Std/DESCRIPTIONE<gt>", "DESCRIPTION" is the section. (Note
+that this is not the same as a manpage section like the "5" in "man 5
+crontab". "Section Foo" in the Pod sense means the part of the text
+that's introduced by the heading or item whose text is "Foo".)
+
+=back
+
+Pod parsers may also note additional attributes including:
+
+=over
+
+=item Fifth:
+
+A flag for whether item 3 (if present) is a URL (like
+"http://lists.perl.org" is), in which case there should be no section
+attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or
+possibly a man page name (like "crontab(5)" is).
+
+=item Sixth:
+
+The raw original LE<lt>...> content, before text is split on
+"|", "/", etc, and before EE<lt>...> codes are expanded.
+
+=back
+
+(The above were numbered only for concise reference below. It is not
+a requirement that these be passed as an actual list or array.)
+
+For example:
+
+ L<Foo::Bar>
+ => undef, # link text
+ "Foo::Bar", # possibly inferred link text
+ "Foo::Bar", # name
+ undef, # section
+ 'pod', # what sort of link
+ "Foo::Bar" # original content
+
+ L<Perlport's section on NL's|perlport/Newlines>
+ => "Perlport's section on NL's", # link text
+ "Perlport's section on NL's", # possibly inferred link text
+ "perlport", # name
+ "Newlines", # section
+ 'pod', # what sort of link
+ "Perlport's section on NL's|perlport/Newlines" # orig. content
+
+ L<perlport/Newlines>
+ => undef, # link text
+ '"Newlines" in perlport', # possibly inferred link text
+ "perlport", # name
+ "Newlines", # section
+ 'pod', # what sort of link
+ "perlport/Newlines" # original content
+
+ L<crontab(5)/"DESCRIPTION">
+ => undef, # link text
+ '"DESCRIPTION" in crontab(5)', # possibly inferred link text
+ "crontab(5)", # name
+ "DESCRIPTION", # section
+ 'man', # what sort of link
+ 'crontab(5)/"DESCRIPTION"' # original content
+
+ L</Object Attributes>
+ => undef, # link text
+ '"Object Attributes"', # possibly inferred link text
+ undef, # name
+ "Object Attributes", # section
+ 'pod', # what sort of link
+ "/Object Attributes" # original content
+
+ L<http://www.perl.org/>
+ => undef, # link text
+ "http://www.perl.org/", # possibly inferred link text
+ "http://www.perl.org/", # name
+ undef, # section
+ 'url', # what sort of link
+ "http://www.perl.org/" # original content
+
+Note that you can distinguish URL-links from anything else by the
+fact that they match C<m/\A\w+:[^:\s]\S*\z/>. So
+C<LE<lt>http://www.perl.comE<gt>> is a URL, but
+C<LE<lt>HTTP::ResponseE<gt>> isn't.
+
+=item *
+
+In case of LE<lt>...> codes with no "text|" part in them,
+older formatters have exhibited great variation in actually displaying
+the link or cross reference. For example, LE<lt>crontab(5)> would render
+as "the C<crontab(5)> manpage", or "in the C<crontab(5)> manpage"
+or just "C<crontab(5)>".
+
+Pod processors must now treat "text|"-less links as follows:
+
+ L<name> => L<name|name>
+ L</section> => L<"section"|/section>
+ L<name/section> => L<"section" in name|name/section>
+
+=item *
+
+Note that section names might contain markup. I.e., if a section
+starts with:
+
+ =head2 About the C<-M> Operator
+
+or with:
+
+ =item About the C<-M> Operator
+
+then a link to it would look like this:
+
+ L<somedoc/About the C<-M> Operator>
+
+Formatters may choose to ignore the markup for purposes of resolving
+the link and use only the renderable characters in the section name,
+as in:
+
+ <h1><a name="About_the_-M_Operator">About the <code>-M</code>
+ Operator</a></h1>
+
+ ...
+
+ <a href="somedoc#About_the_-M_Operator">"About the <code>-M</code>
+ Operator" in somedoc</a>
+
+=item *
+
+Previous versions of perlpod distinguished C<LE<lt>name/"section"E<gt>>
+links from C<LE<lt>name/itemE<gt>> links (and their targets). These
+have been merged syntactically and semantically in the current
+specification, and I<section> can refer either to a "=headI<n> Heading
+Content" command or to a "=item Item Content" command. This
+specification does not specify what behavior should be in the case
+of a given document having several things all seeming to produce the
+same I<section> identifier (e.g., in HTML, several things all producing
+the same I<anchorname> in <a name="I<anchorname>">...</a>
+elements). Where Pod processors can control this behavior, they should
+use the first such anchor. That is, C<LE<lt>Foo/BarE<gt>> refers to the
+I<first> "Bar" section in Foo.
+
+But for some processors/formats this cannot be easily controlled; as
+with the HTML example, the behavior of multiple ambiguous
+<a name="I<anchorname>">...</a> is most easily just left up to
+browsers to decide.
+
+=item *
+
+Authors wanting to link to a particular (absolute) URL, must do so
+only with "LE<lt>scheme:...>" codes (like
+LE<lt>http://www.perl.org>), and must not attempt "LE<lt>Some Site
+Name|scheme:...>" codes. This restriction avoids many problems
+in parsing and rendering LE<lt>...> codes.
+
+=item *
+
+In a C<LE<lt>text|...E<gt>> code, text may contain formatting codes
+for formatting or for EE<lt>...> escapes, as in:
+
+ L<B<ummE<234>stuff>|...>
+
+For C<LE<lt>...E<gt>> codes without a "text|" part, only
+C<EE<lt>...E<gt>> and C<ZE<lt>E<gt>> codes may occur -- no
+other formatting codes. That is, authors should not use
+"C<LE<lt>BE<lt>Foo::BarE<gt>E<gt>>".
+
+Note, however, that formatting codes and ZE<lt>>'s can occur in any
+and all parts of an LE<lt>...> (i.e., in I<name>, I<section>, I<text>,
+and I<url>).
+
+Authors must not nest LE<lt>...> codes. For example, "LE<lt>The
+LE<lt>Foo::Bar> man page>" should be treated as an error.
+
+=item *
+
+Note that Pod authors may use formatting codes inside the "text"
+part of "LE<lt>text|name>" (and so on for LE<lt>text|/"sec">).
+
+In other words, this is valid:
+
+ Go read L<the docs on C<$.>|perlvar-copy/"$.">
+
+Some output formats that do allow rendering "LE<lt>...>" codes as
+hypertext, might not allow the link-text to be formatted; in
+that case, formatters will have to just ignore that formatting.
+
+=item *
+
+At time of writing, C<LE<lt>nameE<gt>> values are of two types:
+either the name of a Pod page like C<LE<lt>Foo::BarE<gt>> (which
+might be a real Perl module or program in an @INC / PATH
+directory, or a .pod file in those places); or the name of a UNIX
+man page, like C<LE<lt>crontab(5)E<gt>>. In theory, C<LE<lt>chmodE<gt>>
+is ambiguous between a Pod page called "chmod", or the Unix man page
+"chmod" (in whatever man-section). However, the presence of a string
+in parens, as in "crontab(5)", is sufficient to signal that what
+is being discussed is not a Pod page, and so is presumably a
+UNIX man page. The distinction is of no importance to many
+Pod processors, but some processors that render to hypertext formats
+may need to distinguish them in order to know how to render a
+given C<LE<lt>fooE<gt>> code.
+
+=item *
+
+Previous versions of perlpod allowed for a C<LE<lt>sectionE<gt>> syntax
+(as in C<LE<lt>Object AttributesE<gt>>), which was not easily distinguishable
+from C<LE<lt>nameE<gt>> syntax. This syntax is no longer in the
+specification, and has been replaced by the C<LE<lt>"section"E<gt>> syntax
+(where the quotes were formerly optional). Pod parsers should tolerate
+the C<LE<lt>sectionE<gt>> syntax, for a while at least. The suggested
+heuristic for distinguishing C<LE<lt>sectionE<gt>> from C<LE<lt>nameE<gt>>
+is that if it contains any whitespace, it's a I<section>. Pod processors
+may warn about this being deprecated syntax.
+
+=back
+
+=head1 About =over...=back Regions
+
+"=over"..."=back" regions are used for various kinds of list-like
+structures. (I use the term "region" here simply as a collective
+term for everything from the "=over" to the matching "=back".)
+
+=over
+
+=item *
+
+The non-zero numeric I<indentlevel> in "=over I<indentlevel>" ...
+"=back" is used for giving the formatter a clue as to how many
+"spaces" (ems, or roughly equivalent units) it should tab over,
+although many formatters will have to convert this to an absolute
+measurement that may not exactly match with the size of spaces (or M's)
+in the document's base font. Other formatters may have to completely
+ignore the number. The lack of any explicit I<indentlevel> parameter is
+equivalent to an I<indentlevel> value of 4. Pod processors may
+complain if I<indentlevel> is present but is not a positive number
+matching C<m/\A(\d*\.)?\d+\z/>.
+
+=item *
+
+Authors of Pod formatters are reminded that "=over" ... "=back" may
+map to several different constructs in your output format. For
+example, in converting Pod to (X)HTML, it can map to any of
+<ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or
+<blockquote>...</blockquote>. Similarly, "=item" can map to <li> or
+<dt>.
+
+=item *
+
+Each "=over" ... "=back" region should be one of the following:
+
+=over
+
+=item *
+
+An "=over" ... "=back" region containing only "=item *" commands,
+each followed by some number of ordinary/verbatim paragraphs, other
+nested "=over" ... "=back" regions, "=for..." paragraphs, and
+"=begin"..."=end" regions.
+
+(Pod processors must tolerate a bare "=item" as if it were "=item
+*".) Whether "*" is rendered as a literal asterisk, an "o", or as
+some kind of real bullet character, is left up to the Pod formatter,
+and may depend on the level of nesting.
+
+=item *
+
+An "=over" ... "=back" region containing only
+C<m/\A=item\s+\d+\.?\s*\z/> paragraphs, each one (or each group of them)
+followed by some number of ordinary/verbatim paragraphs, other nested
+"=over" ... "=back" regions, "=for..." paragraphs, and/or
+"=begin"..."=end" regions. Note that the numbers must start at 1
+in each section, and must proceed in order and without skipping
+numbers.
+
+(Pod processors must tolerate lines like "=item 1" as if they were
+"=item 1.", with the period.)
+
+=item *
+
+An "=over" ... "=back" region containing only "=item [text]"
+commands, each one (or each group of them) followed by some number of
+ordinary/verbatim paragraphs, other nested "=over" ... "=back"
+regions, or "=for..." paragraphs, and "=begin"..."=end" regions.
+
+The "=item [text]" paragraph should not match
+C<m/\A=item\s+\d+\.?\s*\z/> or C<m/\A=item\s+\*\s*\z/>, nor should it
+match just C<m/\A=item\s*\z/>.
+
+=item *
+
+An "=over" ... "=back" region containing no "=item" paragraphs at
+all, and containing only some number of
+ordinary/verbatim paragraphs, and possibly also some nested "=over"
+... "=back" regions, "=for..." paragraphs, and "=begin"..."=end"
+regions. Such an itemless "=over" ... "=back" region in Pod is
+equivalent in meaning to a "<blockquote>...</blockquote>" element in
+HTML.
+
+=back
+
+Note that with all the above cases, you can determine which type of
+"=over" ... "=back" you have, by examining the first (non-"=cut",
+non-"=pod") Pod paragraph after the "=over" command.
+
+=item *
+
+Pod formatters I<must> tolerate arbitrarily large amounts of text
+in the "=item I<text...>" paragraph. In practice, most such
+paragraphs are short, as in:
+
+ =item For cutting off our trade with all parts of the world
+
+But they may be arbitrarily long:
+
+ =item For transporting us beyond seas to be tried for pretended
+ offenses
+
+ =item He is at this time transporting large armies of foreign
+ mercenaries to complete the works of death, desolation and
+ tyranny, already begun with circumstances of cruelty and perfidy
+ scarcely paralleled in the most barbarous ages, and totally
+ unworthy the head of a civilized nation.
+
+=item *
+
+Pod processors should tolerate "=item *" / "=item I<number>" commands
+with no accompanying paragraph. The middle item is an example:
+
+ =over
+
+ =item 1
+
+ Pick up dry cleaning.
+
+ =item 2
+
+ =item 3
+
+ Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs.
+
+ =back
+
+=item *
+
+No "=over" ... "=back" region can contain headings. Processors may
+treat such a heading as an error.
+
+=item *
+
+Note that an "=over" ... "=back" region should have some
+content. That is, authors should not have an empty region like this:
+
+ =over
+
+ =back
+
+Pod processors seeing such a contentless "=over" ... "=back" region,
+may ignore it, or may report it as an error.
+
+=item *
+
+Processors must tolerate an "=over" list that goes off the end of the
+document (i.e., which has no matching "=back"), but they may warn
+about such a list.
+
+=item *
+
+Authors of Pod formatters should note that this construct:
+
+ =item Neque
+
+ =item Porro
+
+ =item Quisquam Est
+
+ Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
+ velit, sed quia non numquam eius modi tempora incidunt ut
+ labore et dolore magnam aliquam quaerat voluptatem.
+
+ =item Ut Enim
+
+is semantically ambiguous, in a way that makes formatting decisions
+a bit difficult. On the one hand, it could be mention of an item
+"Neque", mention of another item "Porro", and mention of another
+item "Quisquam Est", with just the last one requiring the explanatory
+paragraph "Qui dolorem ipsum quia dolor..."; and then an item
+"Ut Enim". In that case, you'd want to format it like so:
+
+ Neque
+
+ Porro
+
+ Quisquam Est
+ Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
+ velit, sed quia non numquam eius modi tempora incidunt ut
+ labore et dolore magnam aliquam quaerat voluptatem.
+
+ Ut Enim
+
+But it could equally well be a discussion of three (related or equivalent)
+items, "Neque", "Porro", and "Quisquam Est", followed by a paragraph
+explaining them all, and then a new item "Ut Enim". In that case, you'd
+probably want to format it like so:
+
+ Neque
+ Porro
+ Quisquam Est
+ Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
+ velit, sed quia non numquam eius modi tempora incidunt ut
+ labore et dolore magnam aliquam quaerat voluptatem.
+
+ Ut Enim
+
+But (for the foreseeable future), Pod does not provide any way for Pod
+authors to distinguish which grouping is meant by the above
+"=item"-cluster structure. So formatters should format it like so:
+
+ Neque
+
+ Porro
+
+ Quisquam Est
+
+ Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
+ velit, sed quia non numquam eius modi tempora incidunt ut
+ labore et dolore magnam aliquam quaerat voluptatem.
+
+ Ut Enim
+
+That is, there should be (at least roughly) equal spacing between
+items as between paragraphs (although that spacing may well be less
+than the full height of a line of text). This leaves it to the reader
+to use (con)textual cues to figure out whether the "Qui dolorem
+ipsum..." paragraph applies to the "Quisquam Est" item or to all three
+items "Neque", "Porro", and "Quisquam Est". While not an ideal
+situation, this is preferable to providing formatting cues that may
+be actually contrary to the author's intent.
+
+=back
+
+
+
+=head1 About Data Paragraphs and "=begin/=end" Regions
+
+Data paragraphs are typically used for inlining non-Pod data that is
+to be used (typically passed through) when rendering the document to
+a specific format:
+
+ =begin rtf
+
+ \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par}
+
+ =end rtf
+
+The exact same effect could, incidentally, be achieved with a single
+"=for" paragraph:
+
+ =for rtf \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par}
+
+(Although that is not formally a data paragraph, it has the same
+meaning as one, and Pod parsers may parse it as one.)
+
+Another example of a data paragraph:
+
+ =begin html
+
+ I like <em>PIE</em>!
+
+ <hr>Especially pecan pie!
+
+ =end html
+
+If these were ordinary paragraphs, the Pod parser would try to
+expand the "EE<lt>/em>" (in the first paragraph) as a formatting
+code, just like "EE<lt>lt>" or "EE<lt>eacute>". But since this
+is in a "=begin I<identifier>"..."=end I<identifier>" region I<and>
+the identifier "html" doesn't begin with a ":" prefix, the contents
+of this region are stored as data paragraphs, instead of being
+processed as ordinary paragraphs (or if they began with spaces
+and/or tabs, as verbatim paragraphs).
+
+As a further example: At time of writing, no "biblio" identifier is
+supported, but suppose some processor were written to recognize it as
+a way of (say) denoting a bibliographic reference (necessarily
+containing formatting codes in ordinary paragraphs). The fact that
+"biblio" paragraphs were meant for ordinary processing would be
+indicated by prefacing each "biblio" identifier with a colon:
+
+ =begin :biblio
+
+ Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
+ Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+
+ =end :biblio
+
+This would signal to the parser that paragraphs in this begin...end
+region are subject to normal handling as ordinary/verbatim paragraphs
+(while still tagged as meant only for processors that understand the
+"biblio" identifier). The same effect could be had with:
+
+ =for :biblio
+ Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
+ Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+
+The ":" on these identifiers means simply "process this stuff
+normally, even though the result will be for some special target".
+I suggest that parser APIs report "biblio" as the target identifier,
+but also report that it had a ":" prefix. (And similarly, with the
+above "html", report "html" as the target identifier, and note the
+I<lack> of a ":" prefix.)
+
+Note that a "=begin I<identifier>"..."=end I<identifier>" region where
+I<identifier> begins with a colon, I<can> contain commands. For example:
+
+ =begin :biblio
+
+ Wirth's classic is available in several editions, including:
+
+ =for comment
+ hm, check abebooks.com for how much used copies cost.
+
+ =over
+
+ =item
+
+ Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.>
+ Teubner, Stuttgart. [Yes, it's in German.]
+
+ =item
+
+ Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
+ Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+
+ =back
+
+ =end :biblio
+
+Note, however, a "=begin I<identifier>"..."=end I<identifier>"
+region where I<identifier> does I<not> begin with a colon, should not
+directly contain "=head1" ... "=head4" commands, nor "=over", nor "=back",
+nor "=item". For example, this may be considered invalid:
+
+ =begin somedata
+
+ This is a data paragraph.
+
+ =head1 Don't do this!
+
+ This is a data paragraph too.
+
+ =end somedata
+
+A Pod processor may signal that the above (specifically the "=head1"
+paragraph) is an error. Note, however, that the following should
+I<not> be treated as an error:
+
+ =begin somedata
+
+ This is a data paragraph.
+
+ =cut
+
+ # Yup, this isn't Pod anymore.
+ sub excl { (rand() > .5) ? "hoo!" : "hah!" }
+
+ =pod
+
+ This is a data paragraph too.
+
+ =end somedata
+
+And this too is valid:
+
+ =begin someformat
+
+ This is a data paragraph.
+
+ And this is a data paragraph.
+
+ =begin someotherformat
+
+ This is a data paragraph too.
+
+ And this is a data paragraph too.
+
+ =begin :yetanotherformat
+
+ =head2 This is a command paragraph!
+
+ This is an ordinary paragraph!
+
+ And this is a verbatim paragraph!
+
+ =end :yetanotherformat
+
+ =end someotherformat
+
+ Another data paragraph!
+
+ =end someformat
+
+The contents of the above "=begin :yetanotherformat" ...
+"=end :yetanotherformat" region I<aren't> data paragraphs, because
+the immediately containing region's identifier (":yetanotherformat")
+begins with a colon. In practice, most regions that contain
+data paragraphs will contain I<only> data paragraphs; however,
+the above nesting is syntactically valid as Pod, even if it is
+rare. However, the handlers for some formats, like "html",
+will accept only data paragraphs, not nested regions; and they may
+complain if they see (targeted for them) nested regions, or commands,
+other than "=end", "=pod", and "=cut".
+
+Also consider this valid structure:
+
+ =begin :biblio
+
+ Wirth's classic is available in several editions, including:
+
+ =over
+
+ =item
+
+ Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.>
+ Teubner, Stuttgart. [Yes, it's in German.]
+
+ =item
+
+ Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
+ Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+
+ =back
+
+ Buy buy buy!
+
+ =begin html
+
+ <img src='wirth_spokesmodeling_book.png'>
+
+ <hr>
+
+ =end html
+
+ Now now now!
+
+ =end :biblio
+
+There, the "=begin html"..."=end html" region is nested inside
+the larger "=begin :biblio"..."=end :biblio" region. Note that the
+content of the "=begin html"..."=end html" region is data
+paragraph(s), because the immediately containing region's identifier
+("html") I<doesn't> begin with a colon.
+
+Pod parsers, when processing a series of data paragraphs one
+after another (within a single region), should consider them to
+be one large data paragraph that happens to contain blank lines. So
+the content of the above "=begin html"..."=end html" I<may> be stored
+as two data paragraphs (one consisting of
+"<img src='wirth_spokesmodeling_book.png'>\n"
+and another consisting of "<hr>\n"), but I<should> be stored as
+a single data paragraph (consisting of
+"<img src='wirth_spokesmodeling_book.png'>\n\n<hr>\n").
+
+Pod processors should tolerate empty
+"=begin I<something>"..."=end I<something>" regions,
+empty "=begin :I<something>"..."=end :I<something>" regions, and
+contentless "=for I<something>" and "=for :I<something>"
+paragraphs. I.e., these should be tolerated:
+
+ =for html
+
+ =begin html
+
+ =end html
+
+ =begin :biblio
+
+ =end :biblio
+
+Incidentally, note that there's no easy way to express a data
+paragraph starting with something that looks like a command. Consider:
+
+ =begin stuff
+
+ =shazbot
+
+ =end stuff
+
+There, "=shazbot" will be parsed as a Pod command "shazbot", not as a data
+paragraph "=shazbot\n". However, you can express a data paragraph consisting
+of "=shazbot\n" using this code:
+
+ =for stuff =shazbot
+
+The situation where this is necessary, is presumably quite rare.
+
+Note that =end commands must match the currently open =begin command. That
+is, they must properly nest. For example, this is valid:
+
+ =begin outer
+
+ X
+
+ =begin inner
+
+ Y
+
+ =end inner
+
+ Z
+
+ =end outer
+
+while this is invalid:
+
+ =begin outer
+
+ X
+
+ =begin inner
+
+ Y
+
+ =end outer
+
+ Z
+
+ =end inner
+
+This latter is improper because when the "=end outer" command is seen, the
+currently open region has the formatname "inner", not "outer". (It just
+happens that "outer" is the format name of a higher-up region.) This is
+an error. Processors must by default report this as an error, and may halt
+processing the document containing that error. A corollary of this is that
+regions cannot "overlap" -- i.e., the latter block above does not represent
+a region called "outer" which contains X and Y, overlapping a region called
+"inner" which contains Y and Z. But because it is invalid (as all
+apparently overlapping regions would be), it doesn't represent that, or
+anything at all.
+
+Similarly, this is invalid:
+
+ =begin thing
+
+ =end hting
+
+This is an error because the region is opened by "thing", and the "=end"
+tries to close "hting" [sic].
+
+This is also invalid:
+
+ =begin thing
+
+ =end
+
+This is invalid because every "=end" command must have a formatname
+parameter.
+
+=head1 SEE ALSO
+
+L<perlpod>, L<perlsyn/"PODs: Embedded Documentation">,
+L<podchecker>
+
+=head1 AUTHOR
+
+Sean M. Burke
+
+=cut
+
+
diff --git a/ext/Pod-Html/testdir/perlvar-copy.pod b/ext/Pod-Html/testdir/perlvar-copy.pod
new file mode 100644
index 0000000000..04d2a7474c
--- /dev/null
+++ b/ext/Pod-Html/testdir/perlvar-copy.pod
@@ -0,0 +1,1737 @@
+=head1 NAME
+
+perlvarcopy - Perl predefined variables
+
+=head1 DESCRIPTION
+
+=head2 Predefined Names
+
+The following names have special meaning to Perl. Most
+punctuation names have reasonable mnemonics, or analogs in the
+shells. Nevertheless, if you wish to use long variable names,
+you need only say
+
+ use English;
+
+at the top of your program. This aliases all the short names to the long
+names in the current package. Some even have medium names, generally
+borrowed from B<awk>. In general, it's best to use the
+
+ use English '-no_match_vars';
+
+invocation if you don't need $PREMATCH, $MATCH, or $POSTMATCH, as it avoids
+a certain performance hit with the use of regular expressions. See
+L<English>.
+
+Variables that depend on the currently selected filehandle may be set by
+calling an appropriate object method on the IO::Handle object, although
+this is less efficient than using the regular built-in variables. (Summary
+lines below for this contain the word HANDLE.) First you must say
+
+ use IO::Handle;
+
+after which you may use either
+
+ method HANDLE EXPR
+
+or more safely,
+
+ HANDLE->method(EXPR)
+
+Each method returns the old value of the IO::Handle attribute.
+The methods each take an optional EXPR, which, if supplied, specifies the
+new value for the IO::Handle attribute in question. If not supplied,
+most methods do nothing to the current value--except for
+autoflush(), which will assume a 1 for you, just to be different.
+
+Because loading in the IO::Handle class is an expensive operation, you should
+learn how to use the regular built-in variables.
+
+A few of these variables are considered "read-only". This means that if
+you try to assign to this variable, either directly or indirectly through
+a reference, you'll raise a run-time exception.
+
+You should be very careful when modifying the default values of most
+special variables described in this document. In most cases you want
+to localize these variables before changing them, since if you don't,
+the change may affect other modules which rely on the default values
+of the special variables that you have changed. This is one of the
+correct ways to read the whole file at once:
+
+ open my $fh, "<", "foo" or die $!;
+ local $/; # enable localized slurp mode
+ my $content = <$fh>;
+ close $fh;
+
+But the following code is quite bad:
+
+ open my $fh, "<", "foo" or die $!;
+ undef $/; # enable slurp mode
+ my $content = <$fh>;
+ close $fh;
+
+since some other module may want to read data from some file in the
+default "line mode", so if the code we have just presented has been
+executed, the global value of C<$/> is now changed for any other code
+running inside the same Perl interpreter.
+
+Usually when a variable is localized you want to make sure that this
+change affects the shortest scope possible. So unless you are already
+inside some short C<{}> block, you should create one yourself. For
+example:
+
+ my $content = '';
+ open my $fh, "<", "foo" or die $!;
+ {
+ local $/;
+ $content = <$fh>;
+ }
+ close $fh;
+
+Here is an example of how your own code can break:
+
+ for (1..5){
+ nasty_break();
+ print "$_ ";
+ }
+ sub nasty_break {
+ $_ = 5;
+ # do something with $_
+ }
+
+You probably expect this code to print:
+
+ 1 2 3 4 5
+
+but instead you get:
+
+ 5 5 5 5 5
+
+Why? Because nasty_break() modifies C<$_> without localizing it
+first. The fix is to add local():
+
+ local $_ = 5;
+
+It's easy to notice the problem in such a short example, but in more
+complicated code you are looking for trouble if you don't localize
+changes to the special variables.
+
+The following list is ordered by scalar variables first, then the
+arrays, then the hashes.
+
+=over 8
+
+=item $ARG
+
+=item $_
+X<$_> X<$ARG>
+
+The default input and pattern-searching space. The following pairs are
+equivalent:
+
+ while (<>) {...} # equivalent only in while!
+ while (defined($_ = <>)) {...}
+
+ /^Subject:/
+ $_ =~ /^Subject:/
+
+ tr/a-z/A-Z/
+ $_ =~ tr/a-z/A-Z/
+
+ chomp
+ chomp($_)
+
+Here are the places where Perl will assume $_ even if you
+don't use it:
+
+=over 3
+
+=item *
+
+The following functions:
+
+abs, alarm, chomp, chop, chr, chroot, cos, defined, eval, exp, glob,
+hex, int, lc, lcfirst, length, log, lstat, mkdir, oct, ord, pos, print,
+quotemeta, readlink, readpipe, ref, require, reverse (in scalar context only),
+rmdir, sin, split (on its second argument), sqrt, stat, study, uc, ucfirst,
+unlink, unpack.
+
+=item *
+
+All file tests (C<-f>, C<-d>) except for C<-t>, which defaults to STDIN.
+See L<perlfunc/-X>.
+
+
+=item *
+
+The pattern matching operations C<m//>, C<s///> and C<tr///> (aka C<y///>)
+when used without an C<=~> operator.
+
+=item *
+
+The default iterator variable in a C<foreach> loop if no other
+variable is supplied.
+
+=item *
+
+The implicit iterator variable in the grep() and map() functions.
+
+=item *
+
+The implicit variable of given().
+
+=item *
+
+The default place to put an input record when a C<< <FH> >>
+operation's result is tested by itself as the sole criterion of a C<while>
+test. Outside a C<while> test, this will not happen.
+
+=back
+
+As C<$_> is a global variable, this may lead in some cases to unwanted
+side-effects. As of perl 5.9.1, you can now use a lexical version of
+C<$_> by declaring it in a file or in a block with C<my>. Moreover,
+declaring C<our $_> restores the global C<$_> in the current scope.
+
+(Mnemonic: underline is understood in certain operations.)
+
+=back
+
+=over 8
+
+=item $a
+
+=item $b
+X<$a> X<$b>
+
+Special package variables when using sort(), see L<perlfunc/sort>.
+Because of this specialness $a and $b don't need to be declared
+(using use vars, or our()) even when using the C<strict 'vars'> pragma.
+Don't lexicalize them with C<my $a> or C<my $b> if you want to be
+able to use them in the sort() comparison block or function.
+
+=back
+
+=over 8
+
+=item $<I<digits>>
+X<$1> X<$2> X<$3>
+
+Contains the subpattern from the corresponding set of capturing
+parentheses from the last pattern match, not counting patterns
+matched in nested blocks that have been exited already. (Mnemonic:
+like \digits.) These variables are all read-only and dynamically
+scoped to the current BLOCK.
+
+=item $MATCH
+
+=item $&
+X<$&> X<$MATCH>
+
+The string matched by the last successful pattern match (not counting
+any matches hidden within a BLOCK or eval() enclosed by the current
+BLOCK). (Mnemonic: like & in some editors.) This variable is read-only
+and dynamically scoped to the current BLOCK.
+
+The use of this variable anywhere in a program imposes a considerable
+performance penalty on all regular expression matches. See L</BUGS>.
+
+See L</@-> for a replacement.
+
+=item ${^MATCH}
+X<${^MATCH}>
+
+This is similar to C<$&> (C<$MATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</p> modifier.
+
+=item $PREMATCH
+
+=item $`
+X<$`> X<$PREMATCH>
+
+The string preceding whatever was matched by the last successful
+pattern match (not counting any matches hidden within a BLOCK or eval
+enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted
+string.) This variable is read-only.
+
+The use of this variable anywhere in a program imposes a considerable
+performance penalty on all regular expression matches. See L</BUGS>.
+
+See L</@-> for a replacement.
+
+=item ${^PREMATCH}
+X<${^PREMATCH}>
+
+This is similar to C<$`> ($PREMATCH) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</p> modifier.
+
+=item $POSTMATCH
+
+=item $'
+X<$'> X<$POSTMATCH>
+
+The string following whatever was matched by the last successful
+pattern match (not counting any matches hidden within a BLOCK or eval()
+enclosed by the current BLOCK). (Mnemonic: C<'> often follows a quoted
+string.) Example:
+
+ local $_ = 'abcdefghi';
+ /def/;
+ print "$`:$&:$'\n"; # prints abc:def:ghi
+
+This variable is read-only and dynamically scoped to the current BLOCK.
+
+The use of this variable anywhere in a program imposes a considerable
+performance penalty on all regular expression matches. See L</BUGS>.
+
+See L</@-> for a replacement.
+
+=item ${^POSTMATCH}
+X<${^POSTMATCH}>
+
+This is similar to C<$'> (C<$POSTMATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</p> modifier.
+
+=item $LAST_PAREN_MATCH
+
+=item $+
+X<$+> X<$LAST_PAREN_MATCH>
+
+The text matched by the last bracket of the last successful search pattern.
+This is useful if you don't know which one of a set of alternative patterns
+matched. For example:
+
+ /Version: (.*)|Revision: (.*)/ && ($rev = $+);
+
+(Mnemonic: be positive and forward looking.)
+This variable is read-only and dynamically scoped to the current BLOCK.
+
+=item $LAST_SUBMATCH_RESULT
+
+=item $^N
+X<$^N>
+
+The text matched by the used group most-recently closed (i.e. the group
+with the rightmost closing parenthesis) of the last successful search
+pattern. (Mnemonic: the (possibly) Nested parenthesis that most
+recently closed.)
+
+This is primarily used inside C<(?{...})> blocks for examining text
+recently matched. For example, to effectively capture text to a variable
+(in addition to C<$1>, C<$2>, etc.), replace C<(...)> with
+
+ (?:(...)(?{ $var = $^N }))
+
+Setting and then using C<$var> in this way relieves you from having to
+worry about exactly which numbered set of parentheses they are.
+
+This variable is dynamically scoped to the current BLOCK.
+
+=item @LAST_MATCH_END
+
+=item @+
+X<@+> X<@LAST_MATCH_END>
+
+This array holds the offsets of the ends of the last successful
+submatches in the currently active dynamic scope. C<$+[0]> is
+the offset into the string of the end of the entire match. This
+is the same value as what the C<pos> function returns when called
+on the variable that was matched against. The I<n>th element
+of this array holds the offset of the I<n>th submatch, so
+C<$+[1]> is the offset past where $1 ends, C<$+[2]> the offset
+past where $2 ends, and so on. You can use C<$#+> to determine
+how many subgroups were in the last successful match. See the
+examples given for the C<@-> variable.
+
+=item %LAST_PAREN_MATCH
+
+=item %+
+X<%+>
+
+Similar to C<@+>, the C<%+> hash allows access to the named capture
+buffers, should they exist, in the last successful match in the
+currently active dynamic scope.
+
+For example, C<$+{foo}> is equivalent to C<$1> after the following match:
+
+ 'foo' =~ /(?<foo>foo)/;
+
+The keys of the C<%+> hash list only the names of buffers that have
+captured (and that are thus associated to defined values).
+
+The underlying behaviour of C<%+> is provided by the
+L<Tie::Hash::NamedCapture> module.
+
+B<Note:> C<%-> and C<%+> are tied views into a common internal hash
+associated with the last successful regular expression. Therefore mixing
+iterative access to them via C<each> may have unpredictable results.
+Likewise, if the last successful match changes, then the results may be
+surprising.
+
+=item HANDLE->input_line_number(EXPR)
+
+=item $INPUT_LINE_NUMBER
+
+=item $NR
+
+=item $.
+X<$.> X<$NR> X<$INPUT_LINE_NUMBER> X<line number>
+
+Current line number for the last filehandle accessed.
+
+Each filehandle in Perl counts the number of lines that have been read
+from it. (Depending on the value of C<$/>, Perl's idea of what
+constitutes a line may not match yours.) When a line is read from a
+filehandle (via readline() or C<< <> >>), or when tell() or seek() is
+called on it, C<$.> becomes an alias to the line counter for that
+filehandle.
+
+You can adjust the counter by assigning to C<$.>, but this will not
+actually move the seek pointer. I<Localizing C<$.> will not localize
+the filehandle's line count>. Instead, it will localize perl's notion
+of which filehandle C<$.> is currently aliased to.
+
+C<$.> is reset when the filehandle is closed, but B<not> when an open
+filehandle is reopened without an intervening close(). For more
+details, see L<perlop/"IE<sol>O Operators">. Because C<< <> >> never does
+an explicit close, line numbers increase across ARGV files (but see
+examples in L<perlfunc/eof>).
+
+You can also use C<< HANDLE->input_line_number(EXPR) >> to access the
+line counter for a given filehandle without having to worry about
+which handle you last accessed.
+
+(Mnemonic: many programs use "." to mean the current line number.)
+
+=item IO::Handle->input_record_separator(EXPR)
+
+=item $INPUT_RECORD_SEPARATOR
+
+=item $RS
+
+=item $/
+X<$/> X<$RS> X<$INPUT_RECORD_SEPARATOR>
+
+The input record separator, newline by default. This
+influences Perl's idea of what a "line" is. Works like B<awk>'s RS
+variable, including treating empty lines as a terminator if set to
+the null string. (An empty line cannot contain any spaces
+or tabs.) You may set it to a multi-character string to match a
+multi-character terminator, or to C<undef> to read through the end
+of file. Setting it to C<"\n\n"> means something slightly
+different than setting to C<"">, if the file contains consecutive
+empty lines. Setting to C<""> will treat two or more consecutive
+empty lines as a single empty line. Setting to C<"\n\n"> will
+blindly assume that the next input character belongs to the next
+paragraph, even if it's a newline. (Mnemonic: / delimits
+line boundaries when quoting poetry.)
+
+ local $/; # enable "slurp" mode
+ local $_ = <FH>; # whole file now here
+ s/\n[ \t]+/ /g;
+
+Remember: the value of C<$/> is a string, not a regex. B<awk> has to be
+better for something. :-)
+
+Setting C<$/> to a reference to an integer, scalar containing an integer, or
+scalar that's convertible to an integer will attempt to read records
+instead of lines, with the maximum record size being the referenced
+integer. So this:
+
+ local $/ = \32768; # or \"32768", or \$var_containing_32768
+ open my $fh, "<", $myfile or die $!;
+ local $_ = <$fh>;
+
+will read a record of no more than 32768 bytes from C<$fh>. If you're
+not reading from a record-oriented file (or your OS doesn't have
+record-oriented files), then you'll likely get a full chunk of data
+with every read. If a record is larger than the record size you've
+set, you'll get the record back in pieces. Trying to set the record
+size to zero or less will cause reading in the (rest of the) whole file.
+
+On VMS, record reads are done with the equivalent of C<sysread>,
+so it's best not to mix record and non-record reads on the same
+file. (This is unlikely to be a problem, because any file you'd
+want to read in record mode is probably unusable in line mode.)
+Non-VMS systems do normal I/O, so it's safe to mix record and
+non-record reads of a file.
+
+See also L<perlport/"Newlines">. Also see C<$.>.
+
+=item HANDLE->autoflush(EXPR)
+
+=item $OUTPUT_AUTOFLUSH
+
+=item $|
+X<$|> X<autoflush> X<flush> X<$OUTPUT_AUTOFLUSH>
+
+If set to nonzero, forces a flush right away and after every write
+or print on the currently selected output channel. Default is 0
+(regardless of whether the channel is really buffered by the
+system or not; C<$|> tells you only whether you've asked Perl
+explicitly to flush after each write). STDOUT will
+typically be line buffered if output is to the terminal and block
+buffered otherwise. Setting this variable is useful primarily when
+you are outputting to a pipe or socket, such as when you are running
+a Perl program under B<rsh> and want to see the output as it's
+happening. This has no effect on input buffering. See L<perlfunc/getc>
+for that. See L<perlfunc/select> on how to select the output channel.
+See also L<IO::Handle>. (Mnemonic: when you want your pipes to be piping hot.)
+
+=item IO::Handle->output_field_separator EXPR
+
+=item $OUTPUT_FIELD_SEPARATOR
+
+=item $OFS
+
+=item $,
+X<$,> X<$OFS> X<$OUTPUT_FIELD_SEPARATOR>
+
+The output field separator for the print operator. If defined, this
+value is printed between each of print's arguments. Default is C<undef>.
+(Mnemonic: what is printed when there is a "," in your print statement.)
+
+=item IO::Handle->output_record_separator EXPR
+
+=item $OUTPUT_RECORD_SEPARATOR
+
+=item $ORS
+
+=item $\
+X<$\> X<$ORS> X<$OUTPUT_RECORD_SEPARATOR>
+
+The output record separator for the print operator. If defined, this
+value is printed after the last of print's arguments. Default is C<undef>.
+(Mnemonic: you set C<$\> instead of adding "\n" at the end of the print.
+Also, it's just like C<$/>, but it's what you get "back" from Perl.)
+
+=item $LIST_SEPARATOR
+
+=item $"
+X<$"> X<$LIST_SEPARATOR>
+
+This is like C<$,> except that it applies to array and slice values
+interpolated into a double-quoted string (or similar interpreted
+string). Default is a space. (Mnemonic: obvious, I think.)
+
+=item $SUBSCRIPT_SEPARATOR
+
+=item $SUBSEP
+
+=item $;
+X<$;> X<$SUBSEP> X<$SUBSCRIPT_SEPARATOR>
+
+The subscript separator for multidimensional array emulation. If you
+refer to a hash element as
+
+ $foo{$a,$b,$c}
+
+it really means
+
+ $foo{join($;, $a, $b, $c)}
+
+But don't put
+
+ @foo{$a,$b,$c} # a slice--note the @
+
+which means
+
+ ($foo{$a},$foo{$b},$foo{$c})
+
+Default is "\034", the same as SUBSEP in B<awk>. If your
+keys contain binary data there might not be any safe value for C<$;>.
+(Mnemonic: comma (the syntactic subscript separator) is a
+semi-semicolon. Yeah, I know, it's pretty lame, but C<$,> is already
+taken for something more important.)
+
+Consider using "real" multidimensional arrays as described
+in L<perllol>.
+
+=item HANDLE->format_page_number(EXPR)
+
+=item $FORMAT_PAGE_NUMBER
+
+=item $%
+X<$%> X<$FORMAT_PAGE_NUMBER>
+
+The current page number of the currently selected output channel.
+Used with formats.
+(Mnemonic: % is page number in B<nroff>.)
+
+=item HANDLE->format_lines_per_page(EXPR)
+
+=item $FORMAT_LINES_PER_PAGE
+
+=item $=
+X<$=> X<$FORMAT_LINES_PER_PAGE>
+
+The current page length (printable lines) of the currently selected
+output channel. Default is 60.
+Used with formats.
+(Mnemonic: = has horizontal lines.)
+
+=item HANDLE->format_lines_left(EXPR)
+
+=item $FORMAT_LINES_LEFT
+
+=item $-
+X<$-> X<$FORMAT_LINES_LEFT>
+
+The number of lines left on the page of the currently selected output
+channel.
+Used with formats.
+(Mnemonic: lines_on_page - lines_printed.)
+
+=item @LAST_MATCH_START
+
+=item @-
+X<@-> X<@LAST_MATCH_START>
+
+$-[0] is the offset of the start of the last successful match.
+C<$-[>I<n>C<]> is the offset of the start of the substring matched by
+I<n>-th subpattern, or undef if the subpattern did not match.
+
+Thus after a match against $_, $& coincides with C<substr $_, $-[0],
+$+[0] - $-[0]>. Similarly, $I<n> coincides with C<substr $_, $-[n],
+$+[n] - $-[n]> if C<$-[n]> is defined, and $+ coincides with
+C<substr $_, $-[$#-], $+[$#-] - $-[$#-]>. One can use C<$#-> to find the last
+matched subgroup in the last successful match. Contrast with
+C<$#+>, the number of subgroups in the regular expression. Compare
+with C<@+>.
+
+This array holds the offsets of the beginnings of the last
+successful submatches in the currently active dynamic scope.
+C<$-[0]> is the offset into the string of the beginning of the
+entire match. The I<n>th element of this array holds the offset
+of the I<n>th submatch, so C<$-[1]> is the offset where $1
+begins, C<$-[2]> the offset where $2 begins, and so on.
+
+After a match against some variable $var:
+
+=over 5
+
+=item C<$`> is the same as C<substr($var, 0, $-[0])>
+
+=item C<$&> is the same as C<substr($var, $-[0], $+[0] - $-[0])>
+
+=item C<$'> is the same as C<substr($var, $+[0])>
+
+=item C<$1> is the same as C<substr($var, $-[1], $+[1] - $-[1])>
+
+=item C<$2> is the same as C<substr($var, $-[2], $+[2] - $-[2])>
+
+=item C<$3> is the same as C<substr($var, $-[3], $+[3] - $-[3])>
+
+=back
+
+=item %-
+X<%->
+
+Similar to C<%+>, this variable allows access to the named capture buffers
+in the last successful match in the currently active dynamic scope. To
+each capture buffer name found in the regular expression, it associates a
+reference to an array containing the list of values captured by all
+buffers with that name (should there be several of them), in the order
+in which they appear.
+
+Here's an example:
+
+ if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) {
+ foreach my $bufname (sort keys %-) {
+ my $ary = $-{$bufname};
+ foreach my $idx (0..$#$ary) {
+ print "\$-{$bufname}[$idx] : ",
+ (defined($ary->[$idx]) ? "'$ary->[$idx]'" : "undef"),
+ "\n";
+ }
+ }
+ }
+
+would print out:
+
+ $-{A}[0] : '1'
+ $-{A}[1] : '3'
+ $-{B}[0] : '2'
+ $-{B}[1] : '4'
+
+The keys of the C<%-> hash correspond to all buffer names found in
+the regular expression.
+
+The behaviour of C<%-> is implemented via the
+L<Tie::Hash::NamedCapture> module.
+
+B<Note:> C<%-> and C<%+> are tied views into a common internal hash
+associated with the last successful regular expression. Therefore mixing
+iterative access to them via C<each> may have unpredictable results.
+Likewise, if the last successful match changes, then the results may be
+surprising.
+
+=item HANDLE->format_name(EXPR)
+
+=item $FORMAT_NAME
+
+=item $~
+X<$~> X<$FORMAT_NAME>
+
+The name of the current report format for the currently selected output
+channel. Default is the name of the filehandle. (Mnemonic: brother to
+C<$^>.)
+
+=item HANDLE->format_top_name(EXPR)
+
+=item $FORMAT_TOP_NAME
+
+=item $^
+X<$^> X<$FORMAT_TOP_NAME>
+
+The name of the current top-of-page format for the currently selected
+output channel. Default is the name of the filehandle with _TOP
+appended. (Mnemonic: points to top of page.)
+
+=item IO::Handle->format_line_break_characters EXPR
+
+=item $FORMAT_LINE_BREAK_CHARACTERS
+
+=item $:
+X<$:> X<$FORMAT_LINE_BREAK_CHARACTERS>
+
+The current set of characters after which a string may be broken to
+fill continuation fields (starting with ^) in a format. Default is
+S<" \n-">, to break on whitespace or hyphens. (Mnemonic: a "colon" in
+poetry is a part of a line.)
+
+=item IO::Handle->format_formfeed EXPR
+
+=item $FORMAT_FORMFEED
+
+=item $^L
+X<$^L> X<$FORMAT_FORMFEED>
+
+What formats output as a form feed. Default is \f.
+
+=item $ACCUMULATOR
+
+=item $^A
+X<$^A> X<$ACCUMULATOR>
+
+The current value of the write() accumulator for format() lines. A format
+contains formline() calls that put their result into C<$^A>. After
+calling its format, write() prints out the contents of C<$^A> and empties it.
+So you never really see the contents of C<$^A> unless you call
+formline() yourself and then look at it. See L<perlform> and
+L<perlfunc/formline()>.
+
+=item $CHILD_ERROR
+
+=item $?
+X<$?> X<$CHILD_ERROR>
+
+The status returned by the last pipe close, backtick (C<``>) command,
+successful call to wait() or waitpid(), or from the system()
+operator. This is just the 16-bit status word returned by the
+traditional Unix wait() system call (or else is made up to look like it). Thus, the
+exit value of the subprocess is really (C<<< $? >> 8 >>>), and
+C<$? & 127> gives which signal, if any, the process died from, and
+C<$? & 128> reports whether there was a core dump. (Mnemonic:
+similar to B<sh> and B<ksh>.)
+
+Additionally, if the C<h_errno> variable is supported in C, its value
+is returned via $? if any C<gethost*()> function fails.
+
+If you have installed a signal handler for C<SIGCHLD>, the
+value of C<$?> will usually be wrong outside that handler.
+
+Inside an C<END> subroutine C<$?> contains the value that is going to be
+given to C<exit()>. You can modify C<$?> in an C<END> subroutine to
+change the exit status of your program. For example:
+
+ END {
+ $? = 1 if $? == 255; # die would make it 255
+ }
+
+Under VMS, the pragma C<use vmsish 'status'> makes C<$?> reflect the
+actual VMS exit status, instead of the default emulation of POSIX
+status; see L<perlvms/$?> for details.
+
+Also see L<Error Indicators>.
+
+=item ${^CHILD_ERROR_NATIVE}
+X<$^CHILD_ERROR_NATIVE>
+
+The native status returned by the last pipe close, backtick (C<``>)
+command, successful call to wait() or waitpid(), or from the system()
+operator. On POSIX-like systems this value can be decoded with the
+WIFEXITED, WEXITSTATUS, WIFSIGNALED, WTERMSIG, WIFSTOPPED, WSTOPSIG
+and WIFCONTINUED functions provided by the L<POSIX> module.
+
+Under VMS this reflects the actual VMS exit status; i.e. it is the same
+as $? when the pragma C<use vmsish 'status'> is in effect.
+
+=item ${^ENCODING}
+X<$^ENCODING>
+
+The I<object reference> to the Encode object that is used to convert
+the source code to Unicode. Thanks to this variable your perl script
+does not have to be written in UTF-8. Default is I<undef>. The direct
+manipulation of this variable is highly discouraged.
+
+=item $OS_ERROR
+
+=item $ERRNO
+
+=item $!
+X<$!> X<$ERRNO> X<$OS_ERROR>
+
+If used numerically, yields the current value of the C C<errno>
+variable, or in other words, if a system or library call fails, it
+sets this variable. This means that the value of C<$!> is meaningful
+only I<immediately> after a B<failure>:
+
+ if (open my $fh, "<", $filename) {
+ # Here $! is meaningless.
+ ...
+ } else {
+ # ONLY here is $! meaningful.
+ ...
+ # Already here $! might be meaningless.
+ }
+ # Since here we might have either success or failure,
+ # here $! is meaningless.
+
+In the above I<meaningless> stands for anything: zero, non-zero,
+C<undef>. A successful system or library call does B<not> set
+the variable to zero.
+
+If used as a string, yields the corresponding system error string.
+You can assign a number to C<$!> to set I<errno> if, for instance,
+you want C<"$!"> to return the string for error I<n>, or you want
+to set the exit value for the die() operator. (Mnemonic: What just
+went bang?)
+
+Also see L<Error Indicators>.
+
+=item %OS_ERROR
+
+=item %ERRNO
+
+=item %!
+X<%!>
+
+Each element of C<%!> has a true value only if C<$!> is set to that
+value. For example, C<$!{ENOENT}> is true if and only if the current
+value of C<$!> is C<ENOENT>; that is, if the most recent error was
+"No such file or directory" (or its moral equivalent: not all operating
+systems give that exact error, and certainly not all languages).
+To check if a particular key is meaningful on your system, use
+C<exists $!{the_key}>; for a list of legal keys, use C<keys %!>.
+See L<Errno> for more information, and also see above for the
+validity of C<$!>.
+
+=item $EXTENDED_OS_ERROR
+
+=item $^E
+X<$^E> X<$EXTENDED_OS_ERROR>
+
+Error information specific to the current operating system. At
+the moment, this differs from C<$!> only under VMS, OS/2, and Win32
+(and for MacPerl). On all other platforms, C<$^E> is always just
+the same as C<$!>.
+
+Under VMS, C<$^E> provides the VMS status value from the last
+system error. This is more specific information about the last
+system error than that provided by C<$!>. This is particularly
+important when C<$!> is set to B<EVMSERR>.
+
+Under OS/2, C<$^E> is set to the error code of the last call to
+OS/2 API either via CRT, or directly from perl.
+
+Under Win32, C<$^E> always returns the last error information
+reported by the Win32 call C<GetLastError()> which describes
+the last error from within the Win32 API. Most Win32-specific
+code will report errors via C<$^E>. ANSI C and Unix-like calls
+set C<errno> and so most portable Perl code will report errors
+via C<$!>.
+
+Caveats mentioned in the description of C<$!> generally apply to
+C<$^E>, also. (Mnemonic: Extra error explanation.)
+
+Also see L<Error Indicators>.
+
+=item $EVAL_ERROR
+
+=item $@
+X<$@> X<$EVAL_ERROR>
+
+The Perl syntax error message from the last eval() operator.
+If $@ is the null string, the last eval() parsed and executed
+correctly (although the operations you invoked may have failed in the
+normal fashion). (Mnemonic: Where was the syntax error "at"?)
+
+Warning messages are not collected in this variable. You can,
+however, set up a routine to process warnings by setting C<$SIG{__WARN__}>
+as described below.
+
+Also see L<Error Indicators>.
+
+=item $PROCESS_ID
+
+=item $PID
+
+=item $$
+X<$$> X<$PID> X<$PROCESS_ID>
+
+The process number of the Perl running this script. You should
+consider this variable read-only, although it will be altered
+across fork() calls. (Mnemonic: same as shells.)
+
+Note for Linux users: on Linux, the C functions C<getpid()> and
+C<getppid()> return different values from different threads. In order to
+be portable, this behavior is not reflected by C<$$>, whose value remains
+consistent across threads. If you want to call the underlying C<getpid()>,
+you may use the CPAN module C<Linux::Pid>.
+
+=item $REAL_USER_ID
+
+=item $UID
+
+=item $<
+X<< $< >> X<$UID> X<$REAL_USER_ID>
+
+The real uid of this process. (Mnemonic: it's the uid you came I<from>,
+if you're running setuid.) You can change both the real uid and
+the effective uid at the same time by using POSIX::setuid(). Since
+changes to $< require a system call, check $! after a change attempt to
+detect any possible errors.
+
+=item $EFFECTIVE_USER_ID
+
+=item $EUID
+
+=item $>
+X<< $> >> X<$EUID> X<$EFFECTIVE_USER_ID>
+
+The effective uid of this process. Example:
+
+ $< = $>; # set real to effective uid
+ ($<,$>) = ($>,$<); # swap real and effective uid
+
+You can change both the effective uid and the real uid at the same
+time by using POSIX::setuid(). Changes to $> require a check to $!
+to detect any possible errors after an attempted change.
+
+(Mnemonic: it's the uid you went I<to>, if you're running setuid.)
+C<< $< >> and C<< $> >> can be swapped only on machines
+supporting setreuid().
+
+=item $REAL_GROUP_ID
+
+=item $GID
+
+=item $(
+X<$(> X<$GID> X<$REAL_GROUP_ID>
+
+The real gid of this process. If you are on a machine that supports
+membership in multiple groups simultaneously, gives a space separated
+list of groups you are in. The first number is the one returned by
+getgid(), and the subsequent ones by getgroups(), one of which may be
+the same as the first number.
+
+However, a value assigned to C<$(> must be a single number used to
+set the real gid. So the value given by C<$(> should I<not> be assigned
+back to C<$(> without being forced numeric, such as by adding zero. Note
+that this is different to the effective gid (C<$)>) which does take a
+list.
+
+You can change both the real gid and the effective gid at the same
+time by using POSIX::setgid(). Changes to $( require a check to $!
+to detect any possible errors after an attempted change.
+
+(Mnemonic: parentheses are used to I<group> things. The real gid is the
+group you I<left>, if you're running setgid.)
+
+=item $EFFECTIVE_GROUP_ID
+
+=item $EGID
+
+=item $)
+X<$)> X<$EGID> X<$EFFECTIVE_GROUP_ID>
+
+The effective gid of this process. If you are on a machine that
+supports membership in multiple groups simultaneously, gives a space
+separated list of groups you are in. The first number is the one
+returned by getegid(), and the subsequent ones by getgroups(), one of
+which may be the same as the first number.
+
+Similarly, a value assigned to C<$)> must also be a space-separated
+list of numbers. The first number sets the effective gid, and
+the rest (if any) are passed to setgroups(). To get the effect of an
+empty list for setgroups(), just repeat the new effective gid; that is,
+to force an effective gid of 5 and an effectively empty setgroups()
+list, say C< $) = "5 5" >.
+
+You can change both the effective gid and the real gid at the same
+time by using POSIX::setgid() (use only a single numeric argument).
+Changes to $) require a check to $! to detect any possible errors
+after an attempted change.
+
+(Mnemonic: parentheses are used to I<group> things. The effective gid
+is the group that's I<right> for you, if you're running setgid.)
+
+C<< $< >>, C<< $> >>, C<$(> and C<$)> can be set only on
+machines that support the corresponding I<set[re][ug]id()> routine. C<$(>
+and C<$)> can be swapped only on machines supporting setregid().
+
+=item $PROGRAM_NAME
+
+=item $0
+X<$0> X<$PROGRAM_NAME>
+
+Contains the name of the program being executed.
+
+On some (read: not all) operating systems assigning to C<$0> modifies
+the argument area that the C<ps> program sees. On some platforms you
+may have to use special C<ps> options or a different C<ps> to see the
+changes. Modifying the $0 is more useful as a way of indicating the
+current program state than it is for hiding the program you're
+running. (Mnemonic: same as B<sh> and B<ksh>.)
+
+Note that there are platform specific limitations on the maximum
+length of C<$0>. In the most extreme case it may be limited to the
+space occupied by the original C<$0>.
+
+On some platforms there may be an arbitrary amount of padding, for
+example space characters, after the modified name as shown by C<ps>.
+On some platforms this padding may extend all the way to the original
+length of the argument area, no matter what you do (this is the case
+for example with Linux 2.2).
+
+Note for BSD users: setting C<$0> does not completely remove "perl"
+from the ps(1) output. For example, setting C<$0> to C<"foobar"> may
+result in C<"perl: foobar (perl)"> (whether both the C<"perl: "> prefix
+and the " (perl)" suffix are shown depends on your exact BSD variant
+and version). This is an operating system feature, Perl cannot help it.
+
+In multithreaded scripts Perl coordinates the threads so that any
+thread may modify its copy of the C<$0> and the change becomes visible
+to ps(1) (assuming the operating system plays along). Note that
+the view of C<$0> the other threads have will not change since they
+have their own copies of it.
+
+If the program has been given to perl via the switches C<-e> or C<-E>,
+C<$0> will contain the string C<"-e">.
+
+=item $[
+X<$[>
+
+The index of the first element in an array, and of the first character
+in a substring. Default is 0, but you could theoretically set it
+to 1 to make Perl behave more like B<awk> (or Fortran) when
+subscripting and when evaluating the index() and substr() functions.
+(Mnemonic: [ begins subscripts.)
+
+As of release 5 of Perl, assignment to C<$[> is treated as a compiler
+directive, and cannot influence the behavior of any other file.
+(That's why you can only assign compile-time constants to it.)
+Its use is highly discouraged.
+
+Note that, unlike other compile-time directives (such as L<strict>),
+assignment to C<$[> can be seen from outer lexical scopes in the same file.
+However, you can use local() on it to strictly bind its value to a
+lexical block.
+
+=item $]
+X<$]>
+
+The version + patchlevel / 1000 of the Perl interpreter. This variable
+can be used to determine whether the Perl interpreter executing a
+script is in the right range of versions. (Mnemonic: Is this version
+of perl in the right bracket?) Example:
+
+ warn "No checksumming!\n" if $] < 3.019;
+
+See also the documentation of C<use VERSION> and C<require VERSION>
+for a convenient way to fail if the running Perl interpreter is too old.
+
+The floating point representation can sometimes lead to inaccurate
+numeric comparisons. See C<$^V> for a more modern representation of
+the Perl version that allows accurate string comparisons.
+
+=item $COMPILING
+
+=item $^C
+X<$^C> X<$COMPILING>
+
+The current value of the flag associated with the B<-c> switch.
+Mainly of use with B<-MO=...> to allow code to alter its behavior
+when being compiled, such as for example to AUTOLOAD at compile
+time rather than normal, deferred loading. Setting
+C<$^C = 1> is similar to calling C<B::minus_c>.
+
+=item $DEBUGGING
+
+=item $^D
+X<$^D> X<$DEBUGGING>
+
+The current value of the debugging flags. (Mnemonic: value of B<-D>
+switch.) May be read or set. Like its command-line equivalent, you can use
+numeric or symbolic values, eg C<$^D = 10> or C<$^D = "st">.
+
+=item ${^RE_DEBUG_FLAGS}
+
+The current value of the regex debugging flags. Set to 0 for no debug output
+even when the re 'debug' module is loaded. See L<re> for details.
+
+=item ${^RE_TRIE_MAXBUF}
+
+Controls how certain regex optimisations are applied and how much memory they
+utilize. This value by default is 65536 which corresponds to a 512kB temporary
+cache. Set this to a higher value to trade memory for speed when matching
+large alternations. Set it to a lower value if you want the optimisations to
+be as conservative of memory as possible but still occur, and set it to a
+negative value to prevent the optimisation and conserve the most memory.
+Under normal situations this variable should be of no interest to you.
+
+=item $SYSTEM_FD_MAX
+
+=item $^F
+X<$^F> X<$SYSTEM_FD_MAX>
+
+The maximum system file descriptor, ordinarily 2. System file
+descriptors are passed to exec()ed processes, while higher file
+descriptors are not. Also, during an open(), system file descriptors are
+preserved even if the open() fails. (Ordinary file descriptors are
+closed before the open() is attempted.) The close-on-exec
+status of a file descriptor will be decided according to the value of
+C<$^F> when the corresponding file, pipe, or socket was opened, not the
+time of the exec().
+
+=item $^H
+
+WARNING: This variable is strictly for internal use only. Its availability,
+behavior, and contents are subject to change without notice.
+
+This variable contains compile-time hints for the Perl interpreter. At the
+end of compilation of a BLOCK the value of this variable is restored to the
+value when the interpreter started to compile the BLOCK.
+
+When perl begins to parse any block construct that provides a lexical scope
+(e.g., eval body, required file, subroutine body, loop body, or conditional
+block), the existing value of $^H is saved, but its value is left unchanged.
+When the compilation of the block is completed, it regains the saved value.
+Between the points where its value is saved and restored, code that
+executes within BEGIN blocks is free to change the value of $^H.
+
+This behavior provides the semantic of lexical scoping, and is used in,
+for instance, the C<use strict> pragma.
+
+The contents should be an integer; different bits of it are used for
+different pragmatic flags. Here's an example:
+
+ sub add_100 { $^H |= 0x100 }
+
+ sub foo {
+ BEGIN { add_100() }
+ bar->baz($boon);
+ }
+
+Consider what happens during execution of the BEGIN block. At this point
+the BEGIN block has already been compiled, but the body of foo() is still
+being compiled. The new value of $^H will therefore be visible only while
+the body of foo() is being compiled.
+
+Substitution of the above BEGIN block with:
+
+ BEGIN { require strict; strict->import('vars') }
+
+demonstrates how C<use strict 'vars'> is implemented. Here's a conditional
+version of the same lexical pragma:
+
+ BEGIN { require strict; strict->import('vars') if $condition }
+
+=item %^H
+
+The %^H hash provides the same scoping semantic as $^H. This makes it
+useful for implementation of lexically scoped pragmas. See L<perlpragma>.
+
+=item $INPLACE_EDIT
+
+=item $^I
+X<$^I> X<$INPLACE_EDIT>
+
+The current value of the inplace-edit extension. Use C<undef> to disable
+inplace editing. (Mnemonic: value of B<-i> switch.)
+
+=item $^M
+X<$^M>
+
+By default, running out of memory is an untrappable, fatal error.
+However, if suitably built, Perl can use the contents of C<$^M>
+as an emergency memory pool after die()ing. Suppose that your Perl
+were compiled with C<-DPERL_EMERGENCY_SBRK> and used Perl's malloc.
+Then
+
+ $^M = 'a' x (1 << 16);
+
+would allocate a 64K buffer for use in an emergency. See the
+F<INSTALL> file in the Perl distribution for information on how to
+add custom C compilation flags when compiling perl. To discourage casual
+use of this advanced feature, there is no L<English|English> long name for
+this variable.
+
+=item $OSNAME
+
+=item $^O
+X<$^O> X<$OSNAME>
+
+The name of the operating system under which this copy of Perl was
+built, as determined during the configuration process. The value
+is identical to C<$Config{'osname'}>. See also L<Config> and the
+B<-V> command-line switch documented in L<perlrun>.
+
+On Windows platforms, $^O is not very helpful: since it is always
+C<MSWin32>, it doesn't tell the difference between
+95/98/ME/NT/2000/XP/CE/.NET. Use Win32::GetOSName() or
+Win32::GetOSVersion() (see L<Win32> and L<perlport>) to distinguish
+between the variants.
+
+=item ${^OPEN}
+
+An internal variable used by PerlIO. A string in two parts, separated
+by a C<\0> byte, the first part describes the input layers, the second
+part describes the output layers.
+
+=item $PERLDB
+
+=item $^P
+X<$^P> X<$PERLDB>
+
+The internal variable for debugging support. The meanings of the
+various bits are subject to change, but currently indicate:
+
+=over 6
+
+=item 0x01
+
+Debug subroutine enter/exit.
+
+=item 0x02
+
+Line-by-line debugging. Causes DB::DB() subroutine to be called for each
+statement executed. Also causes saving source code lines (like 0x400).
+
+=item 0x04
+
+Switch off optimizations.
+
+=item 0x08
+
+Preserve more data for future interactive inspections.
+
+=item 0x10
+
+Keep info about source lines on which a subroutine is defined.
+
+=item 0x20
+
+Start with single-step on.
+
+=item 0x40
+
+Use subroutine address instead of name when reporting.
+
+=item 0x80
+
+Report C<goto &subroutine> as well.
+
+=item 0x100
+
+Provide informative "file" names for evals based on the place they were compiled.
+
+=item 0x200
+
+Provide informative names to anonymous subroutines based on the place they
+were compiled.
+
+=item 0x400
+
+Save source code lines into C<@{"_<$filename"}>.
+
+=back
+
+Some bits may be relevant at compile-time only, some at
+run-time only. This is a new mechanism and the details may change.
+See also L<perldebguts>.
+
+=item $LAST_REGEXP_CODE_RESULT
+
+=item $^R
+X<$^R> X<$LAST_REGEXP_CODE_RESULT>
+
+The result of evaluation of the last successful C<(?{ code })>
+regular expression assertion (see L<perlre>). May be written to.
+
+=item $EXCEPTIONS_BEING_CAUGHT
+
+=item $^S
+X<$^S> X<$EXCEPTIONS_BEING_CAUGHT>
+
+Current state of the interpreter.
+
+ $^S State
+ --------- -------------------
+ undef Parsing module/eval
+ true (1) Executing an eval
+ false (0) Otherwise
+
+The first state may happen in $SIG{__DIE__} and $SIG{__WARN__} handlers.
+
+=item $BASETIME
+
+=item $^T
+X<$^T> X<$BASETIME>
+
+The time at which the program began running, in seconds since the
+epoch (beginning of 1970). The values returned by the B<-M>, B<-A>,
+and B<-C> filetests are based on this value.
+
+=item ${^TAINT}
+
+Reflects if taint mode is on or off. 1 for on (the program was run with
+B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with
+B<-t> or B<-TU>). This variable is read-only.
+
+=item ${^UNICODE}
+
+Reflects certain Unicode settings of Perl. See L<perlrun>
+documentation for the C<-C> switch for more information about
+the possible values. This variable is set during Perl startup
+and is thereafter read-only.
+
+=item ${^UTF8CACHE}
+
+This variable controls the state of the internal UTF-8 offset caching code.
+1 for on (the default), 0 for off, -1 to debug the caching code by checking
+all its results against linear scans, and panicking on any discrepancy.
+
+=item ${^UTF8LOCALE}
+
+This variable indicates whether a UTF-8 locale was detected by perl at
+startup. This information is used by perl when it's in
+adjust-utf8ness-to-locale mode (as when run with the C<-CL> command-line
+switch); see L<perlrun> for more info on this.
+
+=item $PERL_VERSION
+
+=item $^V
+X<$^V> X<$PERL_VERSION>
+
+The revision, version, and subversion of the Perl interpreter, represented
+as a C<version> object.
+
+This variable first appeared in perl 5.6.0; earlier versions of perl will
+see an undefined value. Before perl 5.10.0 $^V was represented as a v-string.
+
+$^V can be used to determine whether the Perl interpreter executing a
+script is in the right range of versions. (Mnemonic: use ^V for Version
+Control.) Example:
+
+ warn "Hashes not randomized!\n" if !$^V or $^V lt v5.8.1
+
+To convert C<$^V> into its string representation use sprintf()'s
+C<"%vd"> conversion:
+
+ printf "version is v%vd\n", $^V; # Perl's version
+
+See the documentation of C<use VERSION> and C<require VERSION>
+for a convenient way to fail if the running Perl interpreter is too old.
+
+See also C<$]> for an older representation of the Perl version.
+
+=item $WARNING
+
+=item $^W
+X<$^W> X<$WARNING>
+
+The current value of the warning switch, initially true if B<-w>
+was used, false otherwise, but directly modifiable. (Mnemonic:
+related to the B<-w> switch.) See also L<warnings>.
+
+=item ${^WARNING_BITS}
+
+The current set of warning checks enabled by the C<use warnings> pragma.
+See the documentation of C<warnings> for more details.
+
+=item ${^WIN32_SLOPPY_STAT}
+
+If this variable is set to a true value, then stat() on Windows will
+not try to open the file. This means that the link count cannot be
+determined and file attributes may be out of date if additional
+hardlinks to the file exist. On the other hand, not opening the file
+is considerably faster, especially for files on network drives.
+
+This variable could be set in the F<sitecustomize.pl> file to
+configure the local Perl installation to use "sloppy" stat() by
+default. See L<perlrun> for more information about site
+customization.
+
+=item $EXECUTABLE_NAME
+
+=item $^X
+X<$^X> X<$EXECUTABLE_NAME>
+
+The name used to execute the current copy of Perl, from C's
+C<argv[0]> or (where supported) F</proc/self/exe>.
+
+Depending on the host operating system, the value of $^X may be
+a relative or absolute pathname of the perl program file, or may
+be the string used to invoke perl but not the pathname of the
+perl program file. Also, most operating systems permit invoking
+programs that are not in the PATH environment variable, so there
+is no guarantee that the value of $^X is in PATH. For VMS, the
+value may or may not include a version number.
+
+You usually can use the value of $^X to re-invoke an independent
+copy of the same perl that is currently running, e.g.,
+
+ @first_run = `$^X -le "print int rand 100 for 1..100"`;
+
+But recall that not all operating systems support forking or
+capturing of the output of commands, so this complex statement
+may not be portable.
+
+It is not safe to use the value of $^X as a path name of a file,
+as some operating systems that have a mandatory suffix on
+executable files do not require use of the suffix when invoking
+a command. To convert the value of $^X to a path name, use the
+following statements:
+
+ # Build up a set of file names (not command names).
+ use Config;
+ $this_perl = $^X;
+ if ($^O ne 'VMS')
+ {$this_perl .= $Config{_exe}
+ unless $this_perl =~ m/$Config{_exe}$/i;}
+
+Because many operating systems permit anyone with read access to
+the Perl program file to make a copy of it, patch the copy, and
+then execute the copy, the security-conscious Perl programmer
+should take care to invoke the installed copy of perl, not the
+copy referenced by $^X. The following statements accomplish
+this goal, and produce a pathname that can be invoked as a
+command or referenced as a file.
+
+ use Config;
+ $secure_perl_path = $Config{perlpath};
+ if ($^O ne 'VMS')
+ {$secure_perl_path .= $Config{_exe}
+ unless $secure_perl_path =~ m/$Config{_exe}$/i;}
+
+=item ARGV
+X<ARGV>
+
+The special filehandle that iterates over command-line filenames in
+C<@ARGV>. Usually written as the null filehandle in the angle operator
+C<< <> >>. Note that currently C<ARGV> only has its magical effect
+within the C<< <> >> operator; elsewhere it is just a plain filehandle
+corresponding to the last file opened by C<< <> >>. In particular,
+passing C<\*ARGV> as a parameter to a function that expects a filehandle
+may not cause your function to automatically read the contents of all the
+files in C<@ARGV>.
+
+=item $ARGV
+X<$ARGV>
+
+Contains the name of the current file when reading from <>.
+
+=item @ARGV
+X<@ARGV>
+
+The array @ARGV contains the command-line arguments intended for
+the script. C<$#ARGV> is generally the number of arguments minus
+one, because C<$ARGV[0]> is the first argument, I<not> the program's
+command name itself. See C<$0> for the command name.
+
+=item ARGVOUT
+X<ARGVOUT>
+
+The special filehandle that points to the currently open output file
+when doing edit-in-place processing with B<-i>. Useful when you have
+to do a lot of inserting and don't want to keep modifying $_. See
+L<perlrun> for the B<-i> switch.
+
+=item @F
+X<@F>
+
+The array @F contains the fields of each line read in when autosplit
+mode is turned on. See L<perlrun> for the B<-a> switch. This array
+is package-specific, and must be declared or given a full package name
+if not in package main when running under C<strict 'vars'>.
+
+=item @INC
+X<@INC>
+
+The array @INC contains the list of places that the C<do EXPR>,
+C<require>, or C<use> constructs look for their library files. It
+initially consists of the arguments to any B<-I> command-line
+switches, followed by the default Perl library, probably
+F</usr/local/lib/perl>, followed by ".", to represent the current
+directory. ("." will not be appended if taint checks are enabled, either by
+C<-T> or by C<-t>.) If you need to modify this at runtime, you should use
+the C<use lib> pragma to get the machine-dependent library properly
+loaded also:
+
+ use lib '/mypath/libdir/';
+ use SomeMod;
+
+You can also insert hooks into the file inclusion system by putting Perl
+code directly into @INC. Those hooks may be subroutine references, array
+references or blessed objects. See L<perlfunc/require> for details.
+
+=item @ARG
+
+=item @_
+X<@_> X<@ARG>
+
+Within a subroutine the array @_ contains the parameters passed to that
+subroutine. See L<perlsub>.
+
+=item %INC
+X<%INC>
+
+The hash %INC contains entries for each filename included via the
+C<do>, C<require>, or C<use> operators. The key is the filename
+you specified (with module names converted to pathnames), and the
+value is the location of the file found. The C<require>
+operator uses this hash to determine whether a particular file has
+already been included.
+
+If the file was loaded via a hook (e.g. a subroutine reference, see
+L<perlfunc/require> for a description of these hooks), this hook is
+by default inserted into %INC in place of a filename. Note, however,
+that the hook may have set the %INC entry by itself to provide some more
+specific info.
+
+=item %ENV
+
+=item $ENV{expr}
+X<%ENV>
+
+The hash %ENV contains your current environment. Setting a
+value in C<%ENV> changes the environment for any child processes
+you subsequently fork() off.
+
+=item %SIG
+
+=item $SIG{expr}
+X<%SIG>
+
+The hash C<%SIG> contains signal handlers for signals. For example:
+
+ sub handler { # 1st argument is signal name
+ my($sig) = @_;
+ print "Caught a SIG$sig--shutting down\n";
+ close(LOG);
+ exit(0);
+ }
+
+ $SIG{'INT'} = \&handler;
+ $SIG{'QUIT'} = \&handler;
+ ...
+ $SIG{'INT'} = 'DEFAULT'; # restore default action
+ $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT
+
+Using a value of C<'IGNORE'> usually has the effect of ignoring the
+signal, except for the C<CHLD> signal. See L<perlipc> for more about
+this special case.
+
+Here are some other examples:
+
+ $SIG{"PIPE"} = "Plumber"; # assumes main::Plumber (not recommended)
+ $SIG{"PIPE"} = \&Plumber; # just fine; assume current Plumber
+ $SIG{"PIPE"} = *Plumber; # somewhat esoteric
+ $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return??
+
+Be sure not to use a bareword as the name of a signal handler,
+lest you inadvertently call it.
+
+If your system has the sigaction() function then signal handlers are
+installed using it. This means you get reliable signal handling.
+
+The default delivery policy of signals changed in Perl 5.8.0 from
+immediate (also known as "unsafe") to deferred, also known as
+"safe signals". See L<perlipc> for more information.
+
+Certain internal hooks can also be set using the %SIG hash. The
+routine indicated by C<$SIG{__WARN__}> is called when a warning message is
+about to be printed. The warning message is passed as the first
+argument. The presence of a C<__WARN__> hook causes the ordinary printing
+of warnings to C<STDERR> to be suppressed. You can use this to save warnings
+in a variable, or turn warnings into fatal errors, like this:
+
+ local $SIG{__WARN__} = sub { die $_[0] };
+ eval $proggie;
+
+As the C<'IGNORE'> hook is not supported by C<__WARN__>, you can
+disable warnings using the empty subroutine:
+
+ local $SIG{__WARN__} = sub {};
+
+The routine indicated by C<$SIG{__DIE__}> is called when a fatal exception
+is about to be thrown. The error message is passed as the first
+argument. When a C<__DIE__> hook routine returns, the exception
+processing continues as it would have in the absence of the hook,
+unless the hook routine itself exits via a C<goto>, a loop exit, or a C<die()>.
+The C<__DIE__> handler is explicitly disabled during the call, so that you
+can die from a C<__DIE__> handler. Similarly for C<__WARN__>.
+
+Due to an implementation glitch, the C<$SIG{__DIE__}> hook is called
+even inside an eval(). Do not use this to rewrite a pending exception
+in C<$@>, or as a bizarre substitute for overriding C<CORE::GLOBAL::die()>.
+This strange action at a distance may be fixed in a future release
+so that C<$SIG{__DIE__}> is only called if your program is about
+to exit, as was the original intent. Any other use is deprecated.
+
+C<__DIE__>/C<__WARN__> handlers are very special in one respect:
+they may be called to report (probable) errors found by the parser.
+In such a case the parser may be in an inconsistent state, so any
+attempt to evaluate Perl code from such a handler will probably
+result in a segfault. This means that warnings or errors that
+result from parsing Perl should be used with extreme caution, like
+this:
+
+ require Carp if defined $^S;
+ Carp::confess("Something wrong") if defined &Carp::confess;
+ die "Something wrong, but could not load Carp to give backtrace...
+ To see backtrace try starting Perl with -MCarp switch";
+
+Here the first line will load Carp I<unless> it is the parser who
+called the handler. The second line will print a backtrace and die if
+Carp was available. The third line will be executed only if Carp was
+not available.
+
+See L<perlfunc/die>, L<perlfunc/warn>, L<perlfunc/eval>, and
+L<warnings> for additional information.
+
+=back
+
+=head2 Error Indicators
+X<error> X<exception>
+
+The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information
+about different types of error conditions that may appear during
+execution of a Perl program. The variables are shown ordered by
+the "distance" between the subsystem which reported the error and
+the Perl process. They correspond to errors detected by the Perl
+interpreter, C library, operating system, or an external program,
+respectively.
+
+To illustrate the differences between these variables, consider the
+following Perl expression, which uses a single-quoted string:
+
+ eval q{
+ open my $pipe, "/cdrom/install |" or die $!;
+ my @res = <$pipe>;
+ close $pipe or die "bad pipe: $?, $!";
+ };
+
+After execution of this statement all 4 variables may have been set.
+
+C<$@> is set if the string to be C<eval>-ed did not compile (this
+may happen if C<open> or C<close> were imported with bad prototypes),
+or if Perl code executed during evaluation die()d. In these cases
+the value of $@ is the compile error, or the argument to C<die>
+(which will interpolate C<$!> and C<$?>). (See also L<Fatal>,
+though.)
+
+When the eval() expression above is executed, open(), C<< <$pipe> >>,
+and C<close> are translated to calls in the C run-time library and
+thence to the operating system kernel. C<$!> is set to the C library's
+C<errno> if one of these calls fails.
+
+Under a few operating systems, C<$^E> may contain a more verbose
+error indicator, such as in this case, "CDROM tray not closed."
+Systems that do not support extended error messages leave C<$^E>
+the same as C<$!>.
+
+Finally, C<$?> may be set to a non-0 value if the external program
+F</cdrom/install> fails. The upper eight bits reflect specific
+error conditions encountered by the program (the program's exit()
+value). The lower eight bits reflect mode of failure, like signal
+death and core dump information. See wait(2) for details. In
+contrast to C<$!> and C<$^E>, which are set only if an error condition
+is detected, the variable C<$?> is set on each C<wait> or pipe
+C<close>, overwriting the old value. This is more like C<$@>, which
+on every eval() is always set on failure and cleared on success.
+
+For more details, see the individual descriptions at C<$@>, C<$!>, C<$^E>,
+and C<$?>.
+
+=head2 Technical Note on the Syntax of Variable Names
+
+Variable names in Perl can have several formats. Usually, they
+must begin with a letter or underscore, in which case they can be
+arbitrarily long (up to an internal limit of 251 characters) and
+may contain letters, digits, underscores, or the special sequence
+C<::> or C<'>. In this case, the part before the last C<::> or
+C<'> is taken to be a I<package qualifier>; see L<perlmod>.
+
+Perl variable names may also be a sequence of digits or a single
+punctuation or control character. These names are all reserved for
+special uses by Perl; for example, the all-digits names are used
+to hold data captured by backreferences after a regular expression
+match. Perl has a special syntax for the single-control-character
+names: It understands C<^X> (caret C<X>) to mean the control-C<X>
+character. For example, the notation C<$^W> (dollar-sign caret
+C<W>) is the scalar variable whose name is the single character
+control-C<W>. This is better than typing a literal control-C<W>
+into your program.
+
+Finally, new in Perl 5.6, Perl variable names may be alphanumeric
+strings that begin with control characters (or better yet, a caret).
+These variables must be written in the form C<${^Foo}>; the braces
+are not optional. C<${^Foo}> denotes the scalar variable whose
+name is a control-C<F> followed by two C<o>'s. These variables are
+reserved for future special uses by Perl, except for the ones that
+begin with C<^_> (control-underscore or caret-underscore). No
+control-character name that begins with C<^_> will acquire a special
+meaning in any future version of Perl; such names may therefore be
+used safely in programs. C<$^_> itself, however, I<is> reserved.
+
+Perl identifiers that begin with digits, control characters, or
+punctuation characters are exempt from the effects of the C<package>
+declaration and are always forced to be in package C<main>; they are
+also exempt from C<strict 'vars'> errors. A few other names are also
+exempt in these ways:
+
+ ENV STDIN
+ INC STDOUT
+ ARGV STDERR
+ ARGVOUT _
+ SIG
+
+In particular, the new special C<${^_XYZ}> variables are always taken
+to be in package C<main>, regardless of any C<package> declarations
+presently in scope.
+
+=head1 BUGS
+
+Due to an unfortunate accident of Perl's implementation, C<use
+English> imposes a considerable performance penalty on all regular
+expression matches in a program, regardless of whether they occur
+in the scope of C<use English>. For that reason, saying C<use
+English> in libraries is strongly discouraged. See the
+Devel::SawAmpersand module documentation from CPAN
+( http://www.cpan.org/modules/by-module/Devel/ )
+for more information. Writing C<use English '-no_match_vars';>
+avoids the performance penalty.
+
+Having to even think about the C<$^S> variable in your exception
+handlers is simply wrong. C<$SIG{__DIE__}> as currently implemented
+invites grievous and difficult to track down errors. Avoid it
+and use an C<END{}> or CORE::GLOBAL::die override instead.