diff options
Diffstat (limited to 'ext')
28 files changed, 5242 insertions, 2109 deletions
diff --git a/ext/Pod-Html/lib/Pod/Html.pm b/ext/Pod-Html/lib/Pod/Html.pm index b945419c97..5ae242b96f 100644 --- a/ext/Pod-Html/lib/Pod/Html.pm +++ b/ext/Pod-Html/lib/Pod/Html.pm @@ -3,7 +3,7 @@ use strict; require Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); -$VERSION = 1.11; +$VERSION = 1.12; @ISA = qw(Exporter); @EXPORT = qw(pod2html htmlify); @EXPORT_OK = qw(anchorify); @@ -11,11 +11,13 @@ $VERSION = 1.11; use Carp; use Config; use Cwd; +use File::Basename; use File::Spec; use File::Spec::Unix; use Getopt::Long; +use Pod::Simple::Search; -use locale; # make \w work right in non-ASCII lands +use locale; # make \w work right in non-ASCII lands =head1 NAME @@ -29,8 +31,7 @@ Pod::Html - module to convert pod files to HTML =head1 DESCRIPTION Converts files from pod format (see L<perlpod>) to HTML format. It -can automatically generate indexes and cross-references, and it keeps -a cache of things it knows how to cross-reference. +can automatically generate indexes and cross-references. =head1 FUNCTIONS @@ -40,7 +41,6 @@ a cache of things it knows how to cross-reference. "--podpath=lib:ext:pod:vms", "--podroot=/usr/src/perl", "--htmlroot=/perl/nmanual", - "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop", "--recurse", "--infile=foo.pod", "--outfile=/perl/nmanual/foo.html"); @@ -51,16 +51,10 @@ pod2html takes the following arguments: =item backlink - --backlink="Back to Top" + --backlink -Adds "Back to Top" links in front of every C<head1> heading (except for -the first). By default, no backlinks are generated. - -=item cachedir - - --cachedir=name - -Creates the item and directory caches in the given directory. +Turns every C<head1> heading into a link back to the top of the page. +By default, no backlinks are generated. =item css @@ -69,12 +63,6 @@ Creates the item and directory caches in the given directory. Specify the URL of a cascading style sheet. Also disables all HTML/CSS C<style> attributes that are output by default (to avoid conflicts). 
-=item flush - - --flush - -Flushes the item and directory caches. - =item header --header @@ -89,20 +77,6 @@ section. By default, no headers are generated. Displays the usage message. -=item hiddendirs - - --hiddendirs - --nohiddendirs - -Include hidden directories in the search for POD's in podpath if recurse -is set. -The default is not to traverse any directory whose name begins with C<.>. -See L</"podpath"> and L</"recurse">. - -[This option is for backward compatibility only. -It's hard to imagine that one would usefully create a module with a -name component beginning with C<.>.] - =item htmldir --htmldir=name @@ -134,19 +108,6 @@ behaviour. Specify the pod file to convert. Input is taken from STDIN if no infile is specified. -=item libpods - - --libpods=name:...:name - -List of page names (eg, "perlfunc") which contain linkable C<=item>s. - -=item netscape - - --netscape - --nonetscape - -B<Deprecated>, has no effect. For backwards compatibility only. - =item outfile --outfile=name @@ -154,6 +115,14 @@ B<Deprecated>, has no effect. For backwards compatibility only. Specify the HTML file to create. Output goes to STDOUT if no outfile is specified. +=item poderrors + + --poderrors + --nopoderrors + +Include a "POD ERRORS" section in the outfile if there were any POD +errors in the infile. This section is included by default. + =item podpath --podpath=name:...:name @@ -165,7 +134,8 @@ HTML converted forms can be linked to in cross references. --podroot=name -Specify the base directory for finding library pods. +Specify the base directory for finding library pods. Default is the +current working directory. =item quiet @@ -219,7 +189,9 @@ Uses C<$Config{pod2html}> to setup default options. =head1 AUTHOR -Tom Christiansen, E<lt>tchrist@perl.comE<gt>. +Marc Green, E<lt>marcgreen@cpan.orgE<gt>. + +Original version by Tom Christiansen, E<lt>tchrist@perl.comE<gt>. =head1 SEE ALSO @@ -231,258 +203,136 @@ This program is distributed under the Artistic License. 
=cut -my($Cachedir); -my($Dircache, $Itemcache); -my @Begin_Stack; -my @Libpods; my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl); my($Podfile, @Podpath, $Podroot); +my $Poderrors; my $Css; my $Recurse; my $Quiet; -my $HiddenDirs; my $Verbose; my $Doindex; my $Backlink; -my($Listlevel, @Listtype); -my $ListNewTerm; -use vars qw($Ignore); # need to localize it later. -my(%Items_Named, @Items_Seen); my($Title, $Header); -my $Top; -my $Paragraph; - -my %Sections; - -# Caches -my %Pages = (); # associative array used to find the location - # of pages referenced by L<> links. -my %Items = (); # associative array used to find the location - # of =item directives referenced by C<> links - -my %Local_Items; -my $Is83; +my %Pages = (); # associative array used to find the location + # of pages referenced by L<> links. my $Curdir = File::Spec->curdir; init_globals(); sub init_globals { - $Cachedir = "."; # The directory to which item and directory - # caches will be written. - - $Dircache = "pod2htmd.tmp"; - $Itemcache = "pod2htmi.tmp"; - - @Begin_Stack = (); # begin/end stack - - @Libpods = (); # files to search for links from C<> directives - $Htmlroot = "/"; # http-server base directory from which all - # relative paths in $podpath stem. - $Htmldir = ""; # The directory to which the html pages - # will (eventually) be written. - $Htmlfile = ""; # write to stdout by default - $Htmlfileurl = ""; # The url that other files would use to - # refer to this file. This is only used - # to make relative urls that point to - # other files. - - $Podfile = ""; # read from stdin by default - @Podpath = (); # list of directories containing library pods. - $Podroot = $Curdir; # filesystem base directory from which all - # relative paths in $podpath stem. + $Htmlroot = "/"; # http-server base directory from which all + # relative paths in $podpath stem. + $Htmldir = ""; # The directory to which the html pages + # will (eventually) be written. 
+ $Htmlfile = ""; # write to stdout by default + $Htmlfileurl = ""; # The url that other files would use to + # refer to this file. This is only used + # to make relative urls that point to + # other files. + + $Poderrors = 1; + $Podfile = ""; # read from stdin by default + @Podpath = (); # list of directories containing library pods. + $Podroot = $Curdir; # filesystem base directory from which all + # relative paths in $podpath stem. $Css = ''; # Cascading style sheet - $Recurse = 1; # recurse on subdirectories in $podpath. - $Quiet = 0; # not quiet by default - $Verbose = 0; # not verbose by default - $Doindex = 1; # non-zero if we should generate an index - $Backlink = ''; # text for "back to top" links - $Listlevel = 0; # current list depth - @Listtype = (); # list types for open lists - $ListNewTerm = 0; # indicates new term in definition list; used - # to correctly open/close <dd> tags - $Ignore = 1; # whether or not to format text. we don't - # format text until we hit our first pod - # directive. - - @Items_Seen = (); # for multiples of the same item in perlfunc - %Items_Named = (); - $Header = 0; # produce block header/footer - $Title = ''; # title to give the pod(s) - $Top = 1; # true if we are at the top of the doc. used - # to prevent the first <hr /> directive. - $Paragraph = ''; # which paragraph we're processing (used - # for error messages) - %Sections = (); # sections within this page - - %Local_Items = (); - $Is83 = $^O eq 'dos'; # Is it an 8.3 filesystem? -} - -# -# clean_data: global clean-up of pod data -# -sub clean_data($){ - my( $dataref ) = @_; - for my $i ( 0..$#{$dataref} ) { - ${$dataref}[$i] =~ s/\s+\Z//; - - # have a look for all-space lines - if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){ - my @chunks = split( /^\s+$/m, ${$dataref}[$i] ); - splice( @$dataref, $i, 1, @chunks ); - } - } + $Recurse = 1; # recurse on subdirectories in $podpath. 
+ $Quiet = 0; # not quiet by default + $Verbose = 0; # not verbose by default + $Doindex = 1; # non-zero if we should generate an index + $Backlink = 0; # no backlinks added by default + $Header = 0; # produce block header/footer + $Title = ''; # title to give the pod(s) } - sub pod2html { local(@ARGV) = @_; - local($/); local $_; init_globals(); - - $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN()); - - # cache of %Pages and %Items from last time we ran pod2html - - #undef $opt_help if defined $opt_help; - - # parse the command-line parameters parse_command_line(); - # escape the backlink argument (same goes for title but is done later...) - $Backlink = html_escape($Backlink) if defined $Backlink; + # prevent '//' in urls + $Htmlroot = "" if $Htmlroot eq "/"; + $Htmldir =~ s#/\z##; - # set some variables to their default values if necessary - my $pod; - unless (@ARGV && $ARGV[0]) { - if ($Podfile and $Podfile ne '-') { - open $pod, '<', $Podfile - or die "$0: cannot open $Podfile file for input: $!\n"; - } else { - open $pod, '-'; - } - } else { - $Podfile = $ARGV[0]; # XXX: might be more filenames - $pod = *ARGV; - } - $Htmlfile = "-" unless $Htmlfile; # stdout - $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a // - $Htmldir =~ s#/\z## ; # so we don't get a // if ( $Htmlroot eq '' && defined( $Htmldir ) && $Htmldir ne '' && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir - ) - { - # Set the 'base' url for this file, so that we can use it - # as the location from which to calculate relative links - # to other files. If this is '', then absolute links will - # be used throughout. - $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1); - } - - # read the pod a paragraph at a time - warn "Scanning for sections in input file(s)\n" if $Verbose; - $/ = ""; - my @poddata = <$pod>; - close $pod; - - # be eol agnostic - for (@poddata) { - if (/\r/) { - if (/\r\n/) { - @poddata = map { s/\r\n/\n/g; - /\n\n/ ? 
- map { "$_\n\n" } split /\n\n/ : - $_ } @poddata; - } else { - @poddata = map { s/\r/\n/g; - /\n\n/ ? - map { "$_\n\n" } split /\n\n/ : - $_ } @poddata; - } - last; - } - } - - clean_data( \@poddata ); - - # scan the pod for =head[1-6] directives and build an index - my $index = scan_headings(\%Sections, @poddata); - - unless($index) { - warn "No headings in $Podfile\n" if $Verbose; - } - - # open the output file - my $html; - if($Htmlfile and $Htmlfile ne '-') { - open $html, ">", $Htmlfile - or die "$0: cannot open $Htmlfile file for output: $!\n"; - } else { - open $html, ">-"; - } - - # put a title in the HTML file if one wasn't specified - if ($Title eq '') { - TITLE_SEARCH: { - for (my $i = 0; $i < @poddata; $i++) { - if ($poddata[$i] =~ /^=head1\s*NAME\b/m) { - for my $para ( @poddata[$i, $i+1] ) { - last TITLE_SEARCH - if ($Title) = $para =~ /(\S+\s+-+.*\S)/s; - } - } - - } - } - } - if (!$Title and $Podfile =~ /\.pod\z/) { - # probably a split pod so take first =head[12] as title - for (my $i = 0; $i < @poddata; $i++) { - last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; - } - warn "adopted '$Title' as title for $Podfile\n" - if $Verbose and $Title; - } - if ($Title) { - $Title =~ s/\s*\(.*\)//; - } else { - warn "$0: no title for $Podfile.\n" unless $Quiet; - $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s; - $Title = ($Podfile eq "-" ? 'No Title' : $1); - warn "using $Title" if $Verbose; - } + ) { + # Set the 'base' url for this file, so that we can use it + # as the location from which to calculate relative links + # to other files. If this is '', then absolute links will + # be used throughout. + #$Htmlfileurl = "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1); + # Is the above not just "$Htmlfileurl = $Htmlfile"? 
+ $Htmlfileurl = Unixify::unixify($Htmlfile); + + } + + my $pwd = getcwd(); + chdir($Podroot) || die "$0: error changing to directory $Podroot: $!\n"; + + # find all pod modules/pages in podpath, store in %Pages + # - callback used to remove Podroot and extension from each file + # - laborious to allow '.' in dirnames (e.g., /usr/share/perl/5.14.1) + Pod::Simple::Search->new->inc(0)->verbose($Verbose)->laborious(1) + ->callback(\&_save_page)->recurse($Recurse)->survey(@Podpath); + + chdir($pwd) || die "$0: error changing to directory $pwd: $!\n"; + + # set options for the parser + my $parser = Pod::Simple::XHTML::LocalPodLinks->new(); + $parser->anchor_items(1); # the old Pod::Html always did + $parser->backlink($Backlink); # linkify =head1 directives + $parser->htmldir($Htmldir); + $parser->htmlfileurl($Htmlfileurl); + $parser->htmlroot($Htmlroot); + $parser->index($Doindex); + $parser->no_errata_section(!$Poderrors); # note the inverse + $parser->output_string(\my $output); # written to file later + $parser->pages(\%Pages); + $parser->quiet($Quiet); + $parser->verbose($Verbose); + + # XXX: implement default title generator in pod::simple::xhtml + # copy the way the old Pod::Html did it $Title = html_escape($Title); + # We need to add this ourselves because we use our own header, not + # ::XHTML's header. We need to set $parser->backlink to linkify + # the =head1 directives + my $bodyid = $Backlink ? ' id="_podtop_"' : ''; + my $csslink = ''; my $bodystyle = ' style="background-color: white"'; my $tdstyle = ' style="background-color: #cccccc"'; if ($Css) { - $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />); - $csslink =~ s,\\,/,g; - $csslink =~ s,(/.):,$1|,; - $bodystyle = ''; - $tdstyle = ''; + $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />); + $csslink =~ s,\\,/,g; + $csslink =~ s,(/.):,$1|,; + $bodystyle = ''; + $tdstyle= ''; } - my $block = $Header ? 
<<END_OF_BLOCK : ''; + # header/footer block + my $block = $Header ? <<END_OF_BLOCK : ''; <table border="0" width="100%" cellspacing="0" cellpadding="3"> -<tr><td class="block"$tdstyle valign="middle"> -<big><strong><span class="block"> $Title</span></strong></big> +<tr><td class="_podblock_"$tdstyle valign="middle"> +<big><strong><span class="_podblock_"> $Title</span></strong></big> </td></tr> </table> END_OF_BLOCK - print $html <<END_OF_HEAD; + # create own header/footer because of --header + $parser->html_header(<<"HTMLHEAD"); <?xml version="1.0" ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> @@ -492,147 +342,43 @@ END_OF_BLOCK <link rev="made" href="mailto:$Config{perladmin}" /> </head> -<body$bodystyle> +<body$bodyid$bodystyle> $block -END_OF_HEAD - - # load/reload/validate/cache %Pages and %Items - get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse); - - # scan the pod for =item directives - scan_items( \%Local_Items, "", @poddata); - - # put an index at the top of the file. note, if $Doindex is 0 we - # still generate an index, but surround it with an html comment. - # that way some other program can extract it if desired. - $index =~ s/--+/-/g; - - my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : ""; - - unless ($Doindex) - { - $index = qq(<!--\n$index\n-->\n); - } +HTMLHEAD - print $html <<"END_OF_INDEX"; - -<!-- INDEX BEGIN --> -<div name="index"> -<p><a name=\"__index__\"></a></p> -$index -$hr -</div> -<!-- INDEX END --> - -END_OF_INDEX - - # now convert this file - my $after_item; # set to true after an =item - warn "Converting input file $Podfile\n" if $Verbose; - foreach my $i (0..$#poddata){ - $_ = $poddata[$i]; - $Paragraph = $i+1; - if (/^(=.*)/s) { # is it a pod directive? 
- $Ignore = 0; - $after_item = 0; - $_ = $1; - if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin - process_begin($html, $1, $2); - } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end - process_end($1, $2); - } elsif (/^=cut/) { # =cut - process_cut(); - } elsif (/^=pod/) { # =pod - process_pod(); - } else { - next if @Begin_Stack && $Begin_Stack[-1] ne 'html'; - - if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading - process_head( $html, $1, $2, $Doindex && $index ); - } elsif (/^=item\s*(.*\S)?/sm) { # =item text - process_item( $html, $1 ); - $after_item = 1; - } elsif (/^=over\s*(.*)/) { # =over N - process_over(); - } elsif (/^=back/) { # =back - process_back( $html ); - } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for - process_for( $html, $1, $2 ); - } else { - /^=(\S*)\s*/; - warn "$0: $Podfile: unknown pod directive '$1' in " - . "paragraph $Paragraph. ignoring.\n" unless $Quiet; - } - } - $Top = 0; - } - else { - next if $Ignore; - if (@Begin_Stack) { - print $html $_ if $Begin_Stack[-1] eq 'html'; - next; - } - my $text = $_; - - # Open tag for definition list as we have something to put in it - if( $ListNewTerm ){ - print $html "<dd>\n"; - $ListNewTerm = 0; - } - - if( $text =~ /\A\s+/ ){ - process_pre( \$text ); - print $html "<pre>\n$text</pre>\n"; - - } else { - process_text( \$text ); - - # experimental: check for a paragraph where all lines - # have some ...\t...\t...\n pattern - if( $text =~ /\t/ ){ - my @lines = split( "\n", $text ); - if( @lines > 1 ){ - my $all = 2; - foreach my $line ( @lines ){ - if( $line =~ /\S/ && $line !~ /\t/ ){ - $all--; - last if $all == 0; - } - } - if( $all > 0 ){ - $text =~ s/\t+/<td>/g; - $text =~ s/^/<tr><td>/gm; - $text = '<table cellspacing="0" cellpadding="0">' . - $text . 
'</table>'; - } - } - } - ## end of experimental - - print $html "<p>$text</p>\n"; - } - $after_item = 0; - } - } - - # finish off any pending directives - finish_list( $html ); - - # link to page index - print $html "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n" - if $Doindex and $index and $Backlink; - - print $html <<END_OF_TAIL; + $parser->html_footer(<<"HTMLFOOT"); $block </body> </html> -END_OF_TAIL +HTMLFOOT - # close the html file - close $html or die "Failed to close $Htmlfile: $!"; + my $input; + unless (@ARGV && $ARGV[0]) { + if ($Podfile and $Podfile ne '-') { + $input = $Podfile; + } else { + $input = '-'; # XXX: make a test case for this + } + } else { + $Podfile = $ARGV[0]; + $input = *ARGV; + } + + warn "Converting input file $Podfile\n" if $Verbose; + $parser->parse_file($input); - warn "Finished\n" if $Verbose; + # Write output to file + $Htmlfile = "-" unless $Htmlfile; # stdout + my $fhout; + if($Htmlfile and $Htmlfile ne '-') { + open $fhout, ">", $Htmlfile + or die "$0: cannot open $Htmlfile file for output: $!\n"; + } else { + open $fhout, ">-"; + } + print $fhout $output; + close $fhout or die "Failed to close $Htmlfile: $!"; } ############################################################################## @@ -643,1167 +389,84 @@ sub usage { die <<END_OF_USAGE; Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> --podpath=<name>:...:<name> --podroot=<name> - --libpods=<name>:...:<name> --recurse --verbose --index - --netscape --norecurse --noindex --cachedir=<name> - - --backlink - set text for "back to top" links (default: none). - --cachedir - directory for the item and directory cache files. - --css - stylesheet URL - --flush - flushes the item and directory caches. - --[no]header - produce block header/footer (default is no headers). - --help - prints this message. - --hiddendirs - search hidden directories in podpath - --htmldir - directory for resulting HTML files. 
- --htmlroot - http-server base directory from which all relative paths - in podpath stem (default is /). - --[no]index - generate an index at the top of the resulting html - (default behaviour). - --infile - filename for the pod to convert (input taken from stdin - by default). - --libpods - colon-separated list of pages to search for =item pod - directives in as targets of C<> and implicit links (empty - by default). note, these are not filenames, but rather - page names like those that appear in L<> links. - --outfile - filename for the resulting html file (output sent to - stdout by default). - --podpath - colon-separated list of directories containing library - pods (empty by default). - --podroot - filesystem base directory from which all relative paths - in podpath stem (default is .). - --[no]quiet - suppress some benign warning messages (default is off). - --[no]recurse - recurse on those subdirectories listed in podpath - (default behaviour). - --title - title that will appear in resulting html file. - --[no]verbose - self-explanatory (off by default). - --[no]netscape - deprecated, has no effect. for backwards compatibility only. + --recurse --verbose --index --norecurse --noindex + + --[no]backlink - turn =head1 directives into links pointing to the top of + the page (off by default). + --css - stylesheet URL + --[no]header - produce block header/footer (default is no headers). + --help - prints this message. + --htmldir - directory for resulting HTML files. + --htmlroot - http-server base directory from which all relative paths + in podpath stem (default is /). + --[no]index - generate an index at the top of the resulting html + (default behaviour). + --infile - filename for the pod to convert (input taken from stdin + by default). + --outfile - filename for the resulting html file (output sent to + stdout by default). + --[no]poderrors - include a POD ERRORS section in the output if there were + any POD errors in the input (default behavior). 
+ --podpath - colon-separated list of directories containing library + pods (empty by default). + --podroot - filesystem base directory from which all relative paths + in podpath stem (default is .). + --[no]quiet - suppress some benign warning messages (default is off). + --[no]recurse - recurse on those subdirectories listed in podpath + (default behaviour). + --title - title that will appear in resulting html file. + --[no]verbose - self-explanatory (off by default). END_OF_USAGE } sub parse_command_line { - my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help, - $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods, - $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet, - $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs); + my ($opt_backlink,$opt_css,$opt_header,$opt_help, + $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile, + $opt_outfile,$opt_poderrors,$opt_podpath,$opt_podroot, + $opt_quiet,$opt_recurse,$opt_title,$opt_verbose); unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html}; my $result = GetOptions( - 'backlink=s' => \$opt_backlink, - 'cachedir=s' => \$opt_cachedir, - 'css=s' => \$opt_css, - 'flush' => \$opt_flush, - 'header!' => \$opt_header, - 'help' => \$opt_help, - 'hiddendirs!'=> \$opt_hiddendirs, - 'htmldir=s' => \$opt_htmldir, - 'htmlroot=s' => \$opt_htmlroot, - 'index!' => \$opt_index, - 'infile=s' => \$opt_infile, - 'libpods=s' => \$opt_libpods, - 'netscape!' => \$opt_netscape, - 'outfile=s' => \$opt_outfile, - 'podpath=s' => \$opt_podpath, - 'podroot=s' => \$opt_podroot, - 'quiet!' => \$opt_quiet, - 'recurse!' => \$opt_recurse, - 'title=s' => \$opt_title, - 'verbose!' => \$opt_verbose, - ); + 'backlink!' => \$opt_backlink, + 'css=s' => \$opt_css, + 'help' => \$opt_help, + 'header!' => \$opt_header, + 'htmldir=s' => \$opt_htmldir, + 'htmlroot=s' => \$opt_htmlroot, + 'index!' => \$opt_index, + 'infile=s' => \$opt_infile, + 'outfile=s' => \$opt_outfile, + 'poderrors!' 
=> \$opt_poderrors, + 'podpath=s' => \$opt_podpath, + 'podroot=s' => \$opt_podroot, + 'quiet!' => \$opt_quiet, + 'recurse!' => \$opt_recurse, + 'title=s' => \$opt_title, + 'verbose!' => \$opt_verbose, + ); usage("-", "invalid parameters") if not $result; - usage("-") if defined $opt_help; # see if the user asked for help - $opt_help = ""; # just to make -w shut-up. + usage("-") if defined $opt_help; # see if the user asked for help + $opt_help = ""; # just to make -w shut-up. @Podpath = split(":", $opt_podpath) if defined $opt_podpath; - @Libpods = split(":", $opt_libpods) if defined $opt_libpods; - - $Backlink = $opt_backlink if defined $opt_backlink; - $Cachedir = $opt_cachedir if defined $opt_cachedir; - $Css = $opt_css if defined $opt_css; - $Header = $opt_header if defined $opt_header; - $Htmldir = $opt_htmldir if defined $opt_htmldir; - $Htmlroot = $opt_htmlroot if defined $opt_htmlroot; - $Doindex = $opt_index if defined $opt_index; - $Podfile = $opt_infile if defined $opt_infile; - $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs; - $Htmlfile = $opt_outfile if defined $opt_outfile; - $Podroot = $opt_podroot if defined $opt_podroot; - $Quiet = $opt_quiet if defined $opt_quiet; - $Recurse = $opt_recurse if defined $opt_recurse; - $Title = $opt_title if defined $opt_title; - $Verbose = $opt_verbose if defined $opt_verbose; - - warn "Flushing item and directory caches\n" - if $opt_verbose && defined $opt_flush; - $Dircache = "$Cachedir/pod2htmd.tmp"; - $Itemcache = "$Cachedir/pod2htmi.tmp"; - if (defined $opt_flush) { - 1 while unlink($Dircache, $Itemcache); - } -} - - -my $Saved_Cache_Key; - -sub get_cache { - my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; - my @cache_key_args = @_; - # A first-level cache: - # Don't bother reading the cache files if they still apply - # and haven't changed since we last read them. 
- - my $this_cache_key = cache_key(@cache_key_args); - - return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key; - - # load the cache of %Pages and %Items if possible. $tests will be - # non-zero if successful. - my $tests = 0; - if (-f $dircache && -f $itemcache) { - warn "scanning for item cache\n" if $Verbose; - $tests = load_cache($dircache, $itemcache, $podpath, $podroot); - } - - # if we didn't succeed in loading the cache then we must (re)build - # %Pages and %Items. - if (!$tests) { - warn "scanning directories in pod-path\n" if $Verbose; - scan_podpath($podroot, $recurse, 0); - } - $Saved_Cache_Key = cache_key(@cache_key_args); -} - -sub cache_key { - my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; - return join('!', $dircache, $itemcache, $recurse, - @$podpath, $podroot, stat($dircache), stat($itemcache)); -} - -# -# load_cache - tries to find if the caches stored in $dircache and $itemcache -# are valid caches of %Pages and %Items. if they are valid then it loads -# them and returns a non-zero value. -# -sub load_cache { - my($dircache, $itemcache, $podpath, $podroot) = @_; - my($tests); - local $_; - - $tests = 0; - - open(CACHE, "<$itemcache") || - die "$0: error opening $itemcache for reading: $!\n"; - $/ = "\n"; - - # is it the same podpath? - $_ = <CACHE>; - chomp($_); - $tests++ if (join(":", @$podpath) eq $_); - - # is it the same podroot? - $_ = <CACHE>; - chomp($_); - $tests++ if ($podroot eq $_); - - # load the cache if its good - if ($tests != 2) { - close(CACHE); - return 0; - } - - warn "loading item cache\n" if $Verbose; - while (<CACHE>) { - /(.*?) (.*)$/; - $Items{$1} = $2; - } - close(CACHE); - - warn "scanning for directory cache\n" if $Verbose; - open(CACHE, "<$dircache") || - die "$0: error opening $dircache for reading: $!\n"; - $/ = "\n"; - $tests = 0; - - # is it the same podpath? - $_ = <CACHE>; - chomp($_); - $tests++ if (join(":", @$podpath) eq $_); - - # is it the same podroot? 
- $_ = <CACHE>; - chomp($_); - $tests++ if ($podroot eq $_); - - # load the cache if its good - if ($tests != 2) { - close(CACHE); - return 0; - } - - warn "loading directory cache\n" if $Verbose; - while (<CACHE>) { - /(.*?) (.*)$/; - $Pages{$1} = $2; - } - - close(CACHE); - - return 1; -} - -# -# scan_podpath - scans the directories specified in @podpath for directories, -# .pod files, and .pm files. it also scans the pod files specified in -# @Libpods for =item directives. -# -sub scan_podpath { - my($podroot, $recurse, $append) = @_; - my($pwd, $dir); - my($libpod, $dirname, $pod, @files, @poddata); - - unless($append) { - %Items = (); - %Pages = (); - } - - # scan each directory listed in @Podpath - $pwd = getcwd(); - chdir($podroot) - || die "$0: error changing to directory $podroot: $!\n"; - foreach $dir (@Podpath) { - scan_dir($dir, $recurse); - } - - # scan the pods listed in @Libpods for =item directives - foreach $libpod (@Libpods) { - # if the page isn't defined then we won't know where to find it - # on the system. - next unless defined $Pages{$libpod} && $Pages{$libpod}; - - # if there is a directory then use the .pod and .pm files within it. - # NOTE: Only finds the first so-named directory in the tree. -# if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { - if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { - # find all the .pod and .pm files within the directory - $dirname = $1; - opendir(DIR, $dirname) || - die "$0: error opening directory $dirname: $!\n"; - @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR)); - closedir(DIR); - - # scan each .pod and .pm file for =item directives - foreach $pod (@files) { - open my $fh, '<', "$dirname/$pod" - or die "$0: error opening $dirname/$pod for input: $!\n"; - @poddata = <$fh>; - close $fh; - clean_data( \@poddata ); - - scan_items( \%Items, "$dirname/$pod", @poddata); - } - - # use the names of files as =item directives too. 
-### Don't think this should be done this way - confuses issues.(WL) -### foreach $pod (@files) { -### $pod =~ /^(.*)(\.pod|\.pm)$/; -### $Items{$1} = "$dirname/$1.html" if $1; -### } - } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ || - $Pages{$libpod} =~ /([^:]*\.pm):/) { - # scan the .pod or .pm file for =item directives - $pod = $1; - open my $fh, '<', $pod - or die "$0: error opening $pod for input: $!\n"; - @poddata = <$fh>; - close $fh; - clean_data( \@poddata ); - - scan_items( \%Items, "$pod", @poddata); - } else { - warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet; - } - } - @poddata = (); # clean-up a bit - - chdir($pwd) - || die "$0: error changing to directory $pwd: $!\n"; - - # cache the item list for later use - warn "caching items for later use\n" if $Verbose; - open my $cache, '>', $Itemcache - or die "$0: error open $Itemcache for writing: $!\n"; - - print $cache join(":", @Podpath) . "\n$podroot\n"; - foreach my $key (keys %Items) { - print $cache "$key $Items{$key}\n"; - } - - close $cache or die "error closing $Itemcache: $!"; - - # cache the directory list for later use - warn "caching directories for later use\n" if $Verbose; - open $cache, '>', $Dircache - or die "$0: error open $Dircache for writing: $!\n"; - - print $cache join(":", @Podpath) . "\n$podroot\n"; - foreach my $key (keys %Pages) { - print $cache "$key $Pages{$key}\n"; - } - - close $cache or die "error closing $Dircache: $!"; -} - -# -# scan_dir - scans the directory specified in $dir for subdirectories, .pod -# files, and .pm files. notes those that it finds. this information will -# be used later in order to figure out where the pages specified in L<> -# links are on the filesystem. -# -sub scan_dir { - my($dir, $recurse) = @_; - my($t, @subdirs, @pods, $pod, $dirname, @dirs); - local $_; - - @subdirs = (); - @pods = (); - - opendir(DIR, $dir) || - die "$0: error opening directory $dir: $!\n"; - while (defined($_ = readdir(DIR))) { - if (-d "$dir/$_" && $_ ne "." 
&& $_ ne ".." - && ($HiddenDirs || !/^\./) - ) { # directory - $Pages{$_} = "" unless defined $Pages{$_}; - $Pages{$_} .= "$dir/$_:"; - push(@subdirs, $_); - } elsif (/\.pod\z/) { # .pod - s/\.pod\z//; - $Pages{$_} = "" unless defined $Pages{$_}; - $Pages{$_} .= "$dir/$_.pod:"; - push(@pods, "$dir/$_.pod"); - } elsif (/\.html\z/) { # .html - s/\.html\z//; - $Pages{$_} = "" unless defined $Pages{$_}; - $Pages{$_} .= "$dir/$_.pod:"; - } elsif (/\.pm\z/) { # .pm - s/\.pm\z//; - $Pages{$_} = "" unless defined $Pages{$_}; - $Pages{$_} .= "$dir/$_.pm:"; - push(@pods, "$dir/$_.pm"); - } elsif (-T "$dir/$_") { # script(?) - local *F; - if (open(F, "$dir/$_")) { - my $line; - while (defined($line = <F>)) { - if ($line =~ /^=(?:pod|head1)/) { - $Pages{$_} = "" unless defined $Pages{$_}; - $Pages{$_} .= "$dir/$_.pod:"; - last; - } - } - close(F); - } - } - } - closedir(DIR); - - # recurse on the subdirectories if necessary - if ($recurse) { - foreach my $subdir (@subdirs) { - scan_dir("$dir/$subdir", $recurse); - } - } -} - -# -# scan_headings - scan a pod file for head[1-6] tags, note the tags, and -# build an index. -# -sub scan_headings { - my($sections, @data) = @_; - my($tag, $which_head, $otitle, $listdepth, $index); - - local $Ignore = 0; - - $listdepth = 0; - $index = ""; - - # scan for =head directives, note their name, and build an index - # pointing to each of them. - foreach my $line (@data) { - if ($line =~ /^=(head)([1-6])\s+(.*)/) { - ($tag, $which_head, $otitle) = ($1,$2,$3); - - my $title = depod( $otitle ); - my $name = anchorify( $title ); - $$sections{$name} = 1; - $title = process_text( \$otitle ); - - while ($which_head != $listdepth) { - if ($which_head > $listdepth) { - $index .= "\n" . ("\t" x $listdepth) . "<ul>\n"; - $listdepth++; - } elsif ($which_head < $listdepth) { - $listdepth--; - $index .= "\n" . ("\t" x $listdepth) . "</ul>\n"; - } - } - - $index .= "\n" . ("\t" x $listdepth) . "<li>" . - "<a href=\"#" . $name . "\">" . - $title . 
"</a></li>"; - } - } - - # finish off the lists - while ($listdepth--) { - $index .= "\n" . ("\t" x $listdepth) . "</ul>\n"; - } - - # get rid of bogus lists - $index =~ s,\t*<ul>\s*</ul>\n,,g; - - return $index; -} - -# -# scan_items - scans the pod specified by $pod for =item directives. we -# will use this information later on in resolving C<> links. -# -sub scan_items { - my( $itemref, $pod, @poddata ) = @_; - my($i, $item); - local $_; - - $pod =~ s/\.pod\z//; - $pod .= ".html" if $pod; - - foreach $i (0..$#poddata) { - my $txt = depod( $poddata[$i] ); - - # figure out what kind of item it is. - # Build string for referencing this item. - if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list - next unless $1; - $item = $1; - } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list - $item = $1; - } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list - $item = $1; - } else { - next; - } - my $fid = fragment_id( $item ); - $$itemref{$fid} = "$pod" if $fid; - } -} - -# -# process_head - convert a pod head[1-6] tag and convert it to HTML format. -# -sub process_head { - my($fh, $tag, $heading, $hasindex) = @_; - - # figure out the level of the =head - $tag =~ /head([1-6])/; - my $level = $1; - - finish_list( $fh ); - - print $fh "<p>\n"; - if( $level == 1 && ! $Top ){ - print $fh "<a href=\"#__index__\"><small>$Backlink</small></a>\n" - if $hasindex and $Backlink; - print $fh "</p>\n<hr />\n" - } else { - print $fh "</p>\n"; - } - - my $name = anchorify( depod( $heading ) ); - my $convert = process_text( \$heading ); - print $fh "<h$level><a name=\"$name\">$convert</a></h$level>\n"; -} - - -# -# emit_item_tag - print an =item's text -# Note: The global $EmittedItem is used for inhibiting self-references. 
-# -my $EmittedItem; - -sub emit_item_tag { - my( $fh, $otext, $text, $compact ) = @_; - my $item = fragment_id( depod($text) , -generate); - Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile") - if !defined $item; - $EmittedItem = $item; - ### print STDERR "emit_item_tag=$item ($text)\n"; - - print $fh '<strong>'; - if ($Items_Named{$item}++) { - print $fh process_text( \$otext ); - } else { - my $name = $item; - $name = anchorify($name); - print $fh qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>'; - } - print $fh "</strong>"; - undef( $EmittedItem ); -} - -sub new_listitem { - my ($fh, $tag) = @_; - # Open tag for definition list as we have something to put in it - if( ($tag ne 'dl') && ($ListNewTerm) ){ - print $fh "<dd>\n"; - $ListNewTerm = 0; - } - - if( $Items_Seen[$Listlevel]++ == 0 ){ - # start of new list - push( @Listtype, "$tag" ); - print $fh "<$tag>\n"; - } else { - # if this is not the first item, close the previous one - if ( $tag eq 'dl' ){ - print $fh "</dd>\n" unless $ListNewTerm; - } else { - print $fh "</li>\n"; - } - } - my $opentag = $tag eq 'dl' ? 'dt' : 'li'; - print $fh "<$opentag>"; -} - -# -# process_item - convert a pod item tag and convert it to HTML format. -# -sub process_item { - my ($fh, $otext) = @_; - - # lots of documents start a list without doing an =over. this is - # bad! but, the proper thing to do seems to be to just assume - # they did do an =over. so warn them once and then continue. - if( $Listlevel == 0 ){ - warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph. 
ignoring.\n" unless $Quiet; - process_over(); - } - - # remove formatting instructions from the text - my $text = depod( $otext ); - - # all the list variants: - if( $text =~ /\A\*/ ){ # bullet - new_listitem( $fh, 'ul' ); - if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text - my $tag = $1; - $otext =~ s/\A\*\s+//; - emit_item_tag( $fh, $otext, $tag, 1 ); - print $fh "\n"; - } - - } elsif( $text =~ /\A\d+/ ){ # numbered list - new_listitem( $fh, 'ol' ); - if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text - my $tag = $1; - $otext =~ s/\A\d+\.?\s*//; - emit_item_tag( $fh, $otext, $tag, 1 ); - print $fh "\n"; - } - - } else { # definition list - # new_listitem takes care of opening the <dt> tag - new_listitem( $fh, 'dl' ); - if ($text =~ /\A(.+)\Z/s ){ # should have text - emit_item_tag( $fh, $otext, $text, 1 ); - # write the definition term and close <dt> tag - print $fh "</dt>\n"; - } - # trigger opening a <dd> tag for the actual definition; will not - # happen if next paragraph is also a definition term (=item) - $ListNewTerm = 1; - } - print $fh "\n"; -} - -# -# process_over - process a pod over tag and start a corresponding HTML list. -# -sub process_over { - # start a new list - $Listlevel++; - push( @Items_Seen, 0 ); -} - -# -# process_back - process a pod back tag and convert it to HTML format. -# -sub process_back { - my $fh = shift; - if( $Listlevel == 0 ){ - warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph. ignoring.\n" unless $Quiet; - return; - } - - # close off the list. note, I check to see if $Listtype[$Listlevel] is - # defined because an =item directive may have never appeared and thus - # $Listtype[$Listlevel] may have never been initialized. 
- $Listlevel--; - if( defined $Listtype[$Listlevel] ){ - if ( $Listtype[$Listlevel] eq 'dl' ){ - print $fh "</dd>\n" unless $ListNewTerm; - } else { - print $fh "</li>\n"; - } - print $fh "</$Listtype[$Listlevel]>\n"; - pop( @Listtype ); - $ListNewTerm = 0; - } - - # clean up item count - pop( @Items_Seen ); -} - -# -# process_cut - process a pod cut tag, thus start ignoring pod directives. -# -sub process_cut { - $Ignore = 1; -} - -# -# process_pod - process a pod tag, thus stop ignoring pod directives -# until we see a corresponding cut. -# -sub process_pod { - # no need to set $Ignore to 0 cause the main loop did it -} - -# -# process_for - process a =for pod tag. if it's for html, spit -# it out verbatim, if illustration, center it, otherwise ignore it. -# -sub process_for { - my ($fh, $whom, $text) = @_; - if ( $whom =~ /^(pod2)?html$/i) { - print $fh $text; - } elsif ($whom =~ /^illustration$/i) { - 1 while chomp $text; - for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { - $text .= $ext, last if -r "$text$ext"; - } - print $fh qq{<p align="center"><img src="$text" alt="$text illustration" /></p>}; - } -} - -# -# process_begin - process a =begin pod tag. this pushes -# whom we're beginning on the begin stack. if there's a -# begin stack, we only print if it us. -# -sub process_begin { - my ($fh, $whom, $text) = @_; - $whom = lc($whom); - push (@Begin_Stack, $whom); - if ( $whom =~ /^(pod2)?html$/) { - print $fh $text if $text; - } -} - -# -# process_end - process a =end pod tag. pop the -# begin stack. die if we're mismatched. 
-# -sub process_end { - my($whom, $text) = @_; - $whom = lc($whom); - if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) { - Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n") - } - pop( @Begin_Stack ); -} - -# -# process_pre - indented paragraph, made into <pre></pre> -# -sub process_pre { - my( $text ) = @_; - my( $rest ); - return if $Ignore; - - $rest = $$text; - - # insert spaces in place of tabs - $rest =~ s#(.+)# - my $line = $1; - 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e; - $line; - #eg; - - # convert some special chars to HTML escapes - $rest = html_escape($rest); - - # try and create links for all occurrences of perl.* within - # the preformatted text. - $rest =~ s{ - (\s*)(perl\w+) - }{ - if ( defined $Pages{$2} ){ # is a link - qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>); - } elsif (defined $Pages{dosify($2)}) { # is a link - qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>); - } else { - "$1$2"; - } - }xeg; - $rest =~ s{ - (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? - }{ - my $url ; - if ( $Htmlfileurl ne '' ){ - # Here, we take advantage of the knowledge - # that $Htmlfileurl ne '' implies $Htmlroot eq ''. - # Since $Htmlroot eq '', we need to prepend $Htmldir - # on the fron of the link to get the absolute path - # of the link's target. We check for a leading '/' - # to avoid corrupting links that are #, file:, etc. - my $old_url = $3 ; - $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/}; - $url = relativize_url( "$old_url.html", $Htmlfileurl ); - } else { - $url = "$3.html" ; - } - "$1$url" ; - }xeg; - - # Look for embedded URLs and make them into links. We don't - # relativize them since they are best left as the author intended. - - my $urls = '(' . join ('|', qw{ - http - telnet - mailto - news - gopher - file - wais - ftp - } ) - . 
')'; - - my $ltrs = '\w'; - my $gunk = '/#~:.?+=&%@!\-'; - my $punc = '.:!?\-;'; - my $any = "${ltrs}${gunk}${punc}"; - - $rest =~ s{ - \b # start at word boundary - ( # begin $1 { - $urls : # need resource and a colon - (?!:) # Ignore File::, among others. - [$any] +? # followed by one or more of any valid - # character, but be conservative and - # take only what you need to.... - ) # end $1 } - (?= - " > # maybe pre-quoted '<a href="...">' - | # or: - [$punc]* # 0 or more punctuation - (?: # followed - [^$any] # by a non-url char - | # or - $ # end of the string - ) # - | # or else - $ # then end of the string - ) - }{<a href="$1">$1</a>}igox; - - # text should be as it is (verbatim) - $$text = $rest; -} - - -# -# pure text processing -# -# pure_text/inIS_text: differ with respect to automatic C<> recognition. -# we don't want this to happen within IS -# -sub pure_text($){ - my $text = shift(); - process_puretext( $text, 1 ); -} - -sub inIS_text($){ - my $text = shift(); - process_puretext( $text, 0 ); -} - -# -# process_puretext - process pure text (without pod-escapes) converting -# double-quotes and handling implicit C<> links. -# -sub process_puretext { - my($text, $notinIS) = @_; - - ## Guessing at func() or [\$\@%&]*var references in plain text is destined - ## to produce some strange looking ref's. uncomment to disable: - ## $notinIS = 0; - - my(@words, $lead, $trail); - - # keep track of leading and trailing white-space - $lead = ($text =~ s/\A(\s+)//s ? $1 : ""); - $trail = ($text =~ s/(\s+)\Z//s ? 
$1 : ""); - - # split at space/non-space boundaries - @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text ); - - # process each word individually - foreach my $word (@words) { - # skip space runs - next if $word =~ /^\s*$/; - # see if we can infer a link or a function call - # - # NOTE: This is a word based search, it won't automatically - # mark "substr($var, 1, 2)" because the 1st word would be "substr($var" - # User has to enclose those with proper C<> - - if( $notinIS && $word =~ - m/ - ^([a-z_]{2,}) # The function name - \( - ([0-9][a-z]* # Manual page(1) or page(1M) - |[^)]*[\$\@\%][^)]+ # ($foo), (1, @foo), (%hash) - | # () - ) - \) - ([.,;]?)$ # a possible punctuation follows - /xi - ) { - # has parenthesis so should have been a C<> ref - ## try for a pagename (perlXXX(1))? - my( $func, $args, $rest ) = ( $1, $2, $3 || '' ); - if( $args =~ /^\d+$/ ){ - my $url = page_sect( $word, '' ); - if( defined $url ){ - $word = qq(<a href="$url" class="man">the $word manpage</a>$rest); - next; - } - } - ## try function name for a link, append tt'ed argument list - $word = emit_C( $func, '', "($args)") . $rest; - -#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing. -## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) { -## # perl variables, should be a C<> ref -## $word = emit_C( $word ); - - } elsif ($word =~ m,^\w+://\w,) { - # looks like a URL - # Don't relativize it: leave it as the author intended - $word = qq(<a href="$word">$word</a>); - } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { - # looks like an e-mail address - my ($w1, $w2, $w3) = ("", $word, ""); - ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/; - ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/; - $word = qq($w1<a href="mailto:$w2">$w2</a>$w3); - } else { - $word = html_escape($word) if $word =~ /["&<>]/; - } - } - - # put everything back together - return $lead . join( '', @words ) . 
$trail; -} - - -# -# process_text - handles plaintext that appears in the input pod file. -# there may be pod commands embedded within the text so those must be -# converted to html commands. -# - -sub process_text1($$;$$); -sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' } -sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 } - -sub process_text { - return if $Ignore; - my( $tref ) = @_; - my $res = process_text1( 0, $tref ); - $res =~ s/\s+$//s; - $$tref = $res; -} - -sub process_text_rfc_links { - my $text = shift; - - # For every "RFCnnnn" or "RFC nnn", link it to the authoritative - # ource. Do not use the /i modifier here. Require "RFC" to be written in - # in capital letters. - - $text =~ s{ - (?<=[^<>[:alpha:]]) # Make sure this is not an URL already - (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits - } - {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx; - - $text; -} - -sub process_text1($$;$$){ - my( $lev, $rstr, $func, $closing ) = @_; - my $res = ''; - - unless (defined $func) { - $func = ''; - $lev++; - } - - if( $func eq 'B' ){ - # B<text> - boldface - $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>'; - - } elsif( $func eq 'C' ){ - # C<code> - can be a ref or <code></code> - # need to extract text - my $par = go_ahead( $rstr, 'C', $closing ); - - ## clean-up of the link target - my $text = depod( $par ); - - ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ; - ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n"; - - $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); - - } elsif( $func eq 'E' ){ - # E<x> - convert to character - $$rstr =~ s/^([^>]*)>//; - my $escape = $1; - $escape =~ s/^0?x([\dA-F]+)$/#x$1/i - or $escape =~ s/^0([0-7]+)$/'#'.oct($1)/ei - or $escape =~ s/^(\d+)$/#$1/; - $res = "&$escape;"; - - } elsif( $func eq 'F' ){ - # F<filename> - italicize - $res = '<em class="file">' . process_text1( $lev, $rstr ) . 
'</em>'; - - } elsif( $func eq 'I' ){ - # I<text> - italicize - $res = '<em>' . process_text1( $lev, $rstr ) . '</em>'; - - } elsif( $func eq 'L' ){ - # L<link> - link - ## L<text|cross-ref> => produce text, use cross-ref for linking - ## L<cross-ref> => make text from cross-ref - ## need to extract text - my $par = go_ahead( $rstr, 'L', $closing ); - - # some L<>'s that shouldn't be: - # a) full-blown URL's are emitted as-is - if( $par =~ m{^\w+://}s ){ - return make_URL_href( $par ); - } - # b) C<...> is stripped and treated as C<> - if( $par =~ /^C<(.*)>$/ ){ - my $text = depod( $1 ); - return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); - } - - # analyze the contents - $par =~ s/\n/ /g; # undo word-wrapped tags - my $opar = $par; - my $linktext; - if( $par =~ s{^([^|]+)\|}{} ){ - $linktext = $1; - } - - # make sure sections start with a / - $par =~ s{^"}{/"}; - - my( $page, $section, $ident ); - - # check for link patterns - if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident - # we've got a name/ident (no quotes) - if (length $2) { - ( $page, $ident ) = ( $1, $2 ); - } else { - ( $page, $section ) = ( $1, $2 ); - } - ### print STDERR "--> L<$par> to page $page, ident $ident\n"; - - } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section" - # even though this should be a "section", we go for ident first - ( $page, $ident ) = ( $1, $2 ); - ### print STDERR "--> L<$par> to page $page, section $section\n"; - - } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes - ( $page, $section ) = ( '', $par ); - ### print STDERR "--> L<$par> to void page, section $section\n"; - - } else { - ( $page, $section ) = ( $par, '' ); - ### print STDERR "--> L<$par> to page $par, void section\n"; - } - - # now, either $section or $ident is defined. the convoluted logic - # below tries to resolve L<> according to what the user specified. - # failing this, we try to find the next best thing... 
- my( $url, $ltext, $fid ); - - RESOLVE: { - if( defined $ident ){ - ## try to resolve $ident as an item - ( $url, $fid ) = coderef( $page, $ident ); - if( $url ){ - if( ! defined( $linktext ) ){ - $linktext = $ident; - $linktext .= " in " if $ident && $page; - $linktext .= "the $page manpage" if $page; - } - ### print STDERR "got coderef url=$url\n"; - last RESOLVE; - } - ## no luck: go for a section (auto-quoting!) - $section = $ident; - } - ## now go for a section - my $htmlsection = htmlify( $section ); - $url = page_sect( $page, $htmlsection ); - if( $url ){ - if( ! defined( $linktext ) ){ - $linktext = $section; - $linktext .= " in " if $section && $page; - $linktext .= "the $page manpage" if $page; - } - ### print STDERR "got page/section url=$url\n"; - last RESOLVE; - } - ## no luck: go for an ident - if( $section ){ - $ident = $section; - } else { - $ident = $page; - $page = undef(); - } - ( $url, $fid ) = coderef( $page, $ident ); - if( $url ){ - if( ! defined( $linktext ) ){ - $linktext = $ident; - $linktext .= " in " if $ident && $page; - $linktext .= "the $page manpage" if $page; - } - ### print STDERR "got section=>coderef url=$url\n"; - last RESOLVE; - } - - # warning; show some text. - $linktext = $opar unless defined $linktext; - warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet; - } - - # now we have a URL or just plain code - $$rstr = $linktext . '>' . $$rstr; - if( defined( $url ) ){ - $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>'; - } else { - $res = '<em>' . process_text1( $lev, $rstr ) . 
'</em>'; - } - - } elsif( $func eq 'S' ){ - # S<text> - non-breaking spaces - $res = process_text1( $lev, $rstr ); - $res =~ s/ / /g; - - } elsif( $func eq 'X' ){ - # X<> - ignore - warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n" - unless $$rstr =~ s/^[^>]*>// or $Quiet; - } elsif( $func eq 'Z' ){ - # Z<> - empty - warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n" - unless $$rstr =~ s/^>// or $Quiet; - - } else { - my $term = pattern $closing; - while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){ - # all others: either recurse into new function or - # terminate at closing angle bracket(s) - my $pt = $1; - $pt .= $2 if !$3 && $lev == 1; - $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt ); - return $res if !$3 && $lev > 1; - if( $3 ){ - $res .= process_text1( $lev, $rstr, $3, closing $4 ); - } - } - if( $lev == 1 ){ - $res .= pure_text( $$rstr ); - } elsif( ! $Quiet ) { - my $snippet = substr($$rstr,0,60); - warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" - - } - $res = process_text_rfc_links($res); - } - return $res; -} - -# -# go_ahead: extract text of an IS (can be nested) -# -sub go_ahead($$$){ - my( $rstr, $func, $closing ) = @_; - my $res = ''; - my @closing = ($closing); - while( $$rstr =~ - s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){ - $res .= $1; - unless( $3 ){ - shift @closing; - return $res unless @closing; - } else { - unshift @closing, closing $4; - } - $res .= $2; - } - unless ($Quiet) { - my $snippet = substr($$rstr,0,60); - warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n" - } - return $res; -} - -# -# emit_C - output result of C<text> -# $text is the depod-ed text -# -sub emit_C($;$$){ - my( $text, $nocode, $args ) = @_; - $args = '' unless defined $args; - my $res; - my( $url, $fid ) = coderef( undef(), $text ); - - # need HTML-safe text - my $linktext = html_escape( "$text$args" ); - - if( defined( $url ) && - 
(!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){ - $res = "<a href=\"$url\"><code>$linktext</code></a>"; - } elsif( 0 && $nocode ){ - $res = $linktext; - } else { - $res = "<code>$linktext</code>"; - } - return $res; + $Backlink = $opt_backlink if defined $opt_backlink; + $Css = $opt_css if defined $opt_css; + $Header = $opt_header if defined $opt_header; + $Htmldir = $opt_htmldir if defined $opt_htmldir; + $Htmlroot = $opt_htmlroot if defined $opt_htmlroot; + $Doindex = $opt_index if defined $opt_index; + $Podfile = $opt_infile if defined $opt_infile; + $Htmlfile = $opt_outfile if defined $opt_outfile; + $Poderrors = $opt_poderrors if defined $opt_poderrors; + $Podroot = $opt_podroot if defined $opt_podroot; + $Quiet = $opt_quiet if defined $opt_quiet; + $Recurse = $opt_recurse if defined $opt_recurse; + $Title = $opt_title if defined $opt_title; + $Verbose = $opt_verbose if defined $opt_verbose; } # @@ -1820,240 +483,6 @@ sub html_escape { return $rest; } - -# -# dosify - convert filenames to 8.3 -# -sub dosify { - my($str) = @_; - return lc($str) if $^O eq 'VMS'; # VMS just needs casing - if ($Is83) { - $str = lc $str; - $str =~ s/(\.\w+)/substr ($1,0,4)/ge; - $str =~ s/(\w+)/substr ($1,0,8)/ge; - } - return $str; -} - -# -# page_sect - make a URL from the text of a L<> -# -sub page_sect($$) { - my( $page, $section ) = @_; - my( $linktext, $page83, $link); # work strings - - # check if we know that this is a section in this page - if (!defined $Pages{$page} && defined $Sections{$page}) { - $section = $page; - $page = ""; - ### print STDERR "reset page='', section=$section\n"; - } - - $page83=dosify($page); - $page=$page83 if (defined $Pages{$page83}); - if ($page eq "") { - $link = "#" . anchorify( $section ); - } elsif ( $page =~ /::/ ) { - $page =~ s,::,/,g; - # Search page cache for an entry keyed under the html page name, - # then look to see what directory that page might be in. NOTE: - # this will only find one page. 
A better solution might be to produce - # an intermediate page that is an index to all such pages. - my $page_name = $page ; - $page_name =~ s,^.*/,,s ; - if ( defined( $Pages{ $page_name } ) && - $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/ - ) { - $page = $1 ; - } - else { - # NOTE: This branch assumes that all A::B pages are located in - # $Htmlroot/A/B.html . This is often incorrect, since they are - # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could - # analyze the contents of %Pages and figure out where any - # cousins of A::B are, then assume that. So, if A::B isn't found, - # but A::C is found in lib/A/C.pm, then A::B is assumed to be in - # lib/A/B.pm. This is also limited, but it's an improvement. - # Maybe a hints file so that the links point to the correct places - # nonetheless? - - } - $link = "$Htmlroot/$page.html"; - $link .= "#" . anchorify( $section ) if ($section); - } elsif (!defined $Pages{$page}) { - $link = ""; - } else { - $section = anchorify( $section ) if $section ne ""; - ### print STDERR "...section=$section\n"; - - # if there is a directory by the name of the page, then assume that an - # appropriate section will exist in the subdirectory -# if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { - if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { - $link = "$Htmlroot/$1/$section.html"; - ### print STDERR "...link=$link\n"; - - # since there is no directory by the name of the page, the section will - # have to exist within a .html of the same name. thus, make sure there - # is a .pod or .pm that might become that .html - } else { - $section = "#$section" if $section; - ### print STDERR "...section=$section\n"; - - # check if there is a .pod with the page name. 
- # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm) - if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) { - $link = "$Htmlroot/$1.html$section"; - } else { - $link = ""; - } - } - } - - if ($link) { - # Here, we take advantage of the knowledge that $Htmlfileurl ne '' - # implies $Htmlroot eq ''. This means that the link in question - # needs a prefix of $Htmldir if it begins with '/'. The test for - # the initial '/' is done to avoid '#'-only links, and to allow - # for other kinds of links, like file:, ftp:, etc. - my $url ; - if ( $Htmlfileurl ne '' ) { - $link = "$Htmldir$link" if $link =~ m{^/}s; - $url = relativize_url( $link, $Htmlfileurl ); -# print( " b: [$link,$Htmlfileurl,$url]\n" ); - } - else { - $url = $link ; - } - return $url; - - } else { - return undef(); - } -} - -# -# relativize_url - convert an absolute URL to one relative to a base URL. -# Assumes both end in a filename. -# -sub relativize_url { - my ($dest,$source) = @_ ; - - my ($dest_volume,$dest_directory,$dest_file) = - File::Spec::Unix->splitpath( $dest ) ; - $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ; - - my ($source_volume,$source_directory,$source_file) = - File::Spec::Unix->splitpath( $source ) ; - $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ; - - my $rel_path = '' ; - if ( $dest ne '' ) { - $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ; - } - - if ( $rel_path ne '' && - substr( $rel_path, -1 ) ne '/' && - substr( $dest_file, 0, 1 ) ne '#' - ) { - $rel_path .= "/$dest_file" ; - } - else { - $rel_path .= "$dest_file" ; - } - - return $rel_path ; -} - - -# -# coderef - make URL from the text of a C<> -# -sub coderef($$){ - my( $page, $item ) = @_; - my( $url ); - - my $fid = fragment_id( $item ); - - if( defined( $page ) && $page ne "" ){ - # we have been given a $page... 
- $page =~ s{::}{/}g; - - Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile") - if !defined $fid; - # Do we take it? Item could be a section! - my $base = $Items{$fid} || ""; - $base =~ s{[^/]*/}{}; - if( $base ne "$page.html" ){ - ### print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n"; - $page = undef(); - } - - } else { - # no page - local items precede cached items - if( defined( $fid ) ){ - if( exists $Local_Items{$fid} ){ - $page = $Local_Items{$fid}; - } else { - $page = $Items{$fid}; - } - } - } - - # if there was a pod file that we found earlier with an appropriate - # =item directive, then create a link to that page. - if( defined $page ){ - if( $page ){ - if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){ - $page = $1 . '.html'; - } - my $link = "$Htmlroot/$page#" . anchorify($fid); - - # Here, we take advantage of the knowledge that $Htmlfileurl - # ne '' implies $Htmlroot eq ''. - if ( $Htmlfileurl ne '' ) { - $link = "$Htmldir$link" ; - $url = relativize_url( $link, $Htmlfileurl ) ; - } else { - $url = $link ; - } - } else { - $url = "#" . anchorify($fid); - } - - confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/; - } - return( $url, $fid ); -} - - - -# -# Adapted from Nick Ing-Simmons' PodToHtml package. -sub relative_url { - my $source_file = shift ; - my $destination_file = shift; - - my $source = URI::file->new_abs($source_file); - my $uo = URI::file->new($destination_file,$source)->abs; - return $uo->rel->as_string; -} - - -# -# finish_list - finish off any pending HTML lists. this should be called -# after the entire pod file has been read and converted. -# -sub finish_list { - my $fh = shift; - if( $Listlevel ){ - warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph. 
ignoring.\n" unless $Quiet; - while( $Listlevel ){ - process_back( $fh ); - } - } -} - # # htmlify - converts a pod section specification to a suitable section # specification for HTML. Note that we keep spaces and special characters @@ -2082,166 +511,153 @@ sub anchorify { } # -# depod - convert text by eliminating all interior sequences -# Note: can be called with copy or modify semantics +# store POD files in %Pages # -my %E2c; -$E2c{lt} = '<'; -$E2c{gt} = '>'; -$E2c{sol} = '/'; -$E2c{verbar} = '|'; -$E2c{amp} = '&'; # in Tk's pods - -sub depod1($;$$); - -sub depod($){ - my $string; - if( ref( $_[0] ) ){ - $string = ${$_[0]}; - ${$_[0]} = depod1( \$string ); - } else { - $string = $_[0]; - depod1( \$string ); - } -} +sub _save_page { + my ($modspec, $modname) = @_; -sub depod1($;$$){ - my( $rstr, $func, $closing ) = @_; - my $res = ''; - return $res unless defined $$rstr; - if( ! defined( $func ) ){ - # skip to next begin of an interior sequence - while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){ - # recurse into its text - $res .= $1 . depod1( $rstr, $2, closing $3); - } - $res .= $$rstr; - } elsif( $func eq 'E' ){ - # E<x> - convert to character - $$rstr =~ s/^([^>]*)>//; - $res .= $E2c{$1} || ""; - } elsif( $func eq 'X' ){ - # X<> - ignore - $$rstr =~ s/^[^>]*>//; - } elsif( $func eq 'Z' ){ - # Z<> - empty - $$rstr =~ s/^>//; - } else { - # all others: either recurse into new function or - # terminate at closing angle bracket - my $term = pattern $closing; - while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){ - $res .= $1; - last unless $3; - $res .= depod1( $rstr, $3, closing $4 ); - } - ## If we're here and $2 ne '>': undelimited interior sequence. - ## Ignored, as this is called without proper indication of where we are. - ## Rely on process_text to produce diagnostics. 
- } - return $res; -} + # Remove Podroot from path + foreach my $podpath (@Podpath) { + my $beg_path = File::Spec->catdir($Podroot, $podpath); + if ($beg_path eq substr($modspec, 0, length($beg_path))) { + # Replace $Podroot/$podpath with $podpath + substr($modspec, 0, length($beg_path), $podpath); + last; + } + } -{ - my %seen; # static fragment record hash + # Convert path to unix style path + $modspec = Unixify::unixify($modspec); -sub fragment_id_readable { - my $text = shift; - my $generate = shift; # optional flag + my ($file, $dir) = fileparse($modspec, qr/\.[^.]*/); # strip .ext + $Pages{$modname} = $dir.$file; +} - my $orig = $text; +1; - # leave the words for the fragment identifier, - # change everything else to underbars. - $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency. - $text =~ s/_{2,}/_/g; - $text =~ s/\A_//; - $text =~ s/_\Z//; +package Pod::Simple::XHTML::LocalPodLinks; +use strict; +use warnings; +use base 'Pod::Simple::XHTML'; - unless ($text) - { - # Nothing left after removing punctuation, so leave it as is - # E.g. if option is named: "=item -#" +use File::Spec; +use File::Spec::Unix; - $text = $orig; - } +__PACKAGE__->_accessorize( + 'htmldir', + 'htmlfileurl', + 'htmlroot', + 'pages', # Page name => relative/path/to/page from root POD dir + 'quiet', + 'verbose', +); + +sub resolve_pod_page_link { + my ($self, $to, $section) = @_; + + return undef unless defined $to || defined $section; + if (defined $section) { + $section = '#' . $self->idify($section, 1); + return $section unless defined $to; + } else { + $section = ''; + } + + my $path; # path to $to according to %Pages + unless (exists $self->pages->{$to}) { + # Try to find a POD that ends with $to and use that. 
+ # e.g., given L<XHTML>, if there is no $Podpath/XHTML in %Pages, + # look for $Podpath/*/XHTML in %Pages, with * being any path, + # as a substitute (e.g., $Podpath/Pod/Simple/XHTML) + my @matches; + foreach my $modname (keys %{$self->pages}) { + push @matches, $modname if $modname =~ /::$to\z/; + } - if ($generate) { - if ( exists $seen{$text} ) { - # This already exists, make it unique - $seen{$text}++; - $text = $text . $seen{$text}; + if ($#matches == -1) { + warn "Cannot find \"$to\" in podpath: " . + "cannot find suitable replacement path, cannot resolve link\n" + unless $self->quiet; + return ''; + } elsif ($#matches == 0) { + warn "Cannot find \"$to\" in podpath: " . + "using $matches[0] as replacement path to $to\n" + unless $self->quiet; + $path = $self->pages->{$matches[0]}; } else { - $seen{$text} = 1; # first time seen this fragment + warn "Cannot find \"$to\" in podpath: " . + "more than one possible replacement path to $to, " . + "using $matches[-1]\n" unless $self->quiet; + # Use [-1] so newer (higher numbered) perl PODs are used + $path = $self->pages->{$matches[-1]}; } + } else { + $path = $self->pages->{$to}; } - $text; -}} - -my @HC; -sub fragment_id_obfuscated { # This was the old "_2d_2d__" - my $text = shift; - my $generate = shift; # optional flag - - # text? Normalize by obfuscating the fragment id to make it unique - $text =~ s/\s+/_/sg; - - $text =~ s{(\W)}{ - defined( $HC[ord($1)] ) ? $HC[ord($1)] - : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe; - $text = substr( $text, 0, 50 ); + # The use of catdir here (instead of catfile) ensures there will be one + # '/' between htmlroot and $path; not zero (if htmlroot == ''), not two + # (if htmlroot =~ m#/\z# and $path =~ m#\a/#), just one. 
+ my $url = File::Spec::Unix->catdir( Unixify::unixify($self->htmlroot), + $path); + if ($self->htmlfileurl ne '') { + # then $self->htmlroot eq '' (by definition of htmlfileurl) so + # $self->htmldir needs to be prepended to link to get the absolute path + # that will be relativized + $url = relativize_url( + File::Spec::Unix->catdir( Unixify::unixify($self->htmldir), $url), + $self->htmlfileurl # already unixified + ); + } - $text; + return $url . ".html$section"; } # -# fragment_id - construct a fragment identifier from: -# a) =item text -# b) contents of C<...> +# relativize_url - convert an absolute URL to one relative to a base URL. +# Assumes both end in a filename. # +sub relativize_url { + my ($dest, $source) = @_; -sub fragment_id { - my $text = shift; - my $generate = shift; # optional flag - - $text =~ s/\s+\Z//s; - if( $text ){ - # a method or function? - return $1 if $text =~ /(\w+)\s*\(/; - return $1 if $text =~ /->\s*(\w+)\s*\(?/; - - # a variable name? - return $1 if $text =~ /^([\$\@%*]\S+)/; - - # some pattern matching operator? - return $1 if $text =~ m|^(\w+/).*/\w*$|; + # Remove each file from its path + my ($dest_volume, $dest_directory, $dest_file) = + File::Spec::Unix->splitpath( $dest ); + $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ); - # fancy stuff... like "do { }" - return $1 if $text =~ m|^(\w+)\s*{.*}$|; + my ($source_volume, $source_directory, $source_file) = + File::Spec::Unix->splitpath( $source ); + $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ); - # honour the perlfunc manpage: func [PAR[,[ ]PAR]...] - # and some funnies with ... Module ... 
- return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$}; - return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$}; + my $rel_path = ''; + if ($dest ne '') { + $rel_path = File::Spec::Unix->abs2rel( $dest, $source ); + } - return fragment_id_readable($text, $generate); + if ($rel_path ne '' && substr( $rel_path, -1 ) ne '/') { + $rel_path .= "/$dest_file"; } else { - return; + $rel_path .= "$dest_file"; } + + return $rel_path; } -# -# make_URL_href - generate HTML href from URL -# Special treatment for CGI queries. -# -sub make_URL_href($){ - my( $url ) = @_; - if( $url !~ - s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){ - $url = "<a href=\"$url\">$url</a>"; - } - return $url; +1; + +package Unixify; +use warnings; +use strict; + +use File::Spec; +use File::Spec::Unix; + +sub unixify { + my $full_path = shift; + return '' unless $full_path; + + return File::Spec::Unix->catfile( # change \s to /s and such + File::Spec->splitdir($full_path)); } 1; diff --git a/ext/Pod-Html/t/crossref.pod b/ext/Pod-Html/t/crossref.pod new file mode 100644 index 0000000000..f6dddf1f58 --- /dev/null +++ b/ext/Pod-Html/t/crossref.pod @@ -0,0 +1,41 @@ +=head1 NAME + +htmlcrossref - Test HTML cross reference links + +=head1 LINKS + +L</"section1"> + +L<htmllink/section 2> + +L</"item1"> + +L</"non existant section"> + +L<var-copy> + +L<var-copy/$"> + +C<var-copy> + +C<var-copy/$"> + +L<podspec-copy/First:> + +C<podspec-copy/First:> + +L<notperldoc> + +=head1 TARGETS + +=head2 section1 + +This is section one. + +=over 4 + +=item item1 X<item> X<one> + +This is item one. 
+ +=back diff --git a/ext/Pod-Html/t/crossref.t b/ext/Pod-Html/t/crossref.t new file mode 100644 index 0000000000..ec178e0024 --- /dev/null +++ b/ext/Pod-Html/t/crossref.t @@ -0,0 +1,104 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +END { + rem_test_dir(); +} + +use strict; +use Cwd; +use File::Spec; +use File::Spec::Functions; +use Test::More tests => 1; + +SKIP: { + my $output = make_test_dir(); + skip "$output", 1 if $output; + + my ($v, $d) = splitpath(cwd(), 1); + my $relcwd = substr($d, length(File::Spec->rootdir())); + + convert_n_test("crossref", "cross references", + "--podpath=". catdir($relcwd, 't') . ":" . catdir($relcwd, 'testdir/test.lib'), + "--podroot=$v". File::Spec->rootdir, + "--quiet", + ); +} + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> + <li><a href="#TARGETS">TARGETS</a> + <ul> + <li><a href="#section1">section1</a></li> + </ul> + </li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmlcrossref - Test HTML cross reference links</p> + +<h1 id="LINKS">LINKS</h1> + +<p><a href="#section1">"section1"</a></p> + +<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/htmllink.html#section-2">"section 2" in htmllink</a></p> + +<p><a href="#item1">"item1"</a></p> + +<p><a href="#non-existant-section">"non existant section"</a></p> + +<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a></p> + +<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html#pod-">"$"" in var-copy</a></p> + +<p><code>var-copy</code></p> + +<p><code>var-copy/$"</code></p> + 
+<p><a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/podspec-copy.html#First:">"First:" in podspec-copy</a></p> + +<p><code>podspec-copy/First:</code></p> + +<p><a>notperldoc</a></p> + +<h1 id="TARGETS">TARGETS</h1> + +<h2 id="section1">section1</h2> + +<p>This is section one.</p> + +<dl> + +<dt id="item1">item1 </dt> +<dd> + +<p>This is item one.</p> + +</dd> +</dl> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/feature.pod b/ext/Pod-Html/t/feature.pod new file mode 100644 index 0000000000..4de4fb10cb --- /dev/null +++ b/ext/Pod-Html/t/feature.pod @@ -0,0 +1,21 @@ +=head1 Head 1 + +A paragraph + +=for html some html + +=begin image + +|--| +| | +|--| + +=end image + +Another paragraph + +=head1 Another Head 1 + +some text and a link L<crossref> + +=cut diff --git a/ext/Pod-Html/t/feature.t b/ext/Pod-Html/t/feature.t new file mode 100644 index 0000000000..5f27454327 --- /dev/null +++ b/ext/Pod-Html/t/feature.t @@ -0,0 +1,70 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Cwd; +use File::Spec::Functions; +use Test::More tests => 1; + +my $cwd = cwd(); + +convert_n_test("feature", "misc pod-html features", + "--backlink", + "--css=style.css", + "--header", # no styling b/c of --ccs + "--htmldir=". 
catdir($cwd, 't'), + "--noindex", + "--podpath=t", + "--podroot=$cwd", + "--title=a title", + + ); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title>a title</title> +<link rel="stylesheet" href="style.css" type="text/css" /> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body id="_podtop_"> +<table border="0" width="100%" cellspacing="0" cellpadding="3"> +<tr><td class="_podblock_" valign="middle"> +<big><strong><span class="_podblock_"> a title</span></strong></big> +</td></tr> +</table> + + + +<a href="#_podtop_"><h1 id="Head-1">Head 1</h1></a> + +<p>A paragraph</p> + + + +some html + +<p>Another paragraph</p> + +<a href="#_podtop_"><h1 id="Another-Head-1">Another Head 1</h1></a> + +<p>some text and a link <a href="t/crossref.html">crossref</a></p> + +<table border="0" width="100%" cellspacing="0" cellpadding="3"> +<tr><td class="_podblock_" valign="middle"> +<big><strong><span class="_podblock_"> a title</span></strong></big> +</td></tr> +</table> + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/feature2.pod b/ext/Pod-Html/t/feature2.pod new file mode 100644 index 0000000000..4de4fb10cb --- /dev/null +++ b/ext/Pod-Html/t/feature2.pod @@ -0,0 +1,21 @@ +=head1 Head 1 + +A paragraph + +=for html some html + +=begin image + +|--| +| | +|--| + +=end image + +Another paragraph + +=head1 Another Head 1 + +some text and a link L<crossref> + +=cut diff --git a/ext/Pod-Html/t/feature2.t b/ext/Pod-Html/t/feature2.t new file mode 100644 index 0000000000..feeb84f910 --- /dev/null +++ b/ext/Pod-Html/t/feature2.t @@ -0,0 +1,71 @@ +#!/usr/bin/perl -w # -*- perl -*- + + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Cwd; +use Test::More tests => 1; + +my $cwd = cwd(); + +convert_n_test("feature2", "misc pod-html features 
2", + "--backlink", + "--header", + "--podpath=.", + "--podroot=$cwd", + "--norecurse", + "--verbose", + ); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body id="_podtop_" style="background-color: white"> +<table border="0" width="100%" cellspacing="0" cellpadding="3"> +<tr><td class="_podblock_" style="background-color: #cccccc" valign="middle"> +<big><strong><span class="_podblock_"> </span></strong></big> +</td></tr> +</table> + + + +<ul id="index"> + <li><a href="#Head-1">Head 1</a></li> + <li><a href="#Another-Head-1">Another Head 1</a></li> +</ul> + +<a href="#_podtop_"><h1 id="Head-1">Head 1</h1></a> + +<p>A paragraph</p> + + + +some html + +<p>Another paragraph</p> + +<a href="#_podtop_"><h1 id="Another-Head-1">Another Head 1</h1></a> + +<p>some text and a link <a>crossref</a></p> + +<table border="0" width="100%" cellspacing="0" cellpadding="3"> +<tr><td class="_podblock_" style="background-color: #cccccc" valign="middle"> +<big><strong><span class="_podblock_"> </span></strong></big> +</td></tr> +</table> + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmldir1.pod b/ext/Pod-Html/t/htmldir1.pod new file mode 100644 index 0000000000..e505caaa18 --- /dev/null +++ b/ext/Pod-Html/t/htmldir1.pod @@ -0,0 +1,15 @@ +=head1 NAME + +htmldir - Test --htmldir feature + +=head1 LINKS + +Normal text, a L<link> to nowhere, + +a link to L<var-copy>, + +L<htmlescp>, + +L<feature/Another Head 1>, + +and another L<feature/"Another Head 1">. 
diff --git a/ext/Pod-Html/t/htmldir1.t b/ext/Pod-Html/t/htmldir1.t new file mode 100644 index 0000000000..a0e747ffe5 --- /dev/null +++ b/ext/Pod-Html/t/htmldir1.t @@ -0,0 +1,86 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +END { + rem_test_dir(); +} + +use strict; +use Cwd; +use File::Spec; +use File::Spec::Functions; +use Test::More tests => 2; + +# XXX Separate tests that rely on test.lib from the others so they are the only +# ones skipped (instead of all of them). This applies to htmldir{1,3,5}.t, and +# crossref.t (as of 10/29/11). +SKIP: { + my $output = make_test_dir(); + skip "$output", 2 if $output; + + my ($v, $d) = splitpath(cwd(), 1); + my $relcwd = substr($d, length(File::Spec->rootdir())); + + my $data_pos = tell DATA; # to read <DATA> twice + + + convert_n_test("htmldir1", "test --htmldir and --htmlroot 1a", + "--podpath=". catdir($relcwd, 't') . ":" . catfile($relcwd, 'testdir/test.lib'), + "--podroot=$v". File::Spec->rootdir, + "--htmldir=t", + ); + + seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) + + convert_n_test("htmldir1", "test --htmldir and --htmlroot 1b", + "--podpath=$relcwd", + "--podroot=$v". File::Spec->rootdir, + "--htmldir=". 
catfile $relcwd, 't', + "--htmlroot=/", + ); +} + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmldir - Test --htmldir feature</p> + +<h1 id="LINKS">LINKS</h1> + +<p>Normal text, a <a>link</a> to nowhere,</p> + +<p>a link to <a href="/[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a>,</p> + +<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p> + +<p><a href="/[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>,</p> + +<p>and another <a href="/[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>.</p> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmldir2.pod b/ext/Pod-Html/t/htmldir2.pod new file mode 100644 index 0000000000..1c9e97b3e1 --- /dev/null +++ b/ext/Pod-Html/t/htmldir2.pod @@ -0,0 +1,15 @@ +=head1 NAME + +htmldir - Test --htmldir feature + +=head1 LINKS + +Normal text, a L<link> to nowhere, + +a link to L<perlvar-copy>, + +L<htmlescp>, + +L<feature/Another Head 1>, + +and another L<feature/"Another Head 1">. 
diff --git a/ext/Pod-Html/t/htmldir2.t b/ext/Pod-Html/t/htmldir2.t new file mode 100644 index 0000000000..b5f5b48105 --- /dev/null +++ b/ext/Pod-Html/t/htmldir2.t @@ -0,0 +1,74 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Cwd; +use Test::More tests => 3; + +my $cwd = cwd(); +my $data_pos = tell DATA; # to read <DATA> twice + +convert_n_test("htmldir2", "test --htmldir and --htmlroot 2a", + "--podpath=t", + "--htmldir=t", +); + +seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) + +convert_n_test("htmldir2", "test --htmldir and --htmlroot 2b", + "--podpath=t", +); + +seek DATA, $data_pos, 0; # to read <DATA> thrice (expected output is the same) + +# this test makes sure paths are absolute unless --htmldir is specified +convert_n_test("htmldir2", "test --htmldir and --htmlroot 2c", + "--podpath=t", + "--podroot=$cwd", + "--norecurse", # testing --norecurse, too +); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmldir - Test --htmldir feature</p> + +<h1 id="LINKS">LINKS</h1> + +<p>Normal text, a <a>link</a> to nowhere,</p> + +<p>a link to <a>perlvar-copy</a>,</p> + +<p><a href="/t/htmlescp.html">htmlescp</a>,</p> + +<p><a href="/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>,</p> + +<p>and another <a href="/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>.</p> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmldir3.pod b/ext/Pod-Html/t/htmldir3.pod new file mode 100644 index 
0000000000..e505caaa18 --- /dev/null +++ b/ext/Pod-Html/t/htmldir3.pod @@ -0,0 +1,15 @@ +=head1 NAME + +htmldir - Test --htmldir feature + +=head1 LINKS + +Normal text, a L<link> to nowhere, + +a link to L<var-copy>, + +L<htmlescp>, + +L<feature/Another Head 1>, + +and another L<feature/"Another Head 1">. diff --git a/ext/Pod-Html/t/htmldir3.t b/ext/Pod-Html/t/htmldir3.t new file mode 100644 index 0000000000..805c9d9d41 --- /dev/null +++ b/ext/Pod-Html/t/htmldir3.t @@ -0,0 +1,83 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +END { + rem_test_dir(); +} + +use strict; +use Cwd; +use File::Spec; +use File::Spec::Functions; +use Test::More tests => 2; + +SKIP: { + my $output = make_test_dir(); + skip "$output", 2 if $output; + + my $cwd = cwd(); + my ($v, $d) = splitpath($cwd, 1); + my $relcwd = substr($d, length(File::Spec->rootdir())); + + my $data_pos = tell DATA; # to read <DATA> twice + + convert_n_test("htmldir3", "test --htmldir and --htmlroot 3a", + "--podpath=$relcwd", + "--podroot=$v". File::Spec->rootdir, + "--htmldir=". catdir($cwd, 't', ''), # test removal trailing slash, + ); + + seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) + + convert_n_test("htmldir3", "test --htmldir and --htmlroot 3b", + "--podpath=". catdir($relcwd, 't'), + "--podroot=$v". 
File::Spec->rootdir, + "--htmldir=t", + "--outfile=t/htmldir3.html", + ); +} + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmldir - Test --htmldir feature</p> + +<h1 id="LINKS">LINKS</h1> + +<p>Normal text, a <a>link</a> to nowhere,</p> + +<p>a link to <a href="[RELCURRENTWORKINGDIRECTORY]/testdir/test.lib/var-copy.html">var-copy</a>,</p> + +<p><a href="[RELCURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p> + +<p><a href="[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>,</p> + +<p>and another <a href="[RELCURRENTWORKINGDIRECTORY]/t/feature.html#Another-Head-1">"Another Head 1" in feature</a>.</p> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmldir4.pod b/ext/Pod-Html/t/htmldir4.pod new file mode 100644 index 0000000000..1c9e97b3e1 --- /dev/null +++ b/ext/Pod-Html/t/htmldir4.pod @@ -0,0 +1,15 @@ +=head1 NAME + +htmldir - Test --htmldir feature + +=head1 LINKS + +Normal text, a L<link> to nowhere, + +a link to L<perlvar-copy>, + +L<htmlescp>, + +L<feature/Another Head 1>, + +and another L<feature/"Another Head 1">. 
diff --git a/ext/Pod-Html/t/htmldir4.t b/ext/Pod-Html/t/htmldir4.t new file mode 100644 index 0000000000..034fffe65f --- /dev/null +++ b/ext/Pod-Html/t/htmldir4.t @@ -0,0 +1,70 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Cwd; +use File::Spec::Functions ':ALL'; +use Test::More tests => 2; + +my $cwd = cwd(); +my $data_pos = tell DATA; # to read <DATA> twice + +convert_n_test("htmldir4", "test --htmldir and --htmlroot 4a", + "--podpath=t", + "--htmldir=t", + "--outfile=". catfile('t', 'htmldir4.html'), +); + +seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) + +convert_n_test("htmldir4", "test --htmldir and --htmlroot 4b", + "--podpath=t", + "--podroot=$cwd", + "--htmldir=". catdir($cwd, 't'), + "--norecurse", +); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmldir - Test --htmldir feature</p> + +<h1 id="LINKS">LINKS</h1> + +<p>Normal text, a <a>link</a> to nowhere,</p> + +<p>a link to <a>perlvar-copy</a>,</p> + +<p><a href="t/htmlescp.html">htmlescp</a>,</p> + +<p><a href="t/feature.html#Another-Head-1">"Another Head 1" in feature</a>,</p> + +<p>and another <a href="t/feature.html#Another-Head-1">"Another Head 1" in feature</a>.</p> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmldir5.pod b/ext/Pod-Html/t/htmldir5.pod new file mode 100644 index 0000000000..e505caaa18 --- /dev/null +++ b/ext/Pod-Html/t/htmldir5.pod @@ -0,0 +1,15 @@ +=head1 NAME + +htmldir - Test --htmldir feature + +=head1 LINKS + 
+Normal text, a L<link> to nowhere, + +a link to L<var-copy>, + +L<htmlescp>, + +L<feature/Another Head 1>, + +and another L<feature/"Another Head 1">. diff --git a/ext/Pod-Html/t/htmldir5.t b/ext/Pod-Html/t/htmldir5.t new file mode 100644 index 0000000000..15a3901036 --- /dev/null +++ b/ext/Pod-Html/t/htmldir5.t @@ -0,0 +1,74 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +END { + rem_test_dir(); +} + +use strict; +use Cwd; +use File::Spec::Functions; +use Test::More tests => 1; + +SKIP: { + my $output = make_test_dir(); + skip "$output", 1 if $output; + + + my $cwd = catdir cwd(); # catdir converts path separators to that of the OS + # running the test + # XXX but why don't the other tests complain about + # this? + + convert_n_test("htmldir5", "test --htmldir and --htmlroot 5", + "--podpath=t:testdir/test.lib", + "--podroot=$cwd", + "--htmldir=$cwd", + "--htmlroot=/", + ); +} + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>htmldir - Test --htmldir feature</p> + +<h1 id="LINKS">LINKS</h1> + +<p>Normal text, a <a>link</a> to nowhere,</p> + +<p>a link to <a href="../testdir/test.lib/var-copy.html">var-copy</a>,</p> + +<p><a href="./htmlescp.html">htmlescp</a>,</p> + +<p><a href="./feature.html#Another-Head-1">"Another Head 1" in feature</a>,</p> + +<p>and another <a href="./feature.html#Another-Head-1">"Another Head 1" in feature</a>.</p> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/htmlescp.t b/ext/Pod-Html/t/htmlescp.t index 3314829dfd..30c75b4a32 
100644 --- a/ext/Pod-Html/t/htmlescp.t +++ b/ext/Pod-Html/t/htmlescp.t @@ -14,7 +14,7 @@ __DATA__ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> -<title>NAME</title> +<title></title> <meta http-equiv="content-type" content="text/html; charset=utf-8" /> <link rev="made" href="mailto:[PERLADMIN]" /> </head> @@ -22,35 +22,27 @@ __DATA__ <body style="background-color: white"> -<!-- INDEX BEGIN --> -<div name="index"> -<p><a name="__index__"></a></p> -<ul> - - <li><a href="#name">NAME</a></li> - <li><a href="#description">DESCRIPTION</a></li> +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#DESCRIPTION">DESCRIPTION</a></li> </ul> -<hr name="index" /> -</div> -<!-- INDEX END --> +<h1 id="NAME">NAME</h1> -<p> -</p> -<h1><a name="name">NAME</a></h1> <p>Escape Sequences Test</p> -<p> -</p> -<hr /> -<h1><a name="description">DESCRIPTION</a></h1> -<p>I am a stupid fool who puts naked < & > characters in my POD -instead of escaping them as < and >.</p> -<p>Here is some <strong>bold</strong> text, some <em>italic</em> plus <em class="file">/etc/fstab</em> -file and something that looks like an <html> tag. -This is some <code>$code($arg1)</code>.</p> -<p>Some numeric escapes: P e r l</p> + +<h1 id="DESCRIPTION">DESCRIPTION</h1> + +<p>I am a stupid fool who puts naked < & > characters in my POD instead of escaping them as < and >.</p> + +<p>Here is some <b>bold</b> text, some <i>italic</i> plus <i>/etc/fstab</i> file and something that looks like an <html> tag. 
This is some <code>$code($arg1)</code>.</p> + +<p>Some numeric escapes: P e r l</p> + </body> </html> + + diff --git a/ext/Pod-Html/t/htmllink.t b/ext/Pod-Html/t/htmllink.t index 592fef3262..9c26dbfcf2 100644 --- a/ext/Pod-Html/t/htmllink.t +++ b/ext/Pod-Html/t/htmllink.t @@ -14,7 +14,7 @@ __DATA__ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> -<title>htmllink - Test HTML links</title> +<title></title> <meta http-equiv="content-type" content="text/html; charset=utf-8" /> <link rev="made" href="mailto:[PERLADMIN]" /> </head> @@ -22,106 +22,136 @@ __DATA__ <body style="background-color: white"> -<!-- INDEX BEGIN --> -<div name="index"> -<p><a name="__index__"></a></p> -<ul> +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#LINKS">LINKS</a></li> + <li><a href="#TARGETS">TARGETS</a> + <ul> + <li><a href="#section1">section1</a></li> + <li><a href="#section-2">section 2</a></li> + <li><a href="#section-three">section three</a></li> + </ul> + </li> +</ul> - <li><a href="#name">NAME</a></li> - <li><a href="#links">LINKS</a></li> - <li><a href="#targets">TARGETS</a></li> - <ul> +<h1 id="NAME">NAME</h1> - <li><a href="#section1">section1</a></li> - <li><a href="#section_2">section 2</a></li> - <li><a href="#section_three">section three</a></li> - </ul> +<p>htmllink - Test HTML links</p> -</ul> +<h1 id="LINKS">LINKS</h1> -<hr name="index" /> -</div> -<!-- INDEX END --> +<p><a href="#section1">"section1"</a></p> + +<p><a href="#section-2">"section 2"</a></p> + +<p><a href="#section-three">"section three"</a></p> + +<p><a href="#item1">"item1"</a></p> + +<p><a href="#item-2">"item 2"</a></p> + +<p><a href="#item-three">"item three"</a></p> + +<p><a href="#section1">"section1"</a></p> + +<p><a href="#section-2">"section 2"</a></p> + +<p><a href="#section-three">"section three"</a></p> + +<p><a href="#item1">"item1"</a></p> + +<p><a 
href="#item-2">"item 2"</a></p> + +<p><a href="#item-three">"item three"</a></p> + +<p><a href="#section1">"section1"</a></p> + +<p><a href="#section-2">"section 2"</a></p> + +<p><a href="#section-three">"section three"</a></p> + +<p><a href="#item1">"item1"</a></p> + +<p><a href="#item-2">"item 2"</a></p> + +<p><a href="#item-three">"item three"</a></p> -<p> -</p> -<h1><a name="name">NAME</a></h1> -<p>htmllink - Test HTML links</p> -<p> -</p> -<hr /> -<h1><a name="links">LINKS</a></h1> -<p><a href="#section1">section1</a></p> -<p><a href="#section_2">section 2</a></p> -<p><a href="#section_three">section three</a></p> -<p><a href="#item1">item1</a></p> -<p><a href="#item_2">item 2</a></p> -<p><a href="#item_three">item three</a></p> -<p><a href="#section1">section1</a></p> -<p><a href="#section_2">section 2</a></p> -<p><a href="#section_three">section three</a></p> -<p><a href="#item1">item1</a></p> -<p><a href="#item_2">item 2</a></p> -<p><a href="#item_three">item three</a></p> -<p><a href="#section1">section1</a></p> -<p><a href="#section_2">section 2</a></p> -<p><a href="#section_three">section three</a></p> -<p><a href="#item1">item1</a></p> -<p><a href="#item_2">item 2</a></p> -<p><a href="#item_three">item three</a></p> <p><a href="#section1">text</a></p> -<p><a href="#section_2">text</a></p> -<p><a href="#section_three">text</a></p> + +<p><a href="#section-2">text</a></p> + +<p><a href="#section-three">text</a></p> + <p><a href="#item1">text</a></p> -<p><a href="#item_2">text</a></p> -<p><a href="#item_three">text</a></p> + +<p><a href="#item-2">text</a></p> + +<p><a href="#item-three">text</a></p> + <p><a href="#section1">text</a></p> -<p><a href="#section_2">text</a></p> -<p><a href="#section_three">text</a></p> + +<p><a href="#section-2">text</a></p> + +<p><a href="#section-three">text</a></p> + <p><a href="#item1">text</a></p> -<p><a href="#item_2">text</a></p> -<p><a href="#item_three">text</a></p> + +<p><a href="#item-2">text</a></p> + +<p><a 
href="#item-three">text</a></p> + <p><a href="#section1">text</a></p> -<p><a href="#section_2">text</a></p> -<p><a href="#section_three">text</a></p> + +<p><a href="#section-2">text</a></p> + +<p><a href="#section-three">text</a></p> + <p><a href="#item1">text</a></p> -<p><a href="#item_2">text</a></p> -<p><a href="#item_three">text</a></p> -<p> -</p> -<hr /> -<h1><a name="targets">TARGETS</a></h1> -<p> -</p> -<h2><a name="section1">section1</a></h2> + +<p><a href="#item-2">text</a></p> + +<p><a href="#item-three">text</a></p> + +<h1 id="TARGETS">TARGETS</h1> + +<h2 id="section1">section1</h2> + <p>This is section one.</p> -<p> -</p> -<h2><a name="section_2">section 2</a></h2> + +<h2 id="section-2">section 2</h2> + <p>This is section two.</p> -<p> -</p> -<h2><a name="section_three">section three</a></h2> + +<h2 id="section-three">section three</h2> + <p>This is section three.</p> + <dl> -<dt><strong><a name="item1" class="item">item1</a></strong></dt> +<dt id="item1">item1 </dt> <dd> + <p>This is item one.</p> -</dd> -<dt><strong><a name="item_2" class="item">item 2</a></strong></dt> +</dd> +<dt id="item-2">item 2 </dt> <dd> + <p>This is item two.</p> -</dd> -<dt><strong><a name="item_three" class="item">item three</a></strong></dt> +</dd> +<dt id="item-three">item three </dt> <dd> + <p>This is item three.</p> + </dd> </dl> + </body> </html> + + diff --git a/ext/Pod-Html/t/htmlview.pod b/ext/Pod-Html/t/htmlview.pod index 2ac15d36fe..05272c46cc 100644 --- a/ext/Pod-Html/t/htmlview.pod +++ b/ext/Pod-Html/t/htmlview.pod @@ -17,10 +17,10 @@ This is the description. This is some more regular text. Here is some B<bold> text, some I<italic> and something that looks -like an E<lt>htmlE<gt> tag. This is some C<$code($arg1)>. +like an E<lt>htmlE<gt> tag. This is some C<$code($arg1)>. -This C<text contains embedded B<bold> and I<italic> tags>. These can -be nested, allowing B<bold and I<bold E<amp> italic> text>. 
The module also +This C<text contains embedded B<bold> and I<italic> tags>. These can +be nested, allowing B<bold and I<bold E<amp> italic> text>. The module also supports the extended B<< syntax >> and permits I<< nested tags E<amp> other B<<< cool >>> stuff >> @@ -30,7 +30,7 @@ Here is a list of methods =head2 new() -Constructor method. Accepts the following config options: +Constructor method. Accepts the following config options: =over 4 @@ -60,6 +60,16 @@ The waz item. The baz item. +=over 4 + +=item * + +A correct list within a list + +=item * Boomerang + +=back + =back Title on the same line as the =item + * bullets @@ -78,6 +88,24 @@ Title on the same line as the =item + numerical bullets =over +=item 1 + +Cat + +=item 2 + +Sat + +=item 3 + +Mat + +=back + +Numbered list with text on the same line + +=over + =item 1 Cat =item 2 Sat diff --git a/ext/Pod-Html/t/htmlview.t b/ext/Pod-Html/t/htmlview.t index dc15c6b194..97e0536524 100644 --- a/ext/Pod-Html/t/htmlview.t +++ b/ext/Pod-Html/t/htmlview.t @@ -14,7 +14,7 @@ __DATA__ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> -<title>NAME</title> +<title></title> <meta http-equiv="content-type" content="text/html; charset=utf-8" /> <link rev="made" href="mailto:[PERLADMIN]" /> </head> @@ -22,162 +22,214 @@ __DATA__ <body style="background-color: white"> -<!-- INDEX BEGIN --> -<div name="index"> -<p><a name="__index__"></a></p> -<ul> +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#SYNOPSIS">SYNOPSIS</a></li> + <li><a href="#DESCRIPTION">DESCRIPTION</a></li> + <li><a href="#METHODS-OTHER-STUFF">METHODS => OTHER STUFF</a> + <ul> + <li><a href="#new-">new()</a></li> + <li><a href="#old-">old()</a></li> + </ul> + </li> + <li><a href="#TESTING-FOR-AND-BEGIN">TESTING FOR AND BEGIN</a></li> + <li><a href="#TESTING-URLs-hyperlinking">TESTING URLs hyperlinking</a></li> + <li><a 
href="#SEE-ALSO">SEE ALSO</a></li> + <li><a href="#POD-ERRORS">POD ERRORS</a></li> +</ul> - <li><a href="#name">NAME</a></li> - <li><a href="#synopsis">SYNOPSIS</a></li> - <li><a href="#description">DESCRIPTION</a></li> - <li><a href="#methods____other_stuff">METHODS => OTHER STUFF</a></li> - <ul> +<h1 id="NAME">NAME</h1> - <li><a href="#new__"><code>new()</code></a></li> - <li><a href="#old__"><code>old()</code></a></li> - </ul> +<p>Test HTML Rendering</p> - <li><a href="#testing_for_and_begin">TESTING FOR AND BEGIN</a></li> - <li><a href="#testing_urls_hyperlinking">TESTING URLs hyperlinking</a></li> - <li><a href="#see_also">SEE ALSO</a></li> -</ul> +<h1 id="SYNOPSIS">SYNOPSIS</h1> -<hr name="index" /> -</div> -<!-- INDEX END --> +<pre><code> use My::Module; + + my $module = My::Module->new();</code></pre> + +<h1 id="DESCRIPTION">DESCRIPTION</h1> -<p> -</p> -<h1><a name="name">NAME</a></h1> -<p>Test HTML Rendering</p> -<p> -</p> -<hr /> -<h1><a name="synopsis">SYNOPSIS</a></h1> -<pre> - use My::Module;</pre> -<pre> - my $module = My::Module->new();</pre> -<p> -</p> -<hr /> -<h1><a name="description">DESCRIPTION</a></h1> <p>This is the description.</p> -<pre> - Here is a verbatim section.</pre> + +<pre><code> Here is a verbatim section.</code></pre> + <p>This is some more regular text.</p> -<p>Here is some <strong>bold</strong> text, some <em>italic</em> and something that looks -like an <html> tag. This is some <code>$code($arg1)</code>.</p> -<p>This <code>text contains embedded bold and italic tags</code>. These can -be nested, allowing <strong>bold and <em>bold & italic</em> text</strong>. The module also -supports the extended <strong>syntax </strong>> and permits <em>nested tags & -other <strong>cool </strong></em>> stuff >></p> -<p> -</p> -<hr /> -<h1><a name="methods____other_stuff">METHODS => OTHER STUFF</a></h1> + +<p>Here is some <b>bold</b> text, some <i>italic</i> and something that looks like an <html> tag. 
This is some <code>$code($arg1)</code>.</p> + +<p>This <code>text contains embedded <b>bold</b> and <i>italic</i> tags</code>. These can be nested, allowing <b>bold and <i>bold & italic</i> text</b>. The module also supports the extended <b>syntax</b> and permits <i>nested tags & other <b>cool</b> stuff</i></p> + +<h1 id="METHODS-OTHER-STUFF">METHODS => OTHER STUFF</h1> + <p>Here is a list of methods</p> -<p> -</p> -<h2><a name="new__"><code>new()</code></a></h2> -<p>Constructor method. Accepts the following config options:</p> + +<h2 id="new-">new()</h2> + +<p>Constructor method. Accepts the following config options:</p> + <dl> -<dt><strong><a name="foo" class="item">foo</a></strong></dt> +<dt id="foo">foo</dt> <dd> + <p>The foo item.</p> -</dd> -<dt><strong><a name="bar" class="item">bar</a></strong></dt> +</dd> +<dt id="bar">bar</dt> <dd> + <p>The bar item.</p> -<p>This is a list within a list</p> + <ul> -<li> + +<p>This is a list within a list</p> + +<p>*</p> + <p>The wiz item.</p> -</li> -<li> + +<p>*</p> + <p>The waz item.</p> -</li> + </ul> -</dd> -<dt><strong><a name="baz" class="item">baz</a></strong></dt> +</dd> +<dt id="baz">baz</dt> <dd> + <p>The baz item.</p> + +<ul> + +<li><p>A correct list within a list</p> + +</li> +<li><p>Boomerang</p> + +</li> +</ul> + </dd> </dl> + <p>Title on the same line as the =item + * bullets</p> + <ul> -<li><strong><a name="black_cat" class="item"><code>Black</code> Cat</a></strong> + +<li><p><code>Black</code> Cat</p> </li> -<li><strong><a name="sat_on_the" class="item">Sat <em>on</em> the</a></strong> +<li><p>Sat <span style="white-space: nowrap;"><i>on</i> the</span></p> </li> -<li><strong><a name="mat" class="item">Mat<!></a></strong> +<li><p>Mat<!></p> </li> </ul> + <p>Title on the same line as the =item + numerical bullets</p> + <ol> -<li><strong><a name="cat" class="item">Cat</a></strong> + +<li><p>Cat</p> </li> -<li><strong><a name="sat" class="item">Sat</a></strong> +<li><p>Sat</p> </li> -<li><strong><a 
name="mat2" class="item">Mat</a></strong> +<li><p>Mat</p> </li> </ol> -<p>No bullets, no title</p> + +<p>Numbered list with text on the same line</p> + <dl> -<dt> + +<dt id="Cat">1 Cat</dt> <dd> -<p>Cat</p> + </dd> -<dt> +<dt id="Sat">2 Sat</dt> <dd> -<p>Sat</p> + </dd> -<dt> +<dt id="Mat">3 Mat</dt> <dd> -<p>Mat</p> + </dd> </dl> -<p> -</p> -<h2><a name="old__"><code>old()</code></a></h2> + +<p>No bullets, no title</p> + +<ul> + +<li><p>Cat</p> + +</li> +<li><p>Sat</p> + +</li> +<li><p>Mat</p> + +</li> +</ul> + +<h2 id="old-">old()</h2> + <p>Destructor method</p> -<p> -</p> -<hr /> -<h1><a name="testing_for_and_begin">TESTING FOR AND BEGIN</a></h1> + +<h1 id="TESTING-FOR-AND-BEGIN">TESTING FOR AND BEGIN</h1> + + + <br /> <p> blah blah -</p><p>intermediate text</p> +</p> + +<p>intermediate text</p> + + + <more> HTML -</more>some text<p> -</p> -<hr /> -<h1><a name="testing_urls_hyperlinking">TESTING URLs hyperlinking</a></h1> -<p>This is an href link1: <a href="http://example.com">http://example.com</a></p> -<p>This is an href link2: <a href="http://example.com/foo/bar.html">http://example.com/foo/bar.html</a></p> -<p>This is an email link: <a href="mailto:mailto:foo@bar.com">mailto:foo@bar.com</a></p> -<pre> - This is a link in a verbatim block <a href="<a href="http://perl.org">http://perl.org</a>"> Perl </a></pre> -<p> -</p> -<hr /> -<h1><a name="see_also">SEE ALSO</a></h1> -<p>See also <a href="/t/htmlescp.html">Test Page 2</a>, the <a href="/Your/Module.html">the Your::Module manpage</a> and <a href="/Their/Module.html">the Their::Module manpage</a> -manpages and the other interesting file <em class="file">/usr/local/my/module/rocks</em> -as well.</p> +</more>some text + +<h1 id="TESTING-URLs-hyperlinking">TESTING URLs hyperlinking</h1> + +<p>This is an href link1: http://example.com</p> + +<p>This is an href link2: http://example.com/foo/bar.html</p> + +<p>This is an email link: mailto:foo@bar.com</p> + +<pre><code> This is a link in a verbatim block <a 
href="http://perl.org"> Perl </a></code></pre> + +<h1 id="SEE-ALSO">SEE ALSO</h1> + +<p>See also <a href="/t/htmlescp.html">Test Page 2</a>, the <a>Your::Module</a> and <a>Their::Module</a> manpages and the other interesting file <i>/usr/local/my/module/rocks</i> as well.</p> + +<h1 id="POD-ERRORS">POD ERRORS</h1> + +<p>Hey! <b>The above document had some coding errors, which are explained below:</b></p> + +<dl> + +<dt id="Around-line-45:">Around line 45:</dt> +<dd> + +<p>You can't have =items (as at line 49) unless the first thing after the =over is an =item</p> + +</dd> +</dl> + </body> </html> + + diff --git a/ext/Pod-Html/t/pod2html-lib.pl b/ext/Pod-Html/t/pod2html-lib.pl index 2259d4528e..7a71e4c5b7 100644 --- a/ext/Pod-Html/t/pod2html-lib.pl +++ b/ext/Pod-Html/t/pod2html-lib.pl @@ -1,22 +1,51 @@ require Cwd; require Pod::Html; require Config; -use File::Spec::Functions; +use File::Spec::Functions ':ALL'; +use File::Path 'remove_tree'; +use File::Copy; + +# make_test_dir and rem_test_dir dynamically create and remove testdir/test.lib. +# it is created dynamically to pass t/filenames.t, which does not allow '.'s in +# filenames as '.' is the directory separator on VMS. All tests that require +# testdir/test.lib to be present are skipped if test.lib cannot be created. 
+sub make_test_dir { + if (-d 'testdir/test.lib') { + warn "Directory 'test.lib' exists (it shouldn't yet) - removing it"; + rem_test_dir(); + } + mkdir('testdir/test.lib') or return "Could not make test.lib directory: $!\n"; + copy('testdir/perlpodspec-copy.pod', 'testdir/test.lib/podspec-copy.pod') + or return "Could not copy perlpodspec-copy: $!"; + copy('testdir/perlvar-copy.pod', 'testdir/test.lib/var-copy.pod') + or return "Could not copy perlvar-copy: $!"; + return 0; +} + +sub rem_test_dir { + remove_tree('testdir/test.lib') + or warn "Error removing temporary directory 'testdir/test.lib'"; +} sub convert_n_test { - my($podfile, $testname) = @_; + my($podfile, $testname, @p2h_args) = @_; my $cwd = Cwd::cwd(); + my ($vol, $dir) = splitpath($cwd, 1); + my $relcwd = substr($dir, length(File::Spec->rootdir())); + my $new_dir = catdir $cwd, "t"; my $infile = catfile $new_dir, "$podfile.pod"; my $outfile = catfile $new_dir, "$podfile.html"; - + + # To add/modify args to p2h, use @p2h_args Pod::Html::pod2html( + "--infile=$infile", + "--outfile=$outfile", "--podpath=t", - "--podroot=$cwd", "--htmlroot=/", - "--infile=$infile", - "--outfile=$outfile" + "--podroot=$cwd", + @p2h_args, ); @@ -26,6 +55,7 @@ sub convert_n_test { # expected $expect = <DATA>; $expect =~ s/\[PERLADMIN\]/$Config::Config{perladmin}/; + $expect =~ s/\[RELCURRENTWORKINGDIRECTORY\]/$relcwd/g; if (ord("A") == 193) { # EBCDIC. $expect =~ s/item_mat_3c_21_3e/item_mat_4c_5a_6e/; } @@ -54,8 +84,6 @@ sub convert_n_test { # pod2html creates these 1 while unlink $outfile; - 1 while unlink "pod2htmd.tmp"; - 1 while unlink "pod2htmi.tmp"; } 1; diff --git a/ext/Pod-Html/t/poderr.pod b/ext/Pod-Html/t/poderr.pod new file mode 100644 index 0000000000..f54ab49283 --- /dev/null +++ b/ext/Pod-Html/t/poderr.pod @@ -0,0 +1,19 @@ +=head1 NAME + +Test POD ERROR section + +=over 4 + +This text is not allowed + +=item * + +The wiz item. + +=item * + +The waz item. 
+ +=back + +=cut diff --git a/ext/Pod-Html/t/poderr.t b/ext/Pod-Html/t/poderr.t new file mode 100644 index 0000000000..270d948af4 --- /dev/null +++ b/ext/Pod-Html/t/poderr.t @@ -0,0 +1,68 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Test::More tests => 1; + +convert_n_test("poderr", "pod error section"); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#POD-ERRORS">POD ERRORS</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>Test POD ERROR section</p> + +<ul> + +<p>This text is not allowed</p> + +<p>*</p> + +<p>The wiz item.</p> + +<p>*</p> + +<p>The waz item.</p> + +</ul> + +<h1 id="POD-ERRORS">POD ERRORS</h1> + +<p>Hey! <b>The above document had some coding errors, which are explained below:</b></p> + +<dl> + +<dt id="Around-line-5:">Around line 5:</dt> +<dd> + +<p>You can't have =items (as at line 9) unless the first thing after the =over is an =item</p> + +</dd> +</dl> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/t/podnoerr.pod b/ext/Pod-Html/t/podnoerr.pod new file mode 100644 index 0000000000..f54ab49283 --- /dev/null +++ b/ext/Pod-Html/t/podnoerr.pod @@ -0,0 +1,19 @@ +=head1 NAME + +Test POD ERROR section + +=over 4 + +This text is not allowed + +=item * + +The wiz item. + +=item * + +The waz item. 
+ +=back + +=cut diff --git a/ext/Pod-Html/t/podnoerr.t b/ext/Pod-Html/t/podnoerr.t new file mode 100644 index 0000000000..cb74812db2 --- /dev/null +++ b/ext/Pod-Html/t/podnoerr.t @@ -0,0 +1,55 @@ +#!/usr/bin/perl -w # -*- perl -*- + +BEGIN { + require "t/pod2html-lib.pl"; +} + +use strict; +use Test::More tests => 1; + +convert_n_test("podnoerr", "pod error section", + "--nopoderrors", +); + +__DATA__ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title></title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:[PERLADMIN]" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>Test POD ERROR section</p> + +<ul> + +<p>This text is not allowed</p> + +<p>*</p> + +<p>The wiz item.</p> + +<p>*</p> + +<p>The waz item.</p> + +</ul> + + +</body> + +</html> + + diff --git a/ext/Pod-Html/testdir/perlpodspec-copy.pod b/ext/Pod-Html/testdir/perlpodspec-copy.pod new file mode 100644 index 0000000000..4f914ef0e4 --- /dev/null +++ b/ext/Pod-Html/testdir/perlpodspec-copy.pod @@ -0,0 +1,1899 @@ + +=head1 NAME + +perlpodspeccopy - Plain Old Documentation: format specification and notes + +=head1 DESCRIPTION + +This document is detailed notes on the Pod markup language. Most +people will only have to read L<perlpod|perlpod> to know how to write +in Pod, but this document may answer some incidental questions to do +with parsing and rendering Pod. + +In this document, "must" / "must not", "should" / +"should not", and "may" have their conventional (cf. RFC 2119) +meanings: "X must do Y" means that if X doesn't do Y, it's against +this specification, and should really be fixed. "X should do Y" +means that it's recommended, but X may fail to do Y, if there's a +good reason. 
"X may do Y" is merely a note that X can do Y at +will (although it is up to the reader to detect any connotation of +"and I think it would be I<nice> if X did Y" versus "it wouldn't +really I<bother> me if X did Y"). + +Notably, when I say "the parser should do Y", the +parser may fail to do Y, if the calling application explicitly +requests that the parser I<not> do Y. I often phrase this as +"the parser should, by default, do Y." This doesn't I<require> +the parser to provide an option for turning off whatever +feature Y is (like expanding tabs in verbatim paragraphs), although +it implicates that such an option I<may> be provided. + +=head1 Pod Definitions + +Pod is embedded in files, typically Perl source files -- although you +can write a file that's nothing but Pod. + +A B<line> in a file consists of zero or more non-newline characters, +terminated by either a newline or the end of the file. + +A B<newline sequence> is usually a platform-dependent concept, but +Pod parsers should understand it to mean any of CR (ASCII 13), LF +(ASCII 10), or a CRLF (ASCII 13 followed immediately by ASCII 10), in +addition to any other system-specific meaning. The first CR/CRLF/LF +sequence in the file may be used as the basis for identifying the +newline sequence for parsing the rest of the file. + +A B<blank line> is a line consisting entirely of zero or more spaces +(ASCII 32) or tabs (ASCII 9), and terminated by a newline or end-of-file. +A B<non-blank line> is a line containing one or more characters other +than space or tab (and terminated by a newline or end-of-file). + +(I<Note:> Many older Pod parsers did not accept a line consisting of +spaces/tabs and then a newline as a blank line -- the only lines they +considered blank were lines consisting of I<no characters at all>, +terminated by a newline.) + +B<Whitespace> is used in this document as a blanket term for spaces, +tabs, and newline sequences. (By itself, this term usually refers +to literal whitespace. 
That is, sequences of whitespace characters +in Pod source, as opposed to "EE<lt>32>", which is a formatting +code that I<denotes> a whitespace character.) + +A B<Pod parser> is a module meant for parsing Pod (regardless of +whether this involves calling callbacks or building a parse tree or +directly formatting it). A B<Pod formatter> (or B<Pod translator>) +is a module or program that converts Pod to some other format (HTML, +plaintext, TeX, PostScript, RTF). A B<Pod processor> might be a +formatter or translator, or might be a program that does something +else with the Pod (like counting words, scanning for index points, +etc.). + +Pod content is contained in B<Pod blocks>. A Pod block starts with a +line that matches C<m/\A=[a-zA-Z]/>, and continues up to the next line +that matches C<m/\A=cut/> -- or up to the end of the file, if there is +no C<m/\A=cut/> line. + +=for comment + The current perlsyn says: + [beginquote] + Note that pod translators should look at only paragraphs beginning + with a pod directive (it makes parsing easier), whereas the compiler + actually knows to look for pod escapes even in the middle of a + paragraph. This means that the following secret stuff will be ignored + by both the compiler and the translators. + $a=3; + =secret stuff + warn "Neither POD nor CODE!?" + =cut back + print "got $a\n"; + You probably shouldn't rely upon the warn() being podded out forever. + Not all pod translators are well-behaved in this regard, and perhaps + the compiler will become pickier. + [endquote] + I think that those paragraphs should just be removed; paragraph-based + parsing seems to have been largely abandoned, because of the hassle + with non-empty blank lines messing up what people meant by "paragraph". + Even if the "it makes parsing easier" bit were especially true, + it wouldn't be worth the confusion of having perl and pod2whatever + actually disagree on what can constitute a Pod block. + +Within a Pod block, there are B<Pod paragraphs>.
A Pod paragraph +consists of non-blank lines of text, separated by one or more blank +lines. + +For purposes of Pod processing, there are four types of paragraphs in +a Pod block: + +=over + +=item * + +A command paragraph (also called a "directive"). The first line of +this paragraph must match C<m/\A=[a-zA-Z]/>. Command paragraphs are +typically one line, as in: + + =head1 NOTES + + =item * + +But they may span several (non-blank) lines: + + =for comment + Hm, I wonder what it would look like if + you tried to write a BNF for Pod from this. + + =head3 Dr. Strangelove, or: How I Learned to + Stop Worrying and Love the Bomb + +I<Some> command paragraphs allow formatting codes in their content +(i.e., after the part that matches C<m/\A=[a-zA-Z]\S*\s*/>), as in: + + =head1 Did You Remember to C<use strict;>? + +In other words, the Pod processing handler for "head1" will apply the +same processing to "Did You Remember to CE<lt>use strict;>?" that it +would to an ordinary paragraph -- i.e., formatting codes (like +"CE<lt>...>") are parsed and presumably formatted appropriately, and +whitespace in the form of literal spaces and/or tabs is not +significant. + +=item * + +A B<verbatim paragraph>. The first line of this paragraph must be a +literal space or tab, and this paragraph must not be inside a "=begin +I<identifier>", ... "=end I<identifier>" sequence unless +"I<identifier>" begins with a colon (":"). That is, if a paragraph +starts with a literal space or tab, but I<is> inside a +"=begin I<identifier>", ... "=end I<identifier>" region, then it's +a data paragraph, unless "I<identifier>" begins with a colon. + +Whitespace I<is> significant in verbatim paragraphs (although, in +processing, tabs are probably expanded). + +=item * + +An B<ordinary paragraph>. A paragraph is an ordinary paragraph +if its first line matches neither C<m/\A=[a-zA-Z]/> nor +C<m/\A[ \t]/>, I<and> if it's not inside a "=begin I<identifier>", +... 
"=end I<identifier>" sequence unless "I<identifier>" begins with +a colon (":"). + +=item * + +A B<data paragraph>. This is a paragraph that I<is> inside a "=begin +I<identifier>" ... "=end I<identifier>" sequence where +"I<identifier>" does I<not> begin with a literal colon (":"). In +some sense, a data paragraph is not part of Pod at all (i.e., +effectively it's "out-of-band"), since it's not subject to most kinds +of Pod parsing; but it is specified here, since Pod +parsers need to be able to call an event for it, or store it in some +form in a parse tree, or at least just parse I<around> it. + +=back + +For example: consider the following paragraphs: + + # <- that's the 0th column + + =head1 Foo + + Stuff + + $foo->bar + + =cut + +Here, "=head1 Foo" and "=cut" are command paragraphs because the first +line of each matches C<m/\A=[a-zA-Z]/>. "I<[space][space]>$foo->bar" +is a verbatim paragraph, because its first line starts with a literal +whitespace character (and there's no "=begin"..."=end" region around). + +The "=begin I<identifier>" ... "=end I<identifier>" commands stop +paragraphs that they surround from being parsed as ordinary or verbatim +paragraphs, if I<identifier> doesn't begin with a colon. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=head1 Pod Commands + +This section is intended to supplement and clarify the discussion in +L<perlpod/"Command Paragraph">. These are the currently recognized +Pod commands: + +=over + +=item "=head1", "=head2", "=head3", "=head4" + +This command indicates that the text in the remainder of the paragraph +is a heading. That text may contain formatting codes. Examples: + + =head1 Object Attributes + + =head3 What B<Not> to Do! + +=item "=pod" + +This command indicates that this paragraph begins a Pod block. (If we +are already in the middle of a Pod block, this command has no effect at +all.) 
If there is any text in this command paragraph after "=pod", +it must be ignored. Examples: + + =pod + + This is a plain Pod paragraph. + + =pod This text is ignored. + +=item "=cut" + +This command indicates that this line is the end of this previously +started Pod block. If there is any text after "=cut" on the line, it must be +ignored. Examples: + + =cut + + =cut The documentation ends here. + + =cut + # This is the first line of program text. + sub foo { # This is the second. + +It is an error to try to I<start> a Pod block with a "=cut" command. In +that case, the Pod processor must halt parsing of the input file, and +must by default emit a warning. + +=item "=over" + +This command indicates that this is the start of a list/indent +region. If there is any text following the "=over", it must consist +of only a nonzero positive numeral. The semantics of this numeral is +explained in the L</"About =over...=back Regions"> section, further +below. Formatting codes are not expanded. Examples: + + =over 3 + + =over 3.5 + + =over + +=item "=item" + +This command indicates that an item in a list begins here. Formatting +codes are processed. The semantics of the (optional) text in the +remainder of this paragraph are +explained in the L</"About =over...=back Regions"> section, further +below. Examples: + + =item + + =item * + + =item * + + =item 14 + + =item 3. + + =item C<< $thing->stuff(I<dodad>) >> + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item "=back" + +This command indicates that this is the end of the region begun +by the most recent "=over" command. It permits no text after the +"=back" command. 
+ +=item "=begin formatname" + +This marks the following paragraphs (until the matching "=end +formatname") as being for some special kind of processing. Unless +"formatname" begins with a colon, the contained non-command +paragraphs are data paragraphs. But if "formatname" I<does> begin +with a colon, then non-command paragraphs are ordinary paragraphs +or data paragraphs. This is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +It is advised that formatnames match the regexp +C<m/\A:?[-a-zA-Z0-9_]+\z/>. Implementors should anticipate future +expansion in the semantics and syntax of the first parameter +to "=begin"/"=end"/"=for". + +=item "=end formatname" + +This marks the end of the region opened by the matching +"=begin formatname" region. If "formatname" is not the formatname +of the most recent open "=begin formatname" region, then this +is an error, and must generate an error message. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=item "=for formatname text..." + +This is synonymous with: + + =begin formatname + + text... + + =end formatname + +That is, it creates a region consisting of a single paragraph; that +paragraph is to be treated as a normal paragraph if "formatname" +begins with a ":"; if "formatname" I<doesn't> begin with a colon, +then "text..." will constitute a data paragraph. There is no way +to use "=for formatname text..." to express "text..." as a verbatim +paragraph. + +=item "=encoding encodingname" + +This command, which should occur early in the document (at least +before any non-US-ASCII data!), declares that this document is +encoded in the encoding I<encodingname>, which must be +an encoding name that L<Encode> recognizes. (Encode's list +of supported encodings, in L<Encode::Supported>, is useful here.) 
+If the Pod parser cannot decode the declared encoding, it +should emit a warning and may abort parsing the document +altogether. + +A document having more than one "=encoding" line should be +considered an error. Pod processors may silently tolerate this if +the not-first "=encoding" lines are just duplicates of the +first one (e.g., if there's a "=encoding utf8" line, and later on +another "=encoding utf8" line). But Pod processors should complain if +there are contradictory "=encoding" lines in the same document +(e.g., if there is a "=encoding utf8" early in the document and +"=encoding big5" later). Pod processors that recognize BOMs +may also complain if they see an "=encoding" line +that contradicts the BOM (e.g., if a document with a UTF-16LE +BOM has an "=encoding shiftjis" line). + +=back + +If a Pod processor sees any command other than the ones listed +above (like "=head", or "=haed1", or "=stuff", or "=cuttlefish", +or "=w123"), that processor must by default treat this as an +error. It must not process the paragraph beginning with that +command, must by default warn of this as an error, and may +abort the parse. A Pod parser may allow a way for particular +applications to add to the above list of known commands, and to +stipulate, for each additional command, whether formatting +codes should be processed. + +Future versions of this specification may add additional +commands. + + + +=head1 Pod Formatting Codes + +(Note that in previous drafts of this document and of perlpod, +formatting codes were referred to as "interior sequences", and +this term may still be found in the documentation for Pod parsers, +and in error messages from Pod processors.) + +There are two syntaxes for formatting codes: + +=over + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by a "<", any number of characters, and ending with the first +matching ">". Examples: + + That's what I<you> think! + + What's C<dump()> for? 
+ + X<C<chmod> and C<unlink()> Under Different Operating Systems> + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by two or more "<"'s, one or more whitespace characters, +any number of characters, one or more whitespace characters, +and ending with the first matching sequence of two or more ">"'s, where +the number of ">"'s equals the number of "<"'s in the opening of this +formatting code. Examples: + + That's what I<< you >> think! + + C<<< open(X, ">>thing.dat") || die $! >>> + + B<< $foo->bar(); >> + +With this syntax, the whitespace character(s) after the "CE<lt><<" +and before the ">>" (or whatever letter) are I<not> renderable -- they +do not signify whitespace, are merely part of the formatting codes +themselves. That is, these are all synonymous: + + C<thing> + C<< thing >> + C<< thing >> + C<<< thing >>> + C<<<< + thing + >>>> + +and so on. + +=back + +In parsing Pod, a notably tricky part is the correct parsing of +(potentially nested!) formatting codes. Implementors should +consult the code in the C<parse_text> routine in Pod::Parser as an +example of a correct implementation. + +=over + +=item C<IE<lt>textE<gt>> -- italic text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<BE<lt>textE<gt>> -- bold text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<CE<lt>codeE<gt>> -- code text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<FE<lt>filenameE<gt>> -- style for filenames + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<XE<lt>topic nameE<gt>> -- an index entry + +See the brief discussion in L<perlpod/"Formatting Codes">. + +This code is unusual in that most formatters completely discard +this code and its content. Other formatters will render it with +invisible codes that can be used in building an index of +the current document. 
+ +=item C<ZE<lt>E<gt>> -- a null (zero-effect) formatting code + +Discussed briefly in L<perlpod/"Formatting Codes">. + +This code is unusual in that it should have no content. That is, +a processor may complain if it sees C<ZE<lt>potatoesE<gt>>. Whether +or not it complains, the I<potatoes> text should be ignored. + +=item C<LE<lt>nameE<gt>> -- a hyperlink + +The complicated syntaxes of this code are discussed at length in +L<perlpod/"Formatting Codes">, and implementation details are +discussed below, in L</"About LE<lt>...E<gt> Codes">. Parsing the +contents of LE<lt>content> is tricky. Notably, the content has to be +checked for whether it looks like a URL, or whether it has to be split +on literal "|" and/or "/" (in the right order!), and so on, +I<before> EE<lt>...> codes are resolved. + +=item C<EE<lt>escapeE<gt>> -- a character escape + +See L<perlpod/"Formatting Codes">, and several points in +L</Notes on Implementing Pod Processors>. + +=item C<SE<lt>textE<gt>> -- text contains non-breaking spaces + +This formatting code is syntactically simple, but semantically +complex. What it means is that each space in the printable +content of this code signifies a non-breaking space. + +Consider: + + C<$x ? $y : $z> + + S<C<$x ? $y : $z>> + +Both signify the monospace (c[ode] style) text consisting of +"$x", one space, "?", one space, "$y", one space, ":", one space, "$z". The +difference is that in the latter, with the S code, those spaces +are not "normal" spaces, but instead are non-breaking spaces. + +=back + + +If a Pod processor sees any formatting code other than the ones +listed above (as in "NE<lt>...>", or "QE<lt>...>", etc.), that +processor must by default treat this as an error. +A Pod parser may allow a way for particular +applications to add to the above list of known formatting codes; +a Pod parser might even allow a way to stipulate, for each additional +command, whether it requires some form of special processing, as +LE<lt>...> does.
+ +Future versions of this specification may add additional +formatting codes. + +Historical note: A few older Pod processors would not see a ">" as +closing a "CE<lt>" code, if the ">" was immediately preceded by +a "-". This was so that this: + + C<$foo->bar> + +would parse as equivalent to this: + + C<$foo-E<gt>bar> + +instead of as equivalent to a "C" formatting code containing +only "$foo-", and then a "bar>" outside the "C" formatting code. This +problem has since been solved by the addition of syntaxes like this: + + C<< $foo->bar >> + +Compliant parsers must not treat "->" as special. + +Formatting codes absolutely cannot span paragraphs. If a code is +opened in one paragraph, and no closing code is found by the end of +that paragraph, the Pod parser must close that formatting code, +and should complain (as in "Unterminated I code in the paragraph +starting at line 123: 'Time objects are not...'"). So these +two paragraphs: + + I<I told you not to do this! + + Don't make me say it again!> + +...must I<not> be parsed as two paragraphs in italics (with the I +code starting in one paragraph and ending in another.) Instead, +the first paragraph should generate a warning, but that aside, the +above code must parse as if it were: + + I<I told you not to do this!> + + Don't make me say it again!E<gt> + +(In SGMLish jargon, all Pod commands are like block-level +elements, whereas all Pod formatting codes are like inline-level +elements.) + + + +=head1 Notes on Implementing Pod Processors + +The following is a long section of miscellaneous requirements +and suggestions to do with Pod processing. + +=over + +=item * + +Pod formatters should tolerate lines in verbatim blocks that are of +any length, even if that means having to break them (possibly several +times, for very long lines) to avoid text running off the side of the +page. Pod formatters may warn of such line-breaking.
Such warnings +are particularly appropriate for lines that are over 100 characters long, which +are usually not intentional. + +=item * + +Pod parsers must recognize I<all> of the three well-known newline +formats: CR, LF, and CRLF. See L<perlport|perlport>. + +=item * + +Pod parsers should accept input lines that are of any length. + +=item * + +Since Perl recognizes a Unicode Byte Order Mark at the start of files +as signaling that the file is Unicode encoded as in UTF-16 (whether +big-endian or little-endian) or UTF-8, Pod parsers should do the +same. Otherwise, the character encoding should be understood as +being UTF-8 if the first highbit byte sequence in the file seems +valid as a UTF-8 sequence, or otherwise as Latin-1. + +Future versions of this specification may specify +how Pod can accept other encodings. Presumably treatment of other +encodings in Pod parsing would be as in XML parsing: whatever the +encoding declared by a particular Pod file, content is to be +stored in memory as Unicode characters. + +=item * + +The well known Unicode Byte Order Marks are as follows: if the +file begins with the two literal byte values 0xFE 0xFF, this is +the BOM for big-endian UTF-16. If the file begins with the two +literal byte values 0xFF 0xFE, this is the BOM for little-endian +UTF-16. If the file begins with the three literal byte values +0xEF 0xBB 0xBF, this is the BOM for UTF-8. + +=for comment + use bytes; print map sprintf(" 0x%02X", ord $_), split '', "\x{feff}"; + 0xEF 0xBB 0xBF + +=for comment + If toke.c is modified to support UTF-32, add mention of those here. + +=item * + +A naive but sufficient heuristic for testing the first highbit +byte-sequence in a BOM-less file (whether in code or in Pod!), to see +whether that sequence is valid as UTF-8 (RFC 2279) is to check whether +the first byte in the sequence is in the range 0xC0 - 0xFD +I<and> whether the next byte is in the range +0x80 - 0xBF.
If so, the parser may conclude that this file is in +UTF-8, and all highbit sequences in the file should be assumed to +be UTF-8. Otherwise the parser should treat the file as being +in Latin-1. In the unlikely circumstance that the first highbit +sequence in a truly non-UTF-8 file happens to appear to be UTF-8, one +can cater to our heuristic (as well as any more intelligent heuristic) +by prefacing that line with a comment line containing a highbit +sequence that is clearly I<not> valid as UTF-8. A line consisting +of simply "#", an e-acute, and any non-highbit byte, +is sufficient to establish this file's encoding. + +=for comment + If/WHEN some brave soul makes these heuristics into a generic + text-file class (or PerlIO layer?), we can presumably delete + mention of these icky details from this file, and can instead + tell people to just use appropriate class/layer. + Auto-recognition of newline sequences would be another desirable + feature of such a class/layer. + HINT HINT HINT. + +=for comment + "The probability that a string of characters + in any other encoding appears as valid UTF-8 is low" - RFC2279 + +=item * + +This document's requirements and suggestions about encodings +do not apply to Pod processors running on non-ASCII platforms, +notably EBCDIC platforms. + +=item * + +Pod processors must treat a "=for [label] [content...]" paragraph as +meaning the same thing as a "=begin [label]" paragraph, content, and +an "=end [label]" paragraph. (The parser may conflate these two +constructs, or may leave them distinct, in the expectation that the +formatter will nevertheless treat them the same.) + +=item * + +When rendering Pod to a format that allows comments (i.e., to nearly +any format other than plaintext), a Pod formatter must insert comment +text identifying its name and version number, and the name and +version numbers of any modules it might be using to process the Pod. 
+Minimal examples: + + %% POD::Pod2PS v3.14159, using POD::Parser v1.92 + + <!-- Pod::HTML v3.14159, using POD::Parser v1.92 --> + + {\doccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08} + + .\" Pod::Man version 3.14159, using POD::Parser version 1.92 + +Formatters may also insert additional comments, including: the +release date of the Pod formatter program, the contact address for +the author(s) of the formatter, the current time, the name of input +file, the formatting options in effect, version of Perl used, etc. + +Formatters may also choose to note errors/warnings as comments, +besides or instead of emitting them otherwise (as in messages to +STDERR, or C<die>ing). + +=item * + +Pod parsers I<may> emit warnings or error messages ("Unknown E code +EE<lt>zslig>!") to STDERR (whether through printing to STDERR, or +C<warn>ing/C<carp>ing, or C<die>ing/C<croak>ing), but I<must> allow +suppressing all such STDERR output, and instead allow an option for +reporting errors/warnings +in some other way, whether by triggering a callback, or noting errors +in some attribute of the document object, or some similarly unobtrusive +mechanism -- or even by appending a "Pod Errors" section to the end of +the parsed form of the document. + +=item * + +In cases of exceptionally aberrant documents, Pod parsers may abort the +parse. Even then, using C<die>ing/C<croak>ing is to be avoided; where +possible, the parser library may simply close the input file +and add text like "*** Formatting Aborted ***" to the end of the +(partial) in-memory document. 
+ +=item * + +In paragraphs where formatting codes (like EE<lt>...>, BE<lt>...>) +are understood (i.e., I<not> verbatim paragraphs, but I<including> +ordinary paragraphs, and command paragraphs that produce renderable +text, like "=head1"), literal whitespace should generally be considered +"insignificant", in that one literal space has the same meaning as any +(nonzero) number of literal spaces, literal newlines, and literal tabs +(as long as this produces no blank lines, since those would terminate +the paragraph). Pod parsers should compact literal whitespace in each +processed paragraph, but may provide an option for overriding this +(since some processing tasks do not require it), or may follow +additional special rules (for example, specially treating +period-space-space or period-newline sequences). + +=item * + +Pod parsers should not, by default, try to coerce apostrophe (') and +quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to +turn backtick (`) into anything else but a single backtick character +(distinct from an open quote character!), nor "--" into anything but +two minus signs. They I<must never> do any of those things to text +in CE<lt>...> formatting codes, and never I<ever> to text in verbatim +paragraphs. + +=item * + +When rendering Pod to a format that has two kinds of hyphens (-), one +that's a non-breaking hyphen, and another that's a breakable hyphen +(as in "object-oriented", which can be split across lines as +"object-", newline, "oriented"), formatters are encouraged to +generally translate "-" to non-breaking hyphen, but may apply +heuristics to convert some of these to breaking hyphens. + +=item * + +Pod formatters should make reasonable efforts to keep words of Perl +code from being broken across lines. For example, "Foo::Bar" in some +formatting systems is seen as eligible for being broken across lines +as "Foo::" newline "Bar" or even "Foo::-" newline "Bar". 
This should +be avoided where possible, either by disabling all line-breaking in +mid-word, or by wrapping particular words with internal punctuation +in "don't break this across lines" codes (which in some formats may +not be a single code, but might be a matter of inserting non-breaking +zero-width spaces between every pair of characters in a word.) + +=item * + +Pod parsers should, by default, expand tabs in verbatim paragraphs as +they are processed, before passing them to the formatter or other +processor. Parsers may also allow an option for overriding this. + +=item * + +Pod parsers should, by default, remove newlines from the end of +ordinary and verbatim paragraphs before passing them to the +formatter. For example, while the paragraph you're reading now +could be considered, in Pod source, to end with (and contain) +the newline(s) that end it, it should be processed as ending with +(and containing) the period character that ends this sentence. + +=item * + +Pod parsers, when reporting errors, should make some effort to report +an approximate line number ("Nested EE<lt>>'s in Paragraph #52, near +line 633 of Thing/Foo.pm!"), instead of merely noting the paragraph +number ("Nested EE<lt>>'s in Paragraph #52 of Thing/Foo.pm!"). Where +this is problematic, the paragraph number should at least be +accompanied by an excerpt from the paragraph ("Nested EE<lt>>'s in +Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for +the CE<lt>interest rate> attribute...'"). + +=item * + +Pod parsers, when processing a series of verbatim paragraphs one +after another, should consider them to be one large verbatim +paragraph that happens to contain blank lines. I.e., these two +lines, which have a blank line between them: + + use Foo; + + print Foo->VERSION + +should be unified into one paragraph ("\tuse Foo;\n\n\tprint +Foo->VERSION") before being passed to the formatter or other +processor. Parsers may also allow an option for overriding this. 
+ +While this might be too cumbersome to implement in event-based Pod +parsers, it is straightforward for parsers that return parse trees. + +=item * + +Pod formatters, where feasible, are advised to avoid splitting short +verbatim paragraphs (under twelve lines, say) across pages. + +=item * + +Pod parsers must treat a line with only spaces and/or tabs on it as a +"blank line" such as separates paragraphs. (Some older parsers +recognized only two adjacent newlines as a "blank line" but would not +recognize a newline, a space, and a newline, as a blank line. This +is noncompliant behavior.) + +=item * + +Authors of Pod formatters/processors should make every effort to +avoid writing their own Pod parser. There are already several in +CPAN, with a wide range of interface styles -- and one of them, +Pod::Parser, comes with modern versions of Perl. + +=item * + +Characters in Pod documents may be conveyed either as literals, or by +number in EE<lt>n> codes, or by an equivalent mnemonic, as in +EE<lt>eacute> which is exactly equivalent to EE<lt>233>. + +Characters in the range 32-126 refer to those well known US-ASCII +characters (also defined there by Unicode, with the same meaning), +which all Pod formatters must render faithfully. Characters +in the ranges 0-31 and 127-159 should not be used (neither as +literals, nor as EE<lt>number> codes), except for the +literal byte-sequences for newline (13, 13 10, or 10), and tab (9). + +Characters in the range 160-255 refer to Latin-1 characters (also +defined there by Unicode, with the same meaning). Characters above +255 should be understood to refer to Unicode characters. + +=item * + +Be warned +that some formatters cannot reliably render characters outside 32-126; +and many are able to handle 32-126 and 160-255, but nothing above +255. 
+ +=item * + +Besides the well-known "EE<lt>lt>" and "EE<lt>gt>" codes for +less-than and greater-than, Pod parsers must understand "EE<lt>sol>" +for "/" (solidus, slash), and "EE<lt>verbar>" for "|" (vertical bar, +pipe). Pod parsers should also understand "EE<lt>lchevron>" and +"EE<lt>rchevron>" as legacy codes for characters 171 and 187, i.e., +"left-pointing double angle quotation mark" = "left pointing +guillemet" and "right-pointing double angle quotation mark" = "right +pointing guillemet". (These look like little "<<" and ">>", and they +are now preferably expressed with the HTML/XHTML codes "EE<lt>laquo>" +and "EE<lt>raquo>".) + +=item * + +Pod parsers should understand all "EE<lt>html>" codes as defined +in the entity declarations in the most recent XHTML specification at +C<www.W3.org>. Pod parsers must understand at least the entities +that define characters in the range 160-255 (Latin-1). Pod parsers, +when faced with some unknown "EE<lt>I<identifier>>" code, +shouldn't simply replace it with nullstring (by default, at least), +but may pass it through as a string consisting of the literal characters +E, less-than, I<identifier>, greater-than. Or Pod parsers may offer the +alternative option of processing such unknown +"EE<lt>I<identifier>>" codes by firing an event especially +for such codes, or by adding a special node-type to the in-memory +document tree. Such "EE<lt>I<identifier>>" may have special meaning +to some processors, or some processors may choose to add them to +a special error report. + +=item * + +Pod parsers must also support the XHTML codes "EE<lt>quot>" for +character 34 (doublequote, "), "EE<lt>amp>" for character 38 +(ampersand, &), and "EE<lt>apos>" for character 39 (apostrophe, '). + +=item * + +Note that in all cases of "EE<lt>whatever>", I<whatever> (whether +an htmlname, or a number in any base) must consist only of +alphanumeric characters -- that is, I<whatever> must match +C<m/\A\w+\z/>.
So "EE<lt> 0 1 2 3 >" is invalid, because +it contains spaces, which aren't alphanumeric characters. This +presumably does not I<need> special treatment by a Pod processor; +" 0 1 2 3 " doesn't look like a number in any base, so it would +presumably be looked up in the table of HTML-like names. Since +there isn't (and cannot be) an HTML-like entity called " 0 1 2 3 ", +this will be treated as an error. However, Pod processors may +treat "EE<lt> 0 1 2 3 >" or "EE<lt>e-acute>" as I<syntactically> +invalid, potentially earning a different error message than the +error message (or warning, or event) generated by a merely unknown +(but theoretically valid) htmlname, as in "EE<lt>qacute>" +[sic]. However, Pod parsers are not required to make this +distinction. + +=item * + +Note that EE<lt>number> I<must not> be interpreted as simply +"codepoint I<number> in the current/native character set". It always +means only "the character represented by codepoint I<number> in +Unicode." (This is identical to the semantics of &#I<number>; in XML.) + +This will likely require many formatters to have tables mapping from +treatable Unicode codepoints (such as the "\xE9" for the e-acute +character) to the escape sequences or codes necessary for conveying +such sequences in the target output format. A converter to *roff +would, for example know that "\xE9" (whether conveyed literally, or via +a EE<lt>...> sequence) is to be conveyed as "e\\*'". +Similarly, a program rendering Pod in a Mac OS application window, would +presumably need to know that "\xE9" maps to codepoint 142 in MacRoman +encoding that (at time of writing) is native for Mac OS. Such +Unicode2whatever mappings are presumably already widely available for +common output formats. (Such mappings may be incomplete! Implementers +are not expected to bend over backwards in an attempt to render +Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any +of the other weird things that Unicode can encode.) 
And +if a Pod document uses a character not found in such a mapping, the +formatter should consider it an unrenderable character. + +=item * + +If, surprisingly, the implementor of a Pod formatter can't find a +satisfactory pre-existing table mapping from Unicode characters to +escapes in the target format (e.g., a decent table of Unicode +characters to *roff escapes), it will be necessary to build such a +table. If you are in this circumstance, you should begin with the +characters in the range 0x00A0 - 0x00FF, which is mostly the heavily +used accented characters. Then proceed (as patience permits and +fastidiousness compels) through the characters that the (X)HTML +standards groups judged important enough to merit mnemonics +for. These are declared in the (X)HTML specifications at the +www.W3.org site. At time of writing (September 2001), the most recent +entity declaration files are: + + http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent + +Then you can progress through any remaining notable Unicode characters +in the range 0x2000-0x204D (consult the character tables at +www.unicode.org), and whatever else strikes your fancy. For example, +in F<xhtml-symbol.ent>, there is the entry: + + <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech --> + +While the mapping "infin" to the character "\x{221E}" will (hopefully) +have been already handled by the Pod parser, the presence of the +character in this file means that it's reasonably important enough to +include in a formatter's table that maps from notable Unicode characters +to the codes necessary for rendering them.
So for a Unicode-to-*roff +mapping, for example, this would merit the entry: + + "\x{221E}" => '\(in', + +It is eagerly hoped that in the future, increasing numbers of formats +(and formatters) will support Unicode characters directly (as (X)HTML +does with C<&infin;>, C<&#8734;>, or C<&#x221E;>), reducing the need +for idiosyncratic mappings of Unicode-to-I<my_escapes>. + +=item * + +It is up to each individual Pod formatter to display good judgement when +confronted with an unrenderable character (which is distinct from an +unknown EE<lt>thing> sequence that the parser couldn't resolve to +anything, renderable or not). It is good practice to map Latin letters +with diacritics (like "EE<lt>eacute>"/"EE<lt>233>") to the corresponding +unaccented US-ASCII letters (like a simple character 101, "e"), but +clearly this is often not feasible, and an unrenderable character may +be represented as "?", or the like. In attempting a sane fallback +(as from EE<lt>233> to "e"), Pod formatters may use the +%Latin1Code_to_fallback table in L<Pod::Escapes|Pod::Escapes>, or +L<Text::Unidecode|Text::Unidecode>, if available. + +For example, this Pod text: + + magic is enabled if you set C<$Currency> to 'E<euro>'. + +may be rendered as: +"magic is enabled if you set C<$Currency> to 'I<?>'" or as +"magic is enabled if you set C<$Currency> to 'B<[euro]>'", or as +"magic is enabled if you set C<$Currency> to '[x20AC]'", etc. + +A Pod formatter may also note, in a comment or warning, a list of what +unrenderable characters were encountered. + +=item * + +EE<lt>...> may freely appear in any formatting code (other than +in another EE<lt>...> or in an ZE<lt>>). That is, "XE<lt>The +EE<lt>euro>1,000,000 Solution>" is valid, as is "LE<lt>The +EE<lt>euro>1,000,000 Solution|Million::Euros>".
+ +=item * + +Some Pod formatters output to formats that implement non-breaking +spaces as an individual character (which I'll call "NBSP"), and +others output to formats that implement non-breaking spaces just as +spaces wrapped in a "don't break this across lines" code. Note that +at the level of Pod, both sorts of codes can occur: Pod can contain a +NBSP character (whether as a literal, or as a "EE<lt>160>" or +"EE<lt>nbsp>" code); and Pod can contain "SE<lt>foo +IE<lt>barE<gt> baz>" codes, where "mere spaces" (character 32) in +such codes are taken to represent non-breaking spaces. Pod +parsers should consider supporting the optional parsing of "SE<lt>foo +IE<lt>barE<gt> baz>" as if it were +"fooI<NBSP>IE<lt>barE<gt>I<NBSP>baz", and, going the other way, the +optional parsing of groups of words joined by NBSP's as if each group +were in a SE<lt>...> code, so that formatters may use the +representation that maps best to what the output format demands. + +=item * + +Some processors may find that the C<SE<lt>...E<gt>> code is easiest to +implement by replacing each space in the parse tree under the content +of the S, with an NBSP. But note: the replacement should apply I<not> to +spaces in I<all> text, but I<only> to spaces in I<printable> text. (This +distinction may or may not be evident in the particular tree/event +model implemented by the Pod parser.) For example, consider this +unusual case: + + S<L</Autoloaded Functions>> + +This means that the space in the middle of the visible link text must +not be broken across lines. In other words, it's the same as this: + + L<"AutoloadedE<160>Functions"/Autoloaded Functions> + +However, a misapplied space-to-NBSP replacement could (wrongly) +produce something equivalent to this: + + L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions> + +...which is almost definitely not going to work as a hyperlink (assuming +this formatter outputs a format supporting hypertext). 
+ +Formatters may choose to just not support the S format code, +especially in cases where the output format simply has no NBSP +character/code and no code for "don't break this stuff across lines". + +=item * + +Besides the NBSP character discussed above, implementors are reminded +of the existence of the other "special" character in Latin-1, the +"soft hyphen" character, also known as "discretionary hyphen" +(i.e. C<EE<lt>173E<gt>> = C<EE<lt>0xADE<gt>> = +C<EE<lt>shyE<gt>>). This character expresses an optional hyphenation +point. That is, it normally renders as nothing, but may render as a +"-" if a formatter breaks the word at that point. Pod formatters +should, as appropriate, do one of the following: 1) render this with +a code with the same meaning (e.g., "\-" in RTF), 2) pass it through +in the expectation that the formatter understands this character as +such, or 3) delete it. + +For example: + + sigE<shy>action + manuE<shy>script + JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi + +These signal to a formatter that if it is to hyphenate "sigaction" +or "manuscript", then it should be done as +"sig-I<[linebreak]>action" or "manu-I<[linebreak]>script" +(and if it doesn't hyphenate it, then the C<EE<lt>shyE<gt>> doesn't +show up at all). And if it is +to hyphenate "Jarkko" and/or "Hietaniemi", it can do +so only at the points where there is a C<EE<lt>shyE<gt>> code. + +In practice, it is anticipated that this character will not be used +often, but formatters should either support it, or delete it. + +=item * + +If you think that you want to add a new command to Pod (like, say, a +"=biblio" command), consider whether you could get the same +effect with a for or begin/end sequence: "=for biblio ..." or "=begin +biblio" ... "=end biblio". Pod processors that don't understand +"=for biblio", etc, will simply ignore it, whereas they may complain +loudly if they see "=biblio".
+ +=item * + +Throughout this document, "Pod" has been the preferred spelling for +the name of the documentation format. One may also use "POD" or +"pod". For the documentation that is (typically) in the Pod +format, you may use "pod", or "Pod", or "POD". Understanding these +distinctions is useful; but obsessing over how to spell them, usually +is not. + +=back + + + + + +=head1 About LE<lt>...E<gt> Codes + +As you can tell from a glance at L<perlpod|perlpod>, the LE<lt>...> +code is the most complex of the Pod formatting codes. The points below +will hopefully clarify what it means and how processors should deal +with it. + +=over + +=item * + +In parsing an LE<lt>...> code, Pod parsers must distinguish at least +four attributes: + +=over + +=item First: + +The link-text. If there is none, this must be undef. (E.g., in +"LE<lt>Perl Functions|perlfunc>", the link-text is "Perl Functions". +In "LE<lt>Time::HiRes>" and even "LE<lt>|Time::HiRes>", there is no +link text. Note that link text may contain formatting.) + +=item Second: + +The possibly inferred link-text -- i.e., if there was no real link +text, then this is the text that we'll infer in its place. (E.g., for +"LE<lt>Getopt::Std>", the inferred link text is "Getopt::Std".) + +=item Third: + +The name or URL, or undef if none. (E.g., in "LE<lt>Perl +Functions|perlfunc>", the name -- also sometimes called the page -- +is "perlfunc". In "LE<lt>/CAVEATS>", the name is undef.) + +=item Fourth: + +The section (AKA "item" in older perlpods), or undef if none. E.g., +in "LE<lt>Getopt::Std/DESCRIPTIONE<gt>", "DESCRIPTION" is the section. (Note +that this is not the same as a manpage section like the "5" in "man 5 +crontab". "Section Foo" in the Pod sense means the part of the text +that's introduced by the heading or item whose text is "Foo".) 
+ +=back + +Pod parsers may also note additional attributes including: + +=over + +=item Fifth: + +A flag for whether item 3 (if present) is a URL (like +"http://lists.perl.org" is), in which case there should be no section +attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or +possibly a man page name (like "crontab(5)" is). + +=item Sixth: + +The raw original LE<lt>...> content, before text is split on +"|", "/", etc, and before EE<lt>...> codes are expanded. + +=back + +(The above were numbered only for concise reference below. It is not +a requirement that these be passed as an actual list or array.) + +For example: + + L<Foo::Bar> + => undef, # link text + "Foo::Bar", # possibly inferred link text + "Foo::Bar", # name + undef, # section + 'pod', # what sort of link + "Foo::Bar" # original content + + L<Perlport's section on NL's|perlport/Newlines> + => "Perlport's section on NL's", # link text + "Perlport's section on NL's", # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "Perlport's section on NL's|perlport/Newlines" # orig. 
content + + L<perlport/Newlines> + => undef, # link text + '"Newlines" in perlport', # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "perlport/Newlines" # original content + + L<crontab(5)/"DESCRIPTION"> + => undef, # link text + '"DESCRIPTION" in crontab(5)', # possibly inferred link text + "crontab(5)", # name + "DESCRIPTION", # section + 'man', # what sort of link + 'crontab(5)/"DESCRIPTION"' # original content + + L</Object Attributes> + => undef, # link text + '"Object Attributes"', # possibly inferred link text + undef, # name + "Object Attributes", # section + 'pod', # what sort of link + "/Object Attributes" # original content + + L<http://www.perl.org/> + => undef, # link text + "http://www.perl.org/", # possibly inferred link text + "http://www.perl.org/", # name + undef, # section + 'url', # what sort of link + "http://www.perl.org/" # original content + +Note that you can distinguish URL-links from anything else by the +fact that they match C<m/\A\w+:[^:\s]\S*\z/>. So +C<LE<lt>http://www.perl.comE<gt>> is a URL, but +C<LE<lt>HTTP::ResponseE<gt>> isn't. + +=item * + +In case of LE<lt>...> codes with no "text|" part in them, +older formatters have exhibited great variation in actually displaying +the link or cross reference. For example, LE<lt>crontab(5)> would render +as "the C<crontab(5)> manpage", or "in the C<crontab(5)> manpage" +or just "C<crontab(5)>". + +Pod processors must now treat "text|"-less links as follows: + + L<name> => L<name|name> + L</section> => L<"section"|/section> + L<name/section> => L<"section" in name|name/section> + +=item * + +Note that section names might contain markup. 
I.e., if a section +starts with: + + =head2 About the C<-M> Operator + +or with: + + =item About the C<-M> Operator + +then a link to it would look like this: + + L<somedoc/About the C<-M> Operator> + +Formatters may choose to ignore the markup for purposes of resolving +the link and use only the renderable characters in the section name, +as in: + + <h1><a name="About_the_-M_Operator">About the <code>-M</code> + Operator</a></h1> + + ... + + <a href="somedoc#About_the_-M_Operator">"About the <code>-M</code> + Operator" in somedoc</a> + +=item * + +Previous versions of perlpod distinguished C<LE<lt>name/"section"E<gt>> +links from C<LE<lt>name/itemE<gt>> links (and their targets). These +have been merged syntactically and semantically in the current +specification, and I<section> can refer either to a "=headI<n> Heading +Content" command or to a "=item Item Content" command. This +specification does not specify what behavior should be in the case +of a given document having several things all seeming to produce the +same I<section> identifier (e.g., in HTML, several things all producing +the same I<anchorname> in <a name="I<anchorname>">...</a> +elements). Where Pod processors can control this behavior, they should +use the first such anchor. That is, C<LE<lt>Foo/BarE<gt>> refers to the +I<first> "Bar" section in Foo. + +But for some processors/formats this cannot be easily controlled; as +with the HTML example, the behavior of multiple ambiguous +<a name="I<anchorname>">...</a> is most easily just left up to +browsers to decide. + +=item * + +Authors wanting to link to a particular (absolute) URL, must do so +only with "LE<lt>scheme:...>" codes (like +LE<lt>http://www.perl.org>), and must not attempt "LE<lt>Some Site +Name|scheme:...>" codes. This restriction avoids many problems +in parsing and rendering LE<lt>...> codes.
+ +=item * + +In a C<LE<lt>text|...E<gt>> code, text may contain formatting codes +for formatting or for EE<lt>...> escapes, as in: + + L<B<ummE<234>stuff>|...> + +For C<LE<lt>...E<gt>> codes without a "name|" part, only +C<EE<lt>...E<gt>> and C<ZE<lt>E<gt>> codes may occur -- no +other formatting codes. That is, authors should not use +"C<LE<lt>BE<lt>Foo::BarE<gt>E<gt>>". + +Note, however, that formatting codes and ZE<lt>>'s can occur in any +and all parts of an LE<lt>...> (i.e., in I<name>, I<section>, I<text>, +and I<url>). + +Authors must not nest LE<lt>...> codes. For example, "LE<lt>The +LE<lt>Foo::Bar> man page>" should be treated as an error. + +=item * + +Note that Pod authors may use formatting codes inside the "text" +part of "LE<lt>text|name>" (and so on for LE<lt>text|/"sec">). + +In other words, this is valid: + + Go read L<the docs on C<$.>|perlvar-copy/"$."> + +Some output formats that do allow rendering "LE<lt>...>" codes as +hypertext, might not allow the link-text to be formatted; in +that case, formatters will have to just ignore that formatting. + +=item * + +At time of writing, C<LE<lt>nameE<gt>> values are of two types: +either the name of a Pod page like C<LE<lt>Foo::BarE<gt>> (which +might be a real Perl module or program in an @INC / PATH +directory, or a .pod file in those places); or the name of a UNIX +man page, like C<LE<lt>crontab(5)E<gt>>. In theory, C<LE<lt>chmodE<gt>> +is ambiguous between a Pod page called "chmod", or the Unix man page +"chmod" (in whatever man-section). However, the presence of a string +in parens, as in "crontab(5)", is sufficient to signal that what +is being discussed is not a Pod page, and so is presumably a +UNIX man page. The distinction is of no importance to many +Pod processors, but some processors that render to hypertext formats +may need to distinguish them in order to know how to render a +given C<LE<lt>fooE<gt>> code.
+ +=item * + +Previous versions of perlpod allowed for a C<LE<lt>sectionE<gt>> syntax +(as in C<LE<lt>Object AttributesE<gt>>), which was not easily distinguishable +from C<LE<lt>nameE<gt>> syntax. This syntax is no longer in the +specification, and has been replaced by the C<LE<lt>"section"E<gt>> syntax +(where the quotes were formerly optional). Pod parsers should tolerate +the C<LE<lt>sectionE<gt>> syntax, for a while at least. The suggested +heuristic for distinguishing C<LE<lt>sectionE<gt>> from C<LE<lt>nameE<gt>> +is that if it contains any whitespace, it's a I<section>. Pod processors +may warn about this being deprecated syntax. + +=back + +=head1 About =over...=back Regions + +"=over"..."=back" regions are used for various kinds of list-like +structures. (I use the term "region" here simply as a collective +term for everything from the "=over" to the matching "=back".) + +=over + +=item * + +The non-zero numeric I<indentlevel> in "=over I<indentlevel>" ... +"=back" is used for giving the formatter a clue as to how many +"spaces" (ems, or roughly equivalent units) it should tab over, +although many formatters will have to convert this to an absolute +measurement that may not exactly match with the size of spaces (or M's) +in the document's base font. Other formatters may have to completely +ignore the number. The lack of any explicit I<indentlevel> parameter is +equivalent to an I<indentlevel> value of 4. Pod processors may +complain if I<indentlevel> is present but is not a positive number +matching C<m/\A(\d*\.)?\d+\z/>. + +=item * + +Authors of Pod formatters are reminded that "=over" ... "=back" may +map to several different constructs in your output format. For +example, in converting Pod to (X)HTML, it can map to any of +<ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or +<blockquote>...</blockquote>. Similarly, "=item" can map to <li> or +<dt>. + +=item * + +Each "=over" ... 
"=back" region should be one of the following: + +=over + +=item * + +An "=over" ... "=back" region containing only "=item *" commands, +each followed by some number of ordinary/verbatim paragraphs, other +nested "=over" ... "=back" regions, "=for..." paragraphs, and +"=begin"..."=end" regions. + +(Pod processors must tolerate a bare "=item" as if it were "=item +*".) Whether "*" is rendered as a literal asterisk, an "o", or as +some kind of real bullet character, is left up to the Pod formatter, +and may depend on the level of nesting. + +=item * + +An "=over" ... "=back" region containing only +C<m/\A=item\s+\d+\.?\s*\z/> paragraphs, each one (or each group of them) +followed by some number of ordinary/verbatim paragraphs, other nested +"=over" ... "=back" regions, "=for..." paragraphs, and/or +"=begin"..."=end" codes. Note that the numbers must start at 1 +in each section, and must proceed in order and without skipping +numbers. + +(Pod processors must tolerate lines like "=item 1" as if they were +"=item 1.", with the period.) + +=item * + +An "=over" ... "=back" region containing only "=item [text]" +commands, each one (or each group of them) followed by some number of +ordinary/verbatim paragraphs, other nested "=over" ... "=back" +regions, or "=for..." paragraphs, and "=begin"..."=end" regions. + +The "=item [text]" paragraph should not match +C<m/\A=item\s+\d+\.?\s*\z/> or C<m/\A=item\s+\*\s*\z/>, nor should it +match just C<m/\A=item\s*\z/>. + +=item * + +An "=over" ... "=back" region containing no "=item" paragraphs at +all, and containing only some number of +ordinary/verbatim paragraphs, and possibly also some nested "=over" +... "=back" regions, "=for..." paragraphs, and "=begin"..."=end" +regions. Such an itemless "=over" ... "=back" region in Pod is +equivalent in meaning to a "<blockquote>...</blockquote>" element in +HTML. + +=back + +Note that with all the above cases, you can determine which type of +"=over" ... 
"=back" you have, by examining the first (non-"=cut", +non-"=pod") Pod paragraph after the "=over" command. + +=item * + +Pod formatters I<must> tolerate arbitrarily large amounts of text +in the "=item I<text...>" paragraph. In practice, most such +paragraphs are short, as in: + + =item For cutting off our trade with all parts of the world + +But they may be arbitrarily long: + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item * + +Pod processors should tolerate "=item *" / "=item I<number>" commands +with no accompanying paragraph. The middle item is an example: + + =over + + =item 1 + + Pick up dry cleaning. + + =item 2 + + =item 3 + + Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs. + + =back + +=item * + +No "=over" ... "=back" region can contain headings. Processors may +treat such a heading as an error. + +=item * + +Note that an "=over" ... "=back" region should have some +content. That is, authors should not have an empty region like this: + + =over + + =back + +Pod processors seeing such a contentless "=over" ... "=back" region, +may ignore it, or may report it as an error. + +=item * + +Processors must tolerate an "=over" list that goes off the end of the +document (i.e., which has no matching "=back"), but they may warn +about such a list. + +=item * + +Authors of Pod formatters should note that this construct: + + =item Neque + + =item Porro + + =item Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. 
+ + =item Ut Enim + +is semantically ambiguous, in a way that makes formatting decisions +a bit difficult. On the one hand, it could be mention of an item +"Neque", mention of another item "Porro", and mention of another +item "Quisquam Est", with just the last one requiring the explanatory +paragraph "Qui dolorem ipsum quia dolor..."; and then an item +"Ut Enim". In that case, you'd want to format it like so: + + Neque + + Porro + + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But it could equally well be a discussion of three (related or equivalent) +items, "Neque", "Porro", and "Quisquam Est", followed by a paragraph +explaining them all, and then a new item "Ut Enim". In that case, you'd +probably want to format it like so: + + Neque + Porro + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But (for the foreseeable future), Pod does not provide any way for Pod +authors to distinguish which grouping is meant by the above +"=item"-cluster structure. So formatters should format it like so: + + Neque + + Porro + + Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +That is, there should be (at least roughly) equal spacing between +items as between paragraphs (although that spacing may well be less +than the full height of a line of text). This leaves it to the reader +to use (con)textual cues to figure out whether the "Qui dolorem +ipsum..." paragraph applies to the "Quisquam Est" item or to all three +items "Neque", "Porro", and "Quisquam Est". 
While not an ideal +situation, this is preferable to providing formatting cues that may +be actually contrary to the author's intent. + +=back + + + +=head1 About Data Paragraphs and "=begin/=end" Regions + +Data paragraphs are typically used for inlining non-Pod data that is +to be used (typically passed through) when rendering the document to +a specific format: + + =begin rtf + + \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + + =end rtf + +The exact same effect could, incidentally, be achieved with a single +"=for" paragraph: + + =for rtf \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + +(Although that is not formally a data paragraph, it has the same +meaning as one, and Pod parsers may parse it as one.) + +Another example of a data paragraph: + + =begin html + + I like <em>PIE</em>! + + <hr>Especially pecan pie! + + =end html + +If these were ordinary paragraphs, the Pod parser would try to +expand the "EE<lt>/em>" (in the first paragraph) as a formatting +code, just like "EE<lt>lt>" or "EE<lt>eacute>". But since this +is in a "=begin I<identifier>"..."=end I<identifier>" region I<and> +the identifier "html" doesn't have a ":" prefix, the contents +of this region are stored as data paragraphs, instead of being +processed as ordinary paragraphs (or if they began with spaces +and/or tabs, as verbatim paragraphs). + +As a further example: At time of writing, no "biblio" identifier is +supported, but suppose some processor were written to recognize it as +a way of (say) denoting a bibliographic reference (necessarily +containing formatting codes in ordinary paragraphs). The fact that +"biblio" paragraphs were meant for ordinary processing would be +indicated by prefacing each "biblio" identifier with a colon: + + =begin :biblio + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+ + =end :biblio + +This would signal to the parser that paragraphs in this begin...end +region are subject to normal handling as ordinary/verbatim paragraphs +(while still tagged as meant only for processors that understand the +"biblio" identifier). The same effect could be had with: + + =for :biblio + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + +The ":" on these identifiers means simply "process this stuff +normally, even though the result will be for some special target". +I suggest that parser APIs report "biblio" as the target identifier, +but also report that it had a ":" prefix. (And similarly, with the +above "html", report "html" as the target identifier, and note the +I<lack> of a ":" prefix.) + +Note that a "=begin I<identifier>"..."=end I<identifier>" region where +I<identifier> begins with a colon, I<can> contain commands. For example: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =for comment + hm, check abebooks.com for how much used copies cost. + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + =end :biblio + +Note, however, a "=begin I<identifier>"..."=end I<identifier>" +region where I<identifier> does I<not> begin with a colon, should not +directly contain "=head1" ... "=head4" commands, nor "=over", nor "=back", +nor "=item". For example, this may be considered invalid: + + =begin somedata + + This is a data paragraph. + + =head1 Don't do this! + + This is a data paragraph too. + + =end somedata + +A Pod processor may signal that the above (specifically the "=head1" +paragraph) is an error. Note, however, that the following should +I<not> be treated as an error: + + =begin somedata + + This is a data paragraph. 
+ + =cut + + # Yup, this isn't Pod anymore. + sub excl { (rand() > .5) ? "hoo!" : "hah!" } + + =pod + + This is a data paragraph too. + + =end somedata + +And this too is valid: + + =begin someformat + + This is a data paragraph. + + And this is a data paragraph. + + =begin someotherformat + + This is a data paragraph too. + + And this is a data paragraph too. + + =begin :yetanotherformat + + =head2 This is a command paragraph! + + This is an ordinary paragraph! + + And this is a verbatim paragraph! + + =end :yetanotherformat + + =end someotherformat + + Another data paragraph! + + =end someformat + +The contents of the above "=begin :yetanotherformat" ... +"=end :yetanotherformat" region I<aren't> data paragraphs, because +the immediately containing region's identifier (":yetanotherformat") +begins with a colon. In practice, most regions that contain +data paragraphs will contain I<only> data paragraphs; however, +the above nesting is syntactically valid as Pod, even if it is +rare. However, the handlers for some formats, like "html", +will accept only data paragraphs, not nested regions; and they may +complain if they see (targeted for them) nested regions, or commands, +other than "=end", "=pod", and "=cut". + +Also consider this valid structure: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + Buy buy buy! + + =begin html + + <img src='wirth_spokesmodeling_book.png'> + + <hr> + + =end html + + Now now now! + + =end :biblio + +There, the "=begin html"..."=end html" region is nested inside +the larger "=begin :biblio"..."=end :biblio" region. 
Note that the +content of the "=begin html"..."=end html" region is data +paragraph(s), because the immediately containing region's identifier +("html") I<doesn't> begin with a colon. + +Pod parsers, when processing a series of data paragraphs one +after another (within a single region), should consider them to +be one large data paragraph that happens to contain blank lines. So +the content of the above "=begin html"..."=end html" I<may> be stored +as two data paragraphs (one consisting of +"<img src='wirth_spokesmodeling_book.png'>\n" +and another consisting of "<hr>\n"), but I<should> be stored as +a single data paragraph (consisting of +"<img src='wirth_spokesmodeling_book.png'>\n\n<hr>\n"). + +Pod processors should tolerate empty +"=begin I<something>"..."=end I<something>" regions, +empty "=begin :I<something>"..."=end :I<something>" regions, and +contentless "=for I<something>" and "=for :I<something>" +paragraphs. I.e., these should be tolerated: + + =for html + + =begin html + + =end html + + =begin :biblio + + =end :biblio + +Incidentally, note that there's no easy way to express a data +paragraph starting with something that looks like a command. Consider: + + =begin stuff + + =shazbot + + =end stuff + +There, "=shazbot" will be parsed as a Pod command "shazbot", not as a data +paragraph "=shazbot\n". However, you can express a data paragraph consisting +of "=shazbot\n" using this code: + + =for stuff =shazbot + +The situation where this is necessary, is presumably quite rare. + +Note that =end commands must match the currently open =begin command. That +is, they must properly nest. For example, this is valid: + + =begin outer + + X + + =begin inner + + Y + + =end inner + + Z + + =end outer + +while this is invalid: + + =begin outer + + X + + =begin inner + + Y + + =end outer + + Z + + =end inner + +This latter is improper because when the "=end outer" command is seen, the +currently open region has the formatname "inner", not "outer". 
(It just +happens that "outer" is the format name of a higher-up region.) This is +an error. Processors must by default report this as an error, and may halt +processing the document containing that error. A corollary of this is that +regions cannot "overlap" -- i.e., the latter block above does not represent +a region called "outer" which contains X and Y, overlapping a region called +"inner" which contains Y and Z. But because it is invalid (as all +apparently overlapping regions would be), it doesn't represent that, or +anything at all. + +Similarly, this is invalid: + + =begin thing + + =end hting + +This is an error because the region is opened by "thing", and the "=end" +tries to close "hting" [sic]. + +This is also invalid: + + =begin thing + + =end + +This is invalid because every "=end" command must have a formatname +parameter. + +=head1 SEE ALSO + +L<perlpod>, L<perlsyn/"PODs: Embedded Documentation">, +L<podchecker> + +=head1 AUTHOR + +Sean M. Burke + +=cut + + diff --git a/ext/Pod-Html/testdir/perlvar-copy.pod b/ext/Pod-Html/testdir/perlvar-copy.pod new file mode 100644 index 0000000000..04d2a7474c --- /dev/null +++ b/ext/Pod-Html/testdir/perlvar-copy.pod @@ -0,0 +1,1737 @@ +=head1 NAME + +perlvarcopy - Perl predefined variables + +=head1 DESCRIPTION + +=head2 Predefined Names + +The following names have special meaning to Perl. Most +punctuation names have reasonable mnemonics, or analogs in the +shells. Nevertheless, if you wish to use long variable names, +you need only say + + use English; + +at the top of your program. This aliases all the short names to the long +names in the current package. Some even have medium names, generally +borrowed from B<awk>. In general, it's best to use the + + use English '-no_match_vars'; + +invocation if you don't need $PREMATCH, $MATCH, or $POSTMATCH, as it avoids +a certain performance hit with the use of regular expressions. See +L<English>. 
+ +Variables that depend on the currently selected filehandle may be set by +calling an appropriate object method on the IO::Handle object, although +this is less efficient than using the regular built-in variables. (Summary +lines below for this contain the word HANDLE.) First you must say + + use IO::Handle; + +after which you may use either + + method HANDLE EXPR + +or more safely, + + HANDLE->method(EXPR) + +Each method returns the old value of the IO::Handle attribute. +The methods each take an optional EXPR, which, if supplied, specifies the +new value for the IO::Handle attribute in question. If not supplied, +most methods do nothing to the current value--except for +autoflush(), which will assume a 1 for you, just to be different. + +Because loading in the IO::Handle class is an expensive operation, you should +learn how to use the regular built-in variables. + +A few of these variables are considered "read-only". This means that if +you try to assign to this variable, either directly or indirectly through +a reference, you'll raise a run-time exception. + +You should be very careful when modifying the default values of most +special variables described in this document. In most cases you want +to localize these variables before changing them, since if you don't, +the change may affect other modules which rely on the default values +of the special variables that you have changed. This is one of the +correct ways to read the whole file at once: + + open my $fh, "<", "foo" or die $!; + local $/; # enable localized slurp mode + my $content = <$fh>; + close $fh; + +But the following code is quite bad: + + open my $fh, "<", "foo" or die $!; + undef $/; # enable slurp mode + my $content = <$fh>; + close $fh; + +since some other module may want to read data from some file in the +default "line mode", so if the code we have just presented has been +executed, the global value of C<$/> is now changed for any other code +running inside the same Perl interpreter.
+ +Usually when a variable is localized you want to make sure that this +change affects the shortest scope possible. So unless you are already +inside some short C<{}> block, you should create one yourself. For +example: + + my $content = ''; + open my $fh, "<", "foo" or die $!; + { + local $/; + $content = <$fh>; + } + close $fh; + +Here is an example of how your own code can go broken: + + for (1..5){ + nasty_break(); + print "$_ "; + } + sub nasty_break { + $_ = 5; + # do something with $_ + } + +You probably expect this code to print: + + 1 2 3 4 5 + +but instead you get: + + 5 5 5 5 5 + +Why? Because nasty_break() modifies C<$_> without localizing it +first. The fix is to add local(): + + local $_ = 5; + +It's easy to notice the problem in such a short example, but in more +complicated code you are looking for trouble if you don't localize +changes to the special variables. + +The following list is ordered by scalar variables first, then the +arrays, then the hashes. + +=over 8 + +=item $ARG + +=item $_ +X<$_> X<$ARG> + +The default input and pattern-searching space. The following pairs are +equivalent: + + while (<>) {...} # equivalent only in while! + while (defined($_ = <>)) {...} + + /^Subject:/ + $_ =~ /^Subject:/ + + tr/a-z/A-Z/ + $_ =~ tr/a-z/A-Z/ + + chomp + chomp($_) + +Here are the places where Perl will assume $_ even if you +don't use it: + +=over 3 + +=item * + +The following functions: + +abs, alarm, chomp, chop, chr, chroot, cos, defined, eval, exp, glob, +hex, int, lc, lcfirst, length, log, lstat, mkdir, oct, ord, pos, print, +quotemeta, readlink, readpipe, ref, require, reverse (in scalar context only), +rmdir, sin, split (on its second argument), sqrt, stat, study, uc, ucfirst, +unlink, unpack. + +=item * + +All file tests (C<-f>, C<-d>) except for C<-t>, which defaults to STDIN. +See L<perlfunc/-X> + + +=item * + +The pattern matching operations C<m//>, C<s///> and C<tr///> (aka C<y///>) +when used without an C<=~> operator. 
+ +=item * + +The default iterator variable in a C<foreach> loop if no other +variable is supplied. + +=item * + +The implicit iterator variable in the grep() and map() functions. + +=item * + +The implicit variable of given(). + +=item * + +The default place to put an input record when a C<< <FH> >> +operation's result is tested by itself as the sole criterion of a C<while> +test. Outside a C<while> test, this will not happen. + +=back + +As C<$_> is a global variable, this may lead in some cases to unwanted +side-effects. As of perl 5.9.1, you can now use a lexical version of +C<$_> by declaring it in a file or in a block with C<my>. Moreover, +declaring C<our $_> restores the global C<$_> in the current scope. + +(Mnemonic: underline is understood in certain operations.) + +=back + +=over 8 + +=item $a + +=item $b +X<$a> X<$b> + +Special package variables when using sort(), see L<perlfunc/sort>. +Because of this specialness $a and $b don't need to be declared +(using use vars, or our()) even when using the C<strict 'vars'> pragma. +Don't lexicalize them with C<my $a> or C<my $b> if you want to be +able to use them in the sort() comparison block or function. + +=back + +=over 8 + +=item $<I<digits>> +X<$1> X<$2> X<$3> + +Contains the subpattern from the corresponding set of capturing +parentheses from the last pattern match, not counting patterns +matched in nested blocks that have been exited already. (Mnemonic: +like \digits.) These variables are all read-only and dynamically +scoped to the current BLOCK. + +=item $MATCH + +=item $& +X<$&> X<$MATCH> + +The string matched by the last successful pattern match (not counting +any matches hidden within a BLOCK or eval() enclosed by the current +BLOCK). (Mnemonic: like & in some editors.) This variable is read-only +and dynamically scoped to the current BLOCK. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. 
+ +See L</@-> for a replacement. + +=item ${^MATCH} +X<${^MATCH}> + +This is similar to C<$&> (C<$MATCH>) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. + +=item $PREMATCH + +=item $` +X<$`> X<$PREMATCH> + +The string preceding whatever was matched by the last successful +pattern match (not counting any matches hidden within a BLOCK or eval +enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted +string.) This variable is read-only. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. + +See L</@-> for a replacement. + +=item ${^PREMATCH} +X<${^PREMATCH}> + +This is similar to C<$`> ($PREMATCH) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. + +=item $POSTMATCH + +=item $' +X<$'> X<$POSTMATCH> + +The string following whatever was matched by the last successful +pattern match (not counting any matches hidden within a BLOCK or eval() +enclosed by the current BLOCK). (Mnemonic: C<'> often follows a quoted +string.) Example: + + local $_ = 'abcdefghi'; + /def/; + print "$`:$&:$'\n"; # prints abc:def:ghi + +This variable is read-only and dynamically scoped to the current BLOCK. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. + +See L</@-> for a replacement. + +=item ${^POSTMATCH} +X<${^POSTMATCH}> + +This is similar to C<$'> (C<$POSTMATCH>) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. 
+ +=item $LAST_PAREN_MATCH + +=item $+ +X<$+> X<$LAST_PAREN_MATCH> + +The text matched by the last bracket of the last successful search pattern. +This is useful if you don't know which one of a set of alternative patterns +matched. For example: + + /Version: (.*)|Revision: (.*)/ && ($rev = $+); + +(Mnemonic: be positive and forward looking.) +This variable is read-only and dynamically scoped to the current BLOCK. + +=item $LAST_SUBMATCH_RESULT + +=item $^N +X<$^N> + +The text matched by the used group most-recently closed (i.e. the group +with the rightmost closing parenthesis) of the last successful search +pattern. (Mnemonic: the (possibly) Nested parenthesis that most +recently closed.) + +This is primarily used inside C<(?{...})> blocks for examining text +recently matched. For example, to effectively capture text to a variable +(in addition to C<$1>, C<$2>, etc.), replace C<(...)> with + + (?:(...)(?{ $var = $^N })) + +Setting and then using C<$var> in this way relieves you from having to +worry about exactly which numbered set of parentheses they are. + +This variable is dynamically scoped to the current BLOCK. + +=item @LAST_MATCH_END + +=item @+ +X<@+> X<@LAST_MATCH_END> + +This array holds the offsets of the ends of the last successful +submatches in the currently active dynamic scope. C<$+[0]> is +the offset into the string of the end of the entire match. This +is the same value as what the C<pos> function returns when called +on the variable that was matched against. The I<n>th element +of this array holds the offset of the I<n>th submatch, so +C<$+[1]> is the offset past where $1 ends, C<$+[2]> the offset +past where $2 ends, and so on. You can use C<$#+> to determine +how many subgroups were in the last successful match. See the +examples given for the C<@-> variable.
+ +=item %LAST_PAREN_MATCH + +=item %+ +X<%+> + +Similar to C<@+>, the C<%+> hash allows access to the named capture +buffers, should they exist, in the last successful match in the +currently active dynamic scope. + +For example, C<$+{foo}> is equivalent to C<$1> after the following match: + + 'foo' =~ /(?<foo>foo)/; + +The keys of the C<%+> hash list only the names of buffers that have +captured (and that are thus associated to defined values). + +The underlying behaviour of C<%+> is provided by the +L<Tie::Hash::NamedCapture> module. + +B<Note:> C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes, then the results may be +surprising. + +=item HANDLE->input_line_number(EXPR) + +=item $INPUT_LINE_NUMBER + +=item $NR + +=item $. +X<$.> X<$NR> X<$INPUT_LINE_NUMBER> X<line number> + +Current line number for the last filehandle accessed. + +Each filehandle in Perl counts the number of lines that have been read +from it. (Depending on the value of C<$/>, Perl's idea of what +constitutes a line may not match yours.) When a line is read from a +filehandle (via readline() or C<< <> >>), or when tell() or seek() is +called on it, C<$.> becomes an alias to the line counter for that +filehandle. + +You can adjust the counter by assigning to C<$.>, but this will not +actually move the seek pointer. I<Localizing C<$.> will not localize +the filehandle's line count>. Instead, it will localize perl's notion +of which filehandle C<$.> is currently aliased to. + +C<$.> is reset when the filehandle is closed, but B<not> when an open +filehandle is reopened without an intervening close(). For more +details, see L<perlop/"IE<sol>O Operators">. Because C<< <> >> never does +an explicit close, line numbers increase across ARGV files (but see +examples in L<perlfunc/eof>). 
+ +You can also use C<< HANDLE->input_line_number(EXPR) >> to access the +line counter for a given filehandle without having to worry about +which handle you last accessed. + +(Mnemonic: many programs use "." to mean the current line number.) + +=item IO::Handle->input_record_separator(EXPR) + +=item $INPUT_RECORD_SEPARATOR + +=item $RS + +=item $/ +X<$/> X<$RS> X<$INPUT_RECORD_SEPARATOR> + +The input record separator, newline by default. This +influences Perl's idea of what a "line" is. Works like B<awk>'s RS +variable, including treating empty lines as a terminator if set to +the null string. (An empty line cannot contain any spaces +or tabs.) You may set it to a multi-character string to match a +multi-character terminator, or to C<undef> to read through the end +of file. Setting it to C<"\n\n"> means something slightly +different than setting to C<"">, if the file contains consecutive +empty lines. Setting to C<""> will treat two or more consecutive +empty lines as a single empty line. Setting to C<"\n\n"> will +blindly assume that the next input character belongs to the next +paragraph, even if it's a newline. (Mnemonic: / delimits +line boundaries when quoting poetry.) + + local $/; # enable "slurp" mode + local $_ = <FH>; # whole file now here + s/\n[ \t]+/ /g; + +Remember: the value of C<$/> is a string, not a regex. B<awk> has to be +better for something. :-) + +Setting C<$/> to a reference to an integer, scalar containing an integer, or +scalar that's convertible to an integer will attempt to read records +instead of lines, with the maximum record size being the referenced +integer. So this: + + local $/ = \32768; # or \"32768", or \$var_containing_32768 + open my $fh, "<", $myfile or die $!; + local $_ = <$fh>; + +will read a record of no more than 32768 bytes from FILE. If you're +not reading from a record-oriented file (or your OS doesn't have +record-oriented files), then you'll likely get a full chunk of data +with every read. 
If a record is larger than the record size you've +set, you'll get the record back in pieces. Trying to set the record +size to zero or less will cause reading in the (rest of the) whole file. + +On VMS, record reads are done with the equivalent of C<sysread>, +so it's best not to mix record and non-record reads on the same +file. (This is unlikely to be a problem, because any file you'd +want to read in record mode is probably unusable in line mode.) +Non-VMS systems do normal I/O, so it's safe to mix record and +non-record reads of a file. + +See also L<perlport/"Newlines">. Also see C<$.>. + +=item HANDLE->autoflush(EXPR) + +=item $OUTPUT_AUTOFLUSH + +=item $| +X<$|> X<autoflush> X<flush> X<$OUTPUT_AUTOFLUSH> + +If set to nonzero, forces a flush right away and after every write +or print on the currently selected output channel. Default is 0 +(regardless of whether the channel is really buffered by the +system or not; C<$|> tells you only whether you've asked Perl +explicitly to flush after each write). STDOUT will +typically be line buffered if output is to the terminal and block +buffered otherwise. Setting this variable is useful primarily when +you are outputting to a pipe or socket, such as when you are running +a Perl program under B<rsh> and want to see the output as it's +happening. This has no effect on input buffering. See L<perlfunc/getc> +for that. See L<perlfunc/select> on how to select the output channel. +See also L<IO::Handle>. (Mnemonic: when you want your pipes to be piping hot.) + +=item IO::Handle->output_field_separator EXPR + +=item $OUTPUT_FIELD_SEPARATOR + +=item $OFS + +=item $, +X<$,> X<$OFS> X<$OUTPUT_FIELD_SEPARATOR> + +The output field separator for the print operator. If defined, this +value is printed between each of print's arguments. Default is C<undef>. +(Mnemonic: what is printed when there is a "," in your print statement.)
+ +=item IO::Handle->output_record_separator EXPR + +=item $OUTPUT_RECORD_SEPARATOR + +=item $ORS + +=item $\ +X<$\> X<$ORS> X<$OUTPUT_RECORD_SEPARATOR> + +The output record separator for the print operator. If defined, this +value is printed after the last of print's arguments. Default is C<undef>. +(Mnemonic: you set C<$\> instead of adding "\n" at the end of the print. +Also, it's just like C<$/>, but it's what you get "back" from Perl.) + +=item $LIST_SEPARATOR + +=item $" +X<$"> X<$LIST_SEPARATOR> + +This is like C<$,> except that it applies to array and slice values +interpolated into a double-quoted string (or similar interpreted +string). Default is a space. (Mnemonic: obvious, I think.) + +=item $SUBSCRIPT_SEPARATOR + +=item $SUBSEP + +=item $; +X<$;> X<$SUBSEP> X<SUBSCRIPT_SEPARATOR> + +The subscript separator for multidimensional array emulation. If you +refer to a hash element as + + $foo{$a,$b,$c} + +it really means + + $foo{join($;, $a, $b, $c)} + +But don't put + + @foo{$a,$b,$c} # a slice--note the @ + +which means + + ($foo{$a},$foo{$b},$foo{$c}) + +Default is "\034", the same as SUBSEP in B<awk>. If your +keys contain binary data there might not be any safe value for C<$;>. +(Mnemonic: comma (the syntactic subscript separator) is a +semi-semicolon. Yeah, I know, it's pretty lame, but C<$,> is already +taken for something more important.) + +Consider using "real" multidimensional arrays as described +in L<perllol>. + +=item HANDLE->format_page_number(EXPR) + +=item $FORMAT_PAGE_NUMBER + +=item $% +X<$%> X<$FORMAT_PAGE_NUMBER> + +The current page number of the currently selected output channel. +Used with formats. +(Mnemonic: % is page number in B<nroff>.) + +=item HANDLE->format_lines_per_page(EXPR) + +=item $FORMAT_LINES_PER_PAGE + +=item $= +X<$=> X<$FORMAT_LINES_PER_PAGE> + +The current page length (printable lines) of the currently selected +output channel. Default is 60. +Used with formats. +(Mnemonic: = has horizontal lines.) 
+ +=item HANDLE->format_lines_left(EXPR) + +=item $FORMAT_LINES_LEFT + +=item $- +X<$-> X<$FORMAT_LINES_LEFT> + +The number of lines left on the page of the currently selected output +channel. +Used with formats. +(Mnemonic: lines_on_page - lines_printed.) + +=item @LAST_MATCH_START + +=item @- +X<@-> X<@LAST_MATCH_START> + +$-[0] is the offset of the start of the last successful match. +C<$-[>I<n>C<]> is the offset of the start of the substring matched by +I<n>-th subpattern, or undef if the subpattern did not match. + +Thus after a match against $_, $& coincides with C<substr $_, $-[0], +$+[0] - $-[0]>. Similarly, $I<n> coincides with C<substr $_, $-[n], +$+[n] - $-[n]> if C<$-[n]> is defined, and $+ coincides with +C<substr $_, $-[$#-], $+[$#-] - $-[$#-]>. One can use C<$#-> to find the last +matched subgroup in the last successful match. Contrast with +C<$#+>, the number of subgroups in the regular expression. Compare +with C<@+>. + +This array holds the offsets of the beginnings of the last +successful submatches in the currently active dynamic scope. +C<$-[0]> is the offset into the string of the beginning of the +entire match. The I<n>th element of this array holds the offset +of the I<n>th submatch, so C<$-[1]> is the offset where $1 +begins, C<$-[2]> the offset where $2 begins, and so on. + +After a match against some variable $var: + +=over 5 + +=item C<$`> is the same as C<substr($var, 0, $-[0])> + +=item C<$&> is the same as C<substr($var, $-[0], $+[0] - $-[0])> + +=item C<$'> is the same as C<substr($var, $+[0])> + +=item C<$1> is the same as C<substr($var, $-[1], $+[1] - $-[1])> + +=item C<$2> is the same as C<substr($var, $-[2], $+[2] - $-[2])> + +=item C<$3> is the same as C<substr($var, $-[3], $+[3] - $-[3])> + +=back + +=item %- +X<%-> + +Similar to C<%+>, this variable allows access to the named capture buffers +in the last successful match in the currently active dynamic scope. 
To +each capture buffer name found in the regular expression, it associates a +reference to an array containing the list of values captured by all +buffers with that name (should there be several of them), in the order +where they appear. + +Here's an example: + + if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { + foreach my $bufname (sort keys %-) { + my $ary = $-{$bufname}; + foreach my $idx (0..$#$ary) { + print "\$-{$bufname}[$idx] : ", + (defined($ary->[$idx]) ? "'$ary->[$idx]'" : "undef"), + "\n"; + } + } + } + +would print out: + + $-{A}[0] : '1' + $-{A}[1] : '3' + $-{B}[0] : '2' + $-{B}[1] : '4' + +The keys of the C<%-> hash correspond to all buffer names found in +the regular expression. + +The behaviour of C<%-> is implemented via the +L<Tie::Hash::NamedCapture> module. + +B<Note:> C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes, then the results may be +surprising. + +=item HANDLE->format_name(EXPR) + +=item $FORMAT_NAME + +=item $~ +X<$~> X<$FORMAT_NAME> + +The name of the current report format for the currently selected output +channel. Default is the name of the filehandle. (Mnemonic: brother to +C<$^>.) + +=item HANDLE->format_top_name(EXPR) + +=item $FORMAT_TOP_NAME + +=item $^ +X<$^> X<$FORMAT_TOP_NAME> + +The name of the current top-of-page format for the currently selected +output channel. Default is the name of the filehandle with _TOP +appended. (Mnemonic: points to top of page.) + +=item IO::Handle->format_line_break_characters EXPR + +=item $FORMAT_LINE_BREAK_CHARACTERS + +=item $: +X<$:> X<$FORMAT_LINE_BREAK_CHARACTERS> + +The current set of characters after which a string may be broken to +fill continuation fields (starting with ^) in a format. Default is +S<" \n-">, to break on whitespace or hyphens.
(Mnemonic: a "colon" in +poetry is a part of a line.) + +=item IO::Handle->format_formfeed EXPR + +=item $FORMAT_FORMFEED + +=item $^L +X<$^L> X<$FORMAT_FORMFEED> + +What formats output as a form feed. Default is \f. + +=item $ACCUMULATOR + +=item $^A +X<$^A> X<$ACCUMULATOR> + +The current value of the write() accumulator for format() lines. A format +contains formline() calls that put their result into C<$^A>. After +calling its format, write() prints out the contents of C<$^A> and empties it. +So you never really see the contents of C<$^A> unless you call +formline() yourself and then look at it. See L<perlform> and +L<perlfunc/formline()>. + +=item $CHILD_ERROR + +=item $? +X<$?> X<$CHILD_ERROR> + +The status returned by the last pipe close, backtick (C<``>) command, +successful call to wait() or waitpid(), or from the system() +operator. This is just the 16-bit status word returned by the +traditional Unix wait() system call (or else is made up to look like it). Thus, the +exit value of the subprocess is really (C<<< $? >> 8 >>>), and +C<$? & 127> gives which signal, if any, the process died from, and +C<$? & 128> reports whether there was a core dump. (Mnemonic: +similar to B<sh> and B<ksh>.) + +Additionally, if the C<h_errno> variable is supported in C, its value +is returned via $? if any C<gethost*()> function fails. + +If you have installed a signal handler for C<SIGCHLD>, the +value of C<$?> will usually be wrong outside that handler. + +Inside an C<END> subroutine C<$?> contains the value that is going to be +given to C<exit()>. You can modify C<$?> in an C<END> subroutine to +change the exit status of your program. For example: + + END { + $? = 1 if $? == 255; # die would make it 255 + } + +Under VMS, the pragma C<use vmsish 'status'> makes C<$?> reflect the +actual VMS exit status, instead of the default emulation of POSIX +status; see L<perlvms/$?> for details. + +Also see L<Error Indicators>.
+ +=item ${^CHILD_ERROR_NATIVE} +X<$^CHILD_ERROR_NATIVE> + +The native status returned by the last pipe close, backtick (C<``>) +command, successful call to wait() or waitpid(), or from the system() +operator. On POSIX-like systems this value can be decoded with the +WIFEXITED, WEXITSTATUS, WIFSIGNALED, WTERMSIG, WIFSTOPPED, WSTOPSIG +and WIFCONTINUED functions provided by the L<POSIX> module. + +Under VMS this reflects the actual VMS exit status; i.e. it is the same +as $? when the pragma C<use vmsish 'status'> is in effect. + +=item ${^ENCODING} +X<$^ENCODING> + +The I<object reference> to the Encode object that is used to convert +the source code to Unicode. Thanks to this variable your perl script +does not have to be written in UTF-8. Default is I<undef>. The direct +manipulation of this variable is highly discouraged. + +=item $OS_ERROR + +=item $ERRNO + +=item $! +X<$!> X<$ERRNO> X<$OS_ERROR> + +If used numerically, yields the current value of the C C<errno> +variable, or in other words, if a system or library call fails, it +sets this variable. This means that the value of C<$!> is meaningful +only I<immediately> after a B<failure>: + + if (open my $fh, "<", $filename) { + # Here $! is meaningless. + ... + } else { + # ONLY here is $! meaningful. + ... + # Already here $! might be meaningless. + } + # Since here we might have either success or failure, + # here $! is meaningless. + +In the above I<meaningless> stands for anything: zero, non-zero, +C<undef>. A successful system or library call does B<not> set +the variable to zero. + +If used as a string, yields the corresponding system error string. +You can assign a number to C<$!> to set I<errno> if, for instance, +you want C<"$!"> to return the string for error I<n>, or you want +to set the exit value for the die() operator. (Mnemonic: What just +went bang?) + +Also see L<Error Indicators>. + +=item %OS_ERROR + +=item %ERRNO + +=item %! 
+X<%!> + +Each element of C<%!> has a true value only if C<$!> is set to that +value. For example, C<$!{ENOENT}> is true if and only if the current +value of C<$!> is C<ENOENT>; that is, if the most recent error was +"No such file or directory" (or its moral equivalent: not all operating +systems give that exact error, and certainly not all languages). +To check if a particular key is meaningful on your system, use +C<exists $!{the_key}>; for a list of legal keys, use C<keys %!>. +See L<Errno> for more information, and also see above for the +validity of C<$!>. + +=item $EXTENDED_OS_ERROR + +=item $^E +X<$^E> X<$EXTENDED_OS_ERROR> + +Error information specific to the current operating system. At +the moment, this differs from C<$!> under only VMS, OS/2, and Win32 +(and for MacPerl). On all other platforms, C<$^E> is always just +the same as C<$!>. + +Under VMS, C<$^E> provides the VMS status value from the last +system error. This is more specific information about the last +system error than that provided by C<$!>. This is particularly +important when C<$!> is set to B<EVMSERR>. + +Under OS/2, C<$^E> is set to the error code of the last call to +OS/2 API either via CRT, or directly from perl. + +Under Win32, C<$^E> always returns the last error information +reported by the Win32 call C<GetLastError()> which describes +the last error from within the Win32 API. Most Win32-specific +code will report errors via C<$^E>. ANSI C and Unix-like calls +set C<errno> and so most portable Perl code will report errors +via C<$!>. + +Caveats mentioned in the description of C<$!> generally apply to +C<$^E>, also. (Mnemonic: Extra error explanation.) + +Also see L<Error Indicators>. + +=item $EVAL_ERROR + +=item $@ +X<$@> X<$EVAL_ERROR> + +The Perl syntax error message from the last eval() operator. +If $@ is the null string, the last eval() parsed and executed +correctly (although the operations you invoked may have failed in the +normal fashion). 
(Mnemonic: Where was the syntax error "at"?) + +Warning messages are not collected in this variable. You can, +however, set up a routine to process warnings by setting C<$SIG{__WARN__}> +as described below. + +Also see L<Error Indicators>. + +=item $PROCESS_ID + +=item $PID + +=item $$ +X<$$> X<$PID> X<$PROCESS_ID> + +The process number of the Perl running this script. You should +consider this variable read-only, although it will be altered +across fork() calls. (Mnemonic: same as shells.) + +Note for Linux users: on Linux, the C functions C<getpid()> and +C<getppid()> return different values from different threads. In order to +be portable, this behavior is not reflected by C<$$>, whose value remains +consistent across threads. If you want to call the underlying C<getpid()>, +you may use the CPAN module C<Linux::Pid>. + +=item $REAL_USER_ID + +=item $UID + +=item $< +X<< $< >> X<$UID> X<$REAL_USER_ID> + +The real uid of this process. (Mnemonic: it's the uid you came I<from>, +if you're running setuid.) You can change both the real uid and +the effective uid at the same time by using POSIX::setuid(). Since +changes to $< require a system call, check $! after a change attempt to +detect any possible errors. + +=item $EFFECTIVE_USER_ID + +=item $EUID + +=item $> +X<< $> >> X<$EUID> X<$EFFECTIVE_USER_ID> + +The effective uid of this process. Example: + + $< = $>; # set real to effective uid + ($<,$>) = ($>,$<); # swap real and effective uid + +You can change both the effective uid and the real uid at the same +time by using POSIX::setuid(). Changes to $> require a check to $! +to detect any possible errors after an attempted change. + +(Mnemonic: it's the uid you went I<to>, if you're running setuid.) +C<< $< >> and C<< $> >> can be swapped only on machines +supporting setreuid(). + +=item $REAL_GROUP_ID + +=item $GID + +=item $( +X<$(> X<$GID> X<$REAL_GROUP_ID> + +The real gid of this process. 
If you are on a machine that supports +membership in multiple groups simultaneously, gives a space separated +list of groups you are in. The first number is the one returned by +getgid(), and the subsequent ones by getgroups(), one of which may be +the same as the first number. + +However, a value assigned to C<$(> must be a single number used to +set the real gid. So the value given by C<$(> should I<not> be assigned +back to C<$(> without being forced numeric, such as by adding zero. Note +that this is different to the effective gid (C<$)>) which does take a +list. + +You can change both the real gid and the effective gid at the same +time by using POSIX::setgid(). Changes to $( require a check to $! +to detect any possible errors after an attempted change. + +(Mnemonic: parentheses are used to I<group> things. The real gid is the +group you I<left>, if you're running setgid.) + +=item $EFFECTIVE_GROUP_ID + +=item $EGID + +=item $) +X<$)> X<$EGID> X<$EFFECTIVE_GROUP_ID> + +The effective gid of this process. If you are on a machine that +supports membership in multiple groups simultaneously, gives a space +separated list of groups you are in. The first number is the one +returned by getegid(), and the subsequent ones by getgroups(), one of +which may be the same as the first number. + +Similarly, a value assigned to C<$)> must also be a space-separated +list of numbers. The first number sets the effective gid, and +the rest (if any) are passed to setgroups(). To get the effect of an +empty list for setgroups(), just repeat the new effective gid; that is, +to force an effective gid of 5 and an effectively empty setgroups() +list, say C< $) = "5 5" >. + +You can change both the effective gid and the real gid at the same +time by using POSIX::setgid() (use only a single numeric argument). +Changes to $) require a check to $! to detect any possible errors +after an attempted change. + +(Mnemonic: parentheses are used to I<group> things. 
The effective gid +is the group that's I<right> for you, if you're running setgid.) + +C<< $< >>, C<< $> >>, C<$(> and C<$)> can be set only on +machines that support the corresponding I<set[re][ug]id()> routine. C<$(> +and C<$)> can be swapped only on machines supporting setregid(). + +=item $PROGRAM_NAME + +=item $0 +X<$0> X<$PROGRAM_NAME> + +Contains the name of the program being executed. + +On some (read: not all) operating systems assigning to C<$0> modifies +the argument area that the C<ps> program sees. On some platforms you +may have to use special C<ps> options or a different C<ps> to see the +changes. Modifying the $0 is more useful as a way of indicating the +current program state than it is for hiding the program you're +running. (Mnemonic: same as B<sh> and B<ksh>.) + +Note that there are platform specific limitations on the maximum +length of C<$0>. In the most extreme case it may be limited to the +space occupied by the original C<$0>. + +In some platforms there may be arbitrary amount of padding, for +example space characters, after the modified name as shown by C<ps>. +In some platforms this padding may extend all the way to the original +length of the argument area, no matter what you do (this is the case +for example with Linux 2.2). + +Note for BSD users: setting C<$0> does not completely remove "perl" +from the ps(1) output. For example, setting C<$0> to C<"foobar"> may +result in C<"perl: foobar (perl)"> (whether both the C<"perl: "> prefix +and the " (perl)" suffix are shown depends on your exact BSD variant +and version). This is an operating system feature, Perl cannot help it. + +In multithreaded scripts Perl coordinates the threads so that any +thread may modify its copy of the C<$0> and the change becomes visible +to ps(1) (assuming the operating system plays along). Note that +the view of C<$0> the other threads have will not change since they +have their own copies of it. 
+ +If the program has been given to perl via the switches C<-e> or C<-E>, +C<$0> will contain the string C<"-e">. + +=item $[ +X<$[> + +The index of the first element in an array, and of the first character +in a substring. Default is 0, but you could theoretically set it +to 1 to make Perl behave more like B<awk> (or Fortran) when +subscripting and when evaluating the index() and substr() functions. +(Mnemonic: [ begins subscripts.) + +As of release 5 of Perl, assignment to C<$[> is treated as a compiler +directive, and cannot influence the behavior of any other file. +(That's why you can only assign compile-time constants to it.) +Its use is highly discouraged. + +Note that, unlike other compile-time directives (such as L<strict>), +assignment to C<$[> can be seen from outer lexical scopes in the same file. +However, you can use local() on it to strictly bind its value to a +lexical block. + +=item $] +X<$]> + +The version + patchlevel / 1000 of the Perl interpreter. This variable +can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: Is this version +of perl in the right bracket?) Example: + + warn "No checksumming!\n" if $] < 3.019; + +See also the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the running Perl interpreter is too old. + +The floating point representation can sometimes lead to inaccurate +numeric comparisons. See C<$^V> for a more modern representation of +the Perl version that allows accurate string comparisons. + +=item $COMPILING + +=item $^C +X<$^C> X<$COMPILING> + +The current value of the flag associated with the B<-c> switch. +Mainly of use with B<-MO=...> to allow code to alter its behavior +when being compiled, such as for example to AUTOLOAD at compile +time rather than normal, deferred loading. Setting +C<$^C = 1> is similar to calling C<B::minus_c>. 
+ +=item $DEBUGGING + +=item $^D +X<$^D> X<$DEBUGGING> + +The current value of the debugging flags. (Mnemonic: value of B<-D> +switch.) May be read or set. Like its command-line equivalent, you can use +numeric or symbolic values, eg C<$^D = 10> or C<$^D = "st">. + +=item ${^RE_DEBUG_FLAGS} + +The current value of the regex debugging flags. Set to 0 for no debug output +even when the re 'debug' module is loaded. See L<re> for details. + +=item ${^RE_TRIE_MAXBUF} + +Controls how certain regex optimisations are applied and how much memory they +utilize. This value by default is 65536 which corresponds to a 512kB temporary +cache. Set this to a higher value to trade memory for speed when matching +large alternations. Set it to a lower value if you want the optimisations to +be as conservative of memory as possible but still occur, and set it to a +negative value to prevent the optimisation and conserve the most memory. +Under normal situations this variable should be of no interest to you. + +=item $SYSTEM_FD_MAX + +=item $^F +X<$^F> X<$SYSTEM_FD_MAX> + +The maximum system file descriptor, ordinarily 2. System file +descriptors are passed to exec()ed processes, while higher file +descriptors are not. Also, during an open(), system file descriptors are +preserved even if the open() fails. (Ordinary file descriptors are +closed before the open() is attempted.) The close-on-exec +status of a file descriptor will be decided according to the value of +C<$^F> when the corresponding file, pipe, or socket was opened, not the +time of the exec(). + +=item $^H + +WARNING: This variable is strictly for internal use only. Its availability, +behavior, and contents are subject to change without notice. + +This variable contains compile-time hints for the Perl interpreter. At the +end of compilation of a BLOCK the value of this variable is restored to the +value when the interpreter started to compile the BLOCK. 
+ +When perl begins to parse any block construct that provides a lexical scope +(e.g., eval body, required file, subroutine body, loop body, or conditional +block), the existing value of $^H is saved, but its value is left unchanged. +When the compilation of the block is completed, it regains the saved value. +Between the points where its value is saved and restored, code that +executes within BEGIN blocks is free to change the value of $^H. + +This behavior provides the semantic of lexical scoping, and is used in, +for instance, the C<use strict> pragma. + +The contents should be an integer; different bits of it are used for +different pragmatic flags. Here's an example: + + sub add_100 { $^H |= 0x100 } + + sub foo { + BEGIN { add_100() } + bar->baz($boon); + } + +Consider what happens during execution of the BEGIN block. At this point +the BEGIN block has already been compiled, but the body of foo() is still +being compiled. The new value of $^H will therefore be visible only while +the body of foo() is being compiled. + +Substitution of the above BEGIN block with: + + BEGIN { require strict; strict->import('vars') } + +demonstrates how C<use strict 'vars'> is implemented. Here's a conditional +version of the same lexical pragma: + + BEGIN { require strict; strict->import('vars') if $condition } + +=item %^H + +The %^H hash provides the same scoping semantic as $^H. This makes it +useful for implementation of lexically scoped pragmas. See L<perlpragma>. + +=item $INPLACE_EDIT + +=item $^I +X<$^I> X<$INPLACE_EDIT> + +The current value of the inplace-edit extension. Use C<undef> to disable +inplace editing. (Mnemonic: value of B<-i> switch.) + +=item $^M +X<$^M> + +By default, running out of memory is an untrappable, fatal error. +However, if suitably built, Perl can use the contents of C<$^M> +as an emergency memory pool after die()ing. Suppose that your Perl +were compiled with C<-DPERL_EMERGENCY_SBRK> and used Perl's malloc. 
+Then + + $^M = 'a' x (1 << 16); + +would allocate a 64K buffer for use in an emergency. See the +F<INSTALL> file in the Perl distribution for information on how to +add custom C compilation flags when compiling perl. To discourage casual +use of this advanced feature, there is no L<English|English> long name for +this variable. + +=item $OSNAME + +=item $^O +X<$^O> X<$OSNAME> + +The name of the operating system under which this copy of Perl was +built, as determined during the configuration process. The value +is identical to C<$Config{'osname'}>. See also L<Config> and the +B<-V> command-line switch documented in L<perlrun>. + +In Windows platforms, $^O is not very helpful: since it is always +C<MSWin32>, it doesn't tell the difference between +95/98/ME/NT/2000/XP/CE/.NET. Use Win32::GetOSName() or +Win32::GetOSVersion() (see L<Win32> and L<perlport>) to distinguish +between the variants. + +=item ${^OPEN} + +An internal variable used by PerlIO. A string in two parts, separated +by a C<\0> byte, the first part describes the input layers, the second +part describes the output layers. + +=item $PERLDB + +=item $^P +X<$^P> X<$PERLDB> + +The internal variable for debugging support. The meanings of the +various bits are subject to change, but currently indicate: + +=over 6 + +=item 0x01 + +Debug subroutine enter/exit. + +=item 0x02 + +Line-by-line debugging. Causes DB::DB() subroutine to be called for each +statement executed. Also causes saving source code lines (like 0x400). + +=item 0x04 + +Switch off optimizations. + +=item 0x08 + +Preserve more data for future interactive inspections. + +=item 0x10 + +Keep info about source lines on which a subroutine is defined. + +=item 0x20 + +Start with single-step on. + +=item 0x40 + +Use subroutine address instead of name when reporting. + +=item 0x80 + +Report C<goto &subroutine> as well. + +=item 0x100 + +Provide informative "file" names for evals based on the place they were compiled. 
+ +=item 0x200 + +Provide informative names to anonymous subroutines based on the place they +were compiled. + +=item 0x400 + +Save source code lines into C<@{"_<$filename"}>. + +=back + +Some bits may be relevant at compile-time only, some at +run-time only. This is a new mechanism and the details may change. +See also L<perldebguts>. + +=item $LAST_REGEXP_CODE_RESULT + +=item $^R +X<$^R> X<$LAST_REGEXP_CODE_RESULT> + +The result of evaluation of the last successful C<(?{ code })> +regular expression assertion (see L<perlre>). May be written to. + +=item $EXCEPTIONS_BEING_CAUGHT + +=item $^S +X<$^S> X<$EXCEPTIONS_BEING_CAUGHT> + +Current state of the interpreter. + + $^S State + --------- ------------------- + undef Parsing module/eval + true (1) Executing an eval + false (0) Otherwise + +The first state may happen in $SIG{__DIE__} and $SIG{__WARN__} handlers. + +=item $BASETIME + +=item $^T +X<$^T> X<$BASETIME> + +The time at which the program began running, in seconds since the +epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, +and B<-C> filetests are based on this value. + +=item ${^TAINT} + +Reflects if taint mode is on or off. 1 for on (the program was run with +B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with +B<-t> or B<-TU>). This variable is read-only. + +=item ${^UNICODE} + +Reflects certain Unicode settings of Perl. See L<perlrun> +documentation for the C<-C> switch for more information about +the possible values. This variable is set during Perl startup +and is thereafter read-only. + +=item ${^UTF8CACHE} + +This variable controls the state of the internal UTF-8 offset caching code. +1 for on (the default), 0 for off, -1 to debug the caching code by checking +all its results against linear scans, and panicking on any discrepancy. + +=item ${^UTF8LOCALE} + +This variable indicates whether an UTF-8 locale was detected by perl at +startup. 
This information is used by perl when it's in +adjust-utf8ness-to-locale mode (as when run with the C<-CL> command-line +switch); see L<perlrun> for more info on this. + +=item $PERL_VERSION + +=item $^V +X<$^V> X<$PERL_VERSION> + +The revision, version, and subversion of the Perl interpreter, represented +as a C<version> object. + +This variable first appeared in perl 5.6.0; earlier versions of perl will +see an undefined value. Before perl 5.10.0 $^V was represented as a v-string. + +$^V can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: use ^V for Version +Control.) Example: + + warn "Hashes not randomized!\n" if !$^V or $^V lt v5.8.1; + +To convert C<$^V> into its string representation use sprintf()'s +C<"%vd"> conversion: + + printf "version is v%vd\n", $^V; # Perl's version + +See the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the running Perl interpreter is too old. + +See also C<$]> for an older representation of the Perl version. + +=item $WARNING + +=item $^W +X<$^W> X<$WARNING> + +The current value of the warning switch, initially true if B<-w> +was used, false otherwise, but directly modifiable. (Mnemonic: +related to the B<-w> switch.) See also L<warnings>. + +=item ${^WARNING_BITS} + +The current set of warning checks enabled by the C<use warnings> pragma. +See the documentation of C<warnings> for more details. + +=item ${^WIN32_SLOPPY_STAT} + +If this variable is set to a true value, then stat() on Windows will +not try to open the file. This means that the link count cannot be +determined and file attributes may be out of date if additional +hardlinks to the file exist. On the other hand, not opening the file +is considerably faster, especially for files on network drives. + +This variable could be set in the F<sitecustomize.pl> file to +configure the local Perl installation to use "sloppy" stat() by +default.
See L<perlrun> for more information about site +customization. + +=item $EXECUTABLE_NAME + +=item $^X +X<$^X> X<$EXECUTABLE_NAME> + +The name used to execute the current copy of Perl, from C's +C<argv[0]> or (where supported) F</proc/self/exe>. + +Depending on the host operating system, the value of $^X may be +a relative or absolute pathname of the perl program file, or may +be the string used to invoke perl but not the pathname of the +perl program file. Also, most operating systems permit invoking +programs that are not in the PATH environment variable, so there +is no guarantee that the value of $^X is in PATH. For VMS, the +value may or may not include a version number. + +You usually can use the value of $^X to re-invoke an independent +copy of the same perl that is currently running, e.g., + + @first_run = `$^X -le "print int rand 100 for 1..100"`; + +But recall that not all operating systems support forking or +capturing of the output of commands, so this complex statement +may not be portable. + +It is not safe to use the value of $^X as a path name of a file, +as some operating systems that have a mandatory suffix on +executable files do not require use of the suffix when invoking +a command. To convert the value of $^X to a path name, use the +following statements: + + # Build up a set of file names (not command names). + use Config; + $this_perl = $^X; + if ($^O ne 'VMS') + {$this_perl .= $Config{_exe} + unless $this_perl =~ m/$Config{_exe}$/i;} + +Because many operating systems permit anyone with read access to +the Perl program file to make a copy of it, patch the copy, and +then execute the copy, the security-conscious Perl programmer +should take care to invoke the installed copy of perl, not the +copy referenced by $^X. The following statements accomplish +this goal, and produce a pathname that can be invoked as a +command or referenced as a file. 
+ + use Config; + $secure_perl_path = $Config{perlpath}; + if ($^O ne 'VMS') + {$secure_perl_path .= $Config{_exe} + unless $secure_perl_path =~ m/$Config{_exe}$/i;} + +=item ARGV +X<ARGV> + +The special filehandle that iterates over command-line filenames in +C<@ARGV>. Usually written as the null filehandle in the angle operator +C<< <> >>. Note that currently C<ARGV> only has its magical effect +within the C<< <> >> operator; elsewhere it is just a plain filehandle +corresponding to the last file opened by C<< <> >>. In particular, +passing C<\*ARGV> as a parameter to a function that expects a filehandle +may not cause your function to automatically read the contents of all the +files in C<@ARGV>. + +=item $ARGV +X<$ARGV> + +contains the name of the current file when reading from <>. + +=item @ARGV +X<@ARGV> + +The array @ARGV contains the command-line arguments intended for +the script. C<$#ARGV> is generally the number of arguments minus +one, because C<$ARGV[0]> is the first argument, I<not> the program's +command name itself. See C<$0> for the command name. + +=item ARGVOUT +X<ARGVOUT> + +The special filehandle that points to the currently open output file +when doing edit-in-place processing with B<-i>. Useful when you have +to do a lot of inserting and don't want to keep modifying $_. See +L<perlrun> for the B<-i> switch. + +=item @F +X<@F> + +The array @F contains the fields of each line read in when autosplit +mode is turned on. See L<perlrun> for the B<-a> switch. This array +is package-specific, and must be declared or given a full package name +if not in package main when running under C<strict 'vars'>. + +=item @INC +X<@INC> + +The array @INC contains the list of places that the C<do EXPR>, +C<require>, or C<use> constructs look for their library files. 
It +initially consists of the arguments to any B<-I> command-line +switches, followed by the default Perl library, probably +F</usr/local/lib/perl>, followed by ".", to represent the current +directory. ("." will not be appended if taint checks are enabled, either by +C<-T> or by C<-t>.) If you need to modify this at runtime, you should use +the C<use lib> pragma to get the machine-dependent library properly +loaded also: + + use lib '/mypath/libdir/'; + use SomeMod; + +You can also insert hooks into the file inclusion system by putting Perl +code directly into @INC. Those hooks may be subroutine references, array +references or blessed objects. See L<perlfunc/require> for details. + +=item @ARG + +=item @_ +X<@_> X<@ARG> + +Within a subroutine the array @_ contains the parameters passed to that +subroutine. See L<perlsub>. + +=item %INC +X<%INC> + +The hash %INC contains entries for each filename included via the +C<do>, C<require>, or C<use> operators. The key is the filename +you specified (with module names converted to pathnames), and the +value is the location of the file found. The C<require> +operator uses this hash to determine whether a particular file has +already been included. + +If the file was loaded via a hook (e.g. a subroutine reference, see +L<perlfunc/require> for a description of these hooks), this hook is +by default inserted into %INC in place of a filename. Note, however, +that the hook may have set the %INC entry by itself to provide some more +specific info. + +=item %ENV + +=item $ENV{expr} +X<%ENV> + +The hash %ENV contains your current environment. Setting a +value in C<ENV> changes the environment for any child processes +you subsequently fork() off. + +=item %SIG + +=item $SIG{expr} +X<%SIG> + +The hash C<%SIG> contains signal handlers for signals. 
For example: + + sub handler { # 1st argument is signal name + my($sig) = @_; + print "Caught a SIG$sig--shutting down\n"; + close(LOG); + exit(0); + } + + $SIG{'INT'} = \&handler; + $SIG{'QUIT'} = \&handler; + ... + $SIG{'INT'} = 'DEFAULT'; # restore default action + $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT + +Using a value of C<'IGNORE'> usually has the effect of ignoring the +signal, except for the C<CHLD> signal. See L<perlipc> for more about +this special case. + +Here are some other examples: + + $SIG{"PIPE"} = "Plumber"; # assumes main::Plumber (not recommended) + $SIG{"PIPE"} = \&Plumber; # just fine; assume current Plumber + $SIG{"PIPE"} = *Plumber; # somewhat esoteric + $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return?? + +Be sure not to use a bareword as the name of a signal handler, +lest you inadvertently call it. + +If your system has the sigaction() function then signal handlers are +installed using it. This means you get reliable signal handling. + +The default delivery policy of signals changed in Perl 5.8.0 from +immediate (also known as "unsafe") to deferred, also known as +"safe signals". See L<perlipc> for more information. + +Certain internal hooks can be also set using the %SIG hash. The +routine indicated by C<$SIG{__WARN__}> is called when a warning message is +about to be printed. The warning message is passed as the first +argument. The presence of a C<__WARN__> hook causes the ordinary printing +of warnings to C<STDERR> to be suppressed. You can use this to save warnings +in a variable, or turn warnings into fatal errors, like this: + + local $SIG{__WARN__} = sub { die $_[0] }; + eval $proggie; + +As the C<'IGNORE'> hook is not supported by C<__WARN__>, you can +disable warnings using the empty subroutine: + + local $SIG{__WARN__} = sub {}; + +The routine indicated by C<$SIG{__DIE__}> is called when a fatal exception +is about to be thrown. The error message is passed as the first +argument. 
When a C<__DIE__> hook routine returns, the exception +processing continues as it would have in the absence of the hook, +unless the hook routine itself exits via a C<goto>, a loop exit, or a C<die()>. +The C<__DIE__> handler is explicitly disabled during the call, so that you +can die from a C<__DIE__> handler. Similarly for C<__WARN__>. + +Due to an implementation glitch, the C<$SIG{__DIE__}> hook is called +even inside an eval(). Do not use this to rewrite a pending exception +in C<$@>, or as a bizarre substitute for overriding C<CORE::GLOBAL::die()>. +This strange action at a distance may be fixed in a future release +so that C<$SIG{__DIE__}> is only called if your program is about +to exit, as was the original intent. Any other use is deprecated. + +C<__DIE__>/C<__WARN__> handlers are very special in one respect: +they may be called to report (probable) errors found by the parser. +In such a case the parser may be in an inconsistent state, so any +attempt to evaluate Perl code from such a handler will probably +result in a segfault. This means that warnings or errors that +result from parsing Perl should be used with extreme caution, like +this: + + require Carp if defined $^S; + Carp::confess("Something wrong") if defined &Carp::confess; + die "Something wrong, but could not load Carp to give backtrace... + To see backtrace try starting Perl with -MCarp switch"; + +Here the first line will load Carp I<unless> it is the parser who +called the handler. The second line will print a backtrace and die if +Carp was available. The third line will be executed only if Carp was +not available. + +See L<perlfunc/die>, L<perlfunc/warn>, L<perlfunc/eval>, and +L<warnings> for additional information. + +=back + +=head2 Error Indicators +X<error> X<exception> + +The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information +about different types of error conditions that may appear during +execution of a Perl program. 
The variables are shown ordered by +the "distance" between the subsystem which reported the error and +the Perl process. They correspond to errors detected by the Perl +interpreter, C library, operating system, or an external program, +respectively. + +To illustrate the differences between these variables, consider the +following Perl expression, which uses a single-quoted string: + + eval q{ + open my $pipe, "/cdrom/install |" or die $!; + my @res = <$pipe>; + close $pipe or die "bad pipe: $?, $!"; + }; + +After execution of this statement all 4 variables may have been set. + +C<$@> is set if the string to be C<eval>-ed did not compile (this +may happen if C<open> or C<close> were imported with bad prototypes), +or if Perl code executed during evaluation die()d. In these cases +the value of $@ is the compile error, or the argument to C<die> +(which will interpolate C<$!> and C<$?>). (See also L<Fatal>, +though.) + +When the eval() expression above is executed, open(), C<< <$pipe> >>, +and C<close> are translated to calls in the C run-time library and +thence to the operating system kernel. C<$!> is set to the C library's +C<errno> if one of these calls fails. + +Under a few operating systems, C<$^E> may contain a more verbose +error indicator, such as in this case, "CDROM tray not closed." +Systems that do not support extended error messages leave C<$^E> +the same as C<$!>. + +Finally, C<$?> may be set to a non-0 value if the external program +F</cdrom/install> fails. The upper eight bits reflect specific +error conditions encountered by the program (the program's exit() +value). The lower eight bits reflect the mode of failure, like signal +death and core dump information. See wait(2) for details. In +contrast to C<$!> and C<$^E>, which are set only if an error condition +is detected, the variable C<$?> is set on each C<wait> or pipe +C<close>, overwriting the old value. This is more like C<$@>, which +on every eval() is always set on failure and cleared on success. 
+ +For more details, see the individual descriptions at C<$@>, C<$!>, C<$^E>, +and C<$?>. + +=head2 Technical Note on the Syntax of Variable Names + +Variable names in Perl can have several formats. Usually, they +must begin with a letter or underscore, in which case they can be +arbitrarily long (up to an internal limit of 251 characters) and +may contain letters, digits, underscores, or the special sequence +C<::> or C<'>. In this case, the part before the last C<::> or +C<'> is taken to be a I<package qualifier>; see L<perlmod>. + +Perl variable names may also be a sequence of digits or a single +punctuation or control character. These names are all reserved for +special uses by Perl; for example, the all-digits names are used +to hold data captured by backreferences after a regular expression +match. Perl has a special syntax for the single-control-character +names: It understands C<^X> (caret C<X>) to mean the control-C<X> +character. For example, the notation C<$^W> (dollar-sign caret +C<W>) is the scalar variable whose name is the single character +control-C<W>. This is better than typing a literal control-C<W> +into your program. + +Finally, new in Perl 5.6, Perl variable names may be alphanumeric +strings that begin with control characters (or better yet, a caret). +These variables must be written in the form C<${^Foo}>; the braces +are not optional. C<${^Foo}> denotes the scalar variable whose +name is a control-C<F> followed by two C<o>'s. These variables are +reserved for future special uses by Perl, except for the ones that +begin with C<^_> (control-underscore or caret-underscore). No +control-character name that begins with C<^_> will acquire a special +meaning in any future version of Perl; such names may therefore be +used safely in programs. C<$^_> itself, however, I<is> reserved. 
+ +Perl identifiers that begin with digits, control characters, or +punctuation characters are exempt from the effects of the C<package> +declaration and are always forced to be in package C<main>; they are +also exempt from C<strict 'vars'> errors. A few other names are also +exempt in these ways: + + ENV STDIN + INC STDOUT + ARGV STDERR + ARGVOUT _ + SIG + +In particular, the new special C<${^_XYZ}> variables are always taken +to be in package C<main>, regardless of any C<package> declarations +presently in scope. + +=head1 BUGS + +Due to an unfortunate accident of Perl's implementation, C<use +English> imposes a considerable performance penalty on all regular +expression matches in a program, regardless of whether they occur +in the scope of C<use English>. For that reason, saying C<use +English> in libraries is strongly discouraged. See the +Devel::SawAmpersand module documentation from CPAN +( http://www.cpan.org/modules/by-module/Devel/ ) +for more information. Writing C<use English '-no_match_vars';> +avoids the performance penalty. + +Having to even think about the C<$^S> variable in your exception +handlers is simply wrong. C<$SIG{__DIE__}> as currently implemented +invites grievous and difficult to track down errors. Avoid it +and use an C<END{}> or CORE::GLOBAL::die override instead. |