summaryrefslogtreecommitdiff
path: root/ext/Pod-Simple/lib
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2009-09-12 20:44:53 +0100
committerNicholas Clark <nick@ccl4.org>2009-09-13 08:26:05 +0100
commit09a90ea7ecb1cbdb87ab854c4ad6cf6da6c6e053 (patch)
tree763112a8c528b14960a5ce8d50bce0cb7f6e3b4d /ext/Pod-Simple/lib
parent9ddff1485169a2ddf8548bfc8dce819bad389a50 (diff)
downloadperl-09a90ea7ecb1cbdb87ab854c4ad6cf6da6c6e053.tar.gz
Move Pod::Simple from lib to ext.
In the process, unpack the "packed" files. These were the last files uunecoded, so now uupacktool.pl is obsolete, and can be retired.
Diffstat (limited to 'ext/Pod-Simple/lib')
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple.pm1536
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple.pod226
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/BlackBox.pm1919
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Checker.pm171
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Debug.pm151
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/DumpAsText.pm130
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm146
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/HTML.pm889
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm1342
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm104
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/LinkSection.pm145
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Methody.pm127
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Progress.pm93
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/PullParser.pm795
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm93
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm130
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm101
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/PullParserToken.pm138
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/RTF.pm674
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Search.pm1016
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/SimpleTree.pm155
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Subclassing.pod922
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Text.pm152
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/TextContent.pm87
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm103
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/Transcode.pm33
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm63
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm42
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/XHTML.pm400
-rw-r--r--ext/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm157
30 files changed, 12040 insertions, 0 deletions
diff --git a/ext/Pod-Simple/lib/Pod/Simple.pm b/ext/Pod-Simple/lib/Pod/Simple.pm
new file mode 100644
index 0000000000..1e325724a5
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple.pm
@@ -0,0 +1,1536 @@
+
+require 5;
+package Pod::Simple;
+use strict;
+use Carp ();
+BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
+use integer;
+use Pod::Escapes 1.03 ();
+use Pod::Simple::LinkSection ();
+use Pod::Simple::BlackBox ();
+#use utf8;
+
+use vars qw(
+ $VERSION @ISA
+ @Known_formatting_codes @Known_directives
+ %Known_formatting_codes %Known_directives
+ $NL
+);
+
+@ISA = ('Pod::Simple::BlackBox');
+$VERSION = '3.07';
+
+@Known_formatting_codes = qw(I B C L E F S X Z);
+%Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
+@Known_directives = qw(head1 head2 head3 head4 item over back);
+%Known_directives = map(($_=>'Plain'), @Known_directives);
+$NL = $/ unless defined $NL;
+
+#-----------------------------------------------------------------------------
+# Set up some constants:
+
+BEGIN {
+ if(defined &ASCII) { }
+ elsif(chr(65) eq 'A') { *ASCII = sub () {1} }
+ else { *ASCII = sub () {''} }
+
+ unless(defined &MANY_LINES) { *MANY_LINES = sub () {20} }
+ DEBUG > 4 and print "MANY_LINES is ", MANY_LINES(), "\n";
+ unless(MANY_LINES() >= 1) {
+ die "MANY_LINES is too small (", MANY_LINES(), ")!\nAborting";
+ }
+ if(defined &UNICODE) { }
+ elsif($] >= 5.008) { *UNICODE = sub() {1} }
+ else { *UNICODE = sub() {''} }
+}
+if(DEBUG > 2) {
+ print "# We are ", ASCII ? '' : 'not ', "in ASCII-land\n";
+ print "# We are under a Unicode-safe Perl.\n";
+}
+
+# Design note:
+# This is a parser for Pod. It is not a parser for the set of Pod-like
+# languages which happens to contain Pod -- it is just for Pod, plus possibly
+# some extensions.
+
+# @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
+#@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+__PACKAGE__->_accessorize(
+ 'nbsp_for_S', # Whether to map S<...>'s to \xA0 characters
+ 'source_filename', # Filename of the source, for use in warnings
+ 'source_dead', # Whether to consider this parser's source dead
+
+ 'output_fh', # The filehandle we're writing to, if applicable.
+ # Used only in some derived classes.
+
+ 'hide_line_numbers', # For some dumping subclasses: whether to pointedly
+ # suppress the start_line attribute
+
+ 'line_count', # the current line number
+ 'pod_para_count', # count of pod paragraphs seen so far
+
+ 'no_whining', # whether to suppress whining
+ 'no_errata_section', # whether to suppress the errata section
+ 'complain_stderr', # whether to complain to stderr
+
+ 'doc_has_started', # whether we've fired the open-Document event yet
+
+ 'bare_output', # For some subclasses: whether to prepend
+ # header-code and postpend footer-code
+
+ 'fullstop_space_harden', # Whether to turn ". " into ".[nbsp] ";
+
+ 'nix_X_codes', # whether to ignore X<...> codes
+ 'merge_text', # whether to avoid breaking a single piece of
+ # text up into several events
+
+ 'preserve_whitespace', # whether to try to keep whitespace as-is
+
+ 'content_seen', # whether we've seen any real Pod content
+ 'errors_seen', # TODO: document. whether we've seen any errors (fatal or not)
+
+ 'codes_in_verbatim', # for PseudoPod extensions
+
+ 'code_handler', # coderef to call when a code (non-pod) line is seen
+ 'cut_handler', # coderef to call when a =cut line is seen
+ #Called like:
+ # $code_handler->($line, $self->{'line_count'}, $self) if $code_handler;
+ # $cut_handler->($line, $self->{'line_count'}, $self) if $cut_handler;
+
+);
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub any_errata_seen { # good for using as an exit() value...
+ return shift->{'errors_seen'} || 0;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+# Pull in some functions that, for some reason, I expect to see here too:
+BEGIN {
+ *pretty = \&Pod::Simple::BlackBox::pretty;
+ *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub version_report {
+ my $class = ref($_[0]) || $_[0];
+ if($class eq __PACKAGE__) {
+ return "$class $VERSION";
+ } else {
+ my $v = $class->VERSION;
+ return "$class $v (" . __PACKAGE__ . " $VERSION)";
+ }
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+#sub curr_open { # read-only list accessor
+# return @{ $_[0]{'curr_open'} || return() };
+#}
+#sub _curr_open_listref { $_[0]{'curr_open'} ||= [] }
+
+
+sub output_string {
+ # Works by faking out output_fh. Simplifies our code.
+ #
+ my $this = shift;
+ return $this->{'output_string'} unless @_; # GET.
+
+ require Pod::Simple::TiedOutFH;
+ my $x = (defined($_[0]) and ref($_[0])) ? $_[0] : \( $_[0] );
+ $$x = '' unless defined $$x;
+ DEBUG > 4 and print "# Output string set to $x ($$x)\n";
+ $this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]);
+ return
+ $this->{'output_string'} = $_[0];
+ #${ ${ $this->{'output_fh'} } };
+}
+
+sub abandon_output_string { $_[0]->abandon_output_fh; delete $_[0]{'output_string'} }
+sub abandon_output_fh { $_[0]->output_fh(undef) }
+# These don't delete the string or close the FH -- they just delete our
+# references to it/them.
+# TODO: document these
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub new {
+ # takes no parameters
+ my $class = ref($_[0]) || $_[0];
+ #Carp::croak(__PACKAGE__ . " is a virtual base class -- see perldoc "
+ # . __PACKAGE__ );
+ return bless {
+ 'accept_codes' => { map( ($_=>$_), @Known_formatting_codes ) },
+ 'accept_directives' => { %Known_directives },
+ 'accept_targets' => {},
+ }, $class;
+}
+
+
+
+# TODO: an option for whether to interpolate E<...>'s, or just resolve to codes.
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _handle_element_start { # OVERRIDE IN DERIVED CLASS
+ my($self, $element_name, $attr_hash_r) = @_;
+ return;
+}
+
+sub _handle_element_end { # OVERRIDE IN DERIVED CLASS
+ my($self, $element_name) = @_;
+ return;
+}
+
+sub _handle_text { # OVERRIDE IN DERIVED CLASS
+ my($self, $text) = @_;
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now directives (not targets)
+
+sub accept_directive_as_verbatim { shift->_accept_directives('Verbatim', @_) }
+sub accept_directive_as_data { shift->_accept_directives('Data', @_) }
+sub accept_directive_as_processed { shift->_accept_directives('Plain', @_) }
+
+sub _accept_directives {
+ my($this, $type) = splice @_,0,2;
+ foreach my $d (@_) {
+ next unless defined $d and length $d;
+ Carp::croak "\"$d\" isn't a valid directive name"
+ unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s;
+ Carp::croak "\"$d\" is already a reserved Pod directive name"
+ if exists $Known_directives{$d};
+ $this->{'accept_directives'}{$d} = $type;
+ DEBUG > 2 and print "Learning to accept \"=$d\" as directive of type $type\n";
+ }
+ DEBUG > 6 and print "$this\'s accept_directives : ",
+ pretty($this->{'accept_directives'}), "\n";
+
+ return sort keys %{ $this->{'accept_directives'} } if wantarray;
+ return;
+}
+
+#--------------------------------------------------------------------------
+# TODO: document these:
+
+sub unaccept_directive { shift->unaccept_directives(@_) };
+
+sub unaccept_directives {
+ my $this = shift;
+ foreach my $d (@_) {
+ next unless defined $d and length $d;
+ Carp::croak "\"$d\" isn't a valid directive name"
+ unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s;
+ Carp::croak "But you must accept \"$d\" directives -- it's a builtin!"
+ if exists $Known_directives{$d};
+ delete $this->{'accept_directives'}{$d};
+ DEBUG > 2 and print "OK, won't accept \"=$d\" as directive.\n";
+ }
+ return sort keys %{ $this->{'accept_directives'} } if wantarray;
+ return
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now targets (not directives)
+
+sub accept_target { shift->accept_targets(@_) } # alias
+sub accept_target_as_text { shift->accept_targets_as_text(@_) } # alias
+
+
+sub accept_targets { shift->_accept_targets('1', @_) }
+
+sub accept_targets_as_text { shift->_accept_targets('force_resolve', @_) }
+ # forces them to be processed, even when there's no ":".
+
+sub _accept_targets {
+ my($this, $type) = splice @_,0,2;
+ foreach my $t (@_) {
+ next unless defined $t and length $t;
+ # TODO: enforce some limitations on what a target name can be?
+ $this->{'accept_targets'}{$t} = $type;
+ DEBUG > 2 and print "Learning to accept \"$t\" as target of type $type\n";
+ }
+ return sort keys %{ $this->{'accept_targets'} } if wantarray;
+ return;
+}
+
+#--------------------------------------------------------------------------
+sub unaccept_target { shift->unaccept_targets(@_) }
+
+sub unaccept_targets {
+ my $this = shift;
+ foreach my $t (@_) {
+ next unless defined $t and length $t;
+ # TODO: enforce some limitations on what a target name can be?
+ delete $this->{'accept_targets'}{$t};
+ DEBUG > 2 and print "OK, won't accept \"$t\" as target.\n";
+ }
+ return sort keys %{ $this->{'accept_targets'} } if wantarray;
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And now codes (not targets or directives)
+
+sub accept_code { shift->accept_codes(@_) } # alias
+
+sub accept_codes { # Add some codes
+ my $this = shift;
+
+ foreach my $new_code (@_) {
+ next unless defined $new_code and length $new_code;
+ if(ASCII) {
+ # A good-enough check that it's good as an XML Name symbol:
+ Carp::croak "\"$new_code\" isn't a valid element name"
+ if $new_code =~
+ m/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/
+ # Characters under 0x80 that aren't legal in an XML Name.
+ or $new_code =~ m/^[-\.0-9]/s
+ or $new_code =~ m/:[-\.0-9]/s;
+ # The legal under-0x80 Name characters that
+ # an XML Name still can't start with.
+ }
+
+ $this->{'accept_codes'}{$new_code} = $new_code;
+
+ # Yes, map to itself -- just so that when we
+ # see "=extend W [whatever] thatelementname", we say that W maps
+ # to whatever $this->{accept_codes}{thatelementname} is,
+ # i.e., "thatelementname". Then when we go re-mapping,
+ # a "W" in the treelet turns into "thatelementname". We only
+ # remap once.
+ # If we say we accept "W", then a "W" in the treelet simply turns
+ # into "W".
+ }
+
+ return;
+}
+
+#--------------------------------------------------------------------------
+sub unaccept_code { shift->unaccept_codes(@_) }
+
+sub unaccept_codes { # remove some codes
+ my $this = shift;
+
+ foreach my $new_code (@_) {
+ next unless defined $new_code and length $new_code;
+ if(ASCII) {
+ # A good-enough check that it's good as an XML Name symbol:
+ Carp::croak "\"$new_code\" isn't a valid element name"
+ if $new_code =~
+ m/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/
+ # Characters under 0x80 that aren't legal in an XML Name.
+ or $new_code =~ m/^[-\.0-9]/s
+ or $new_code =~ m/:[-\.0-9]/s;
+ # The legal under-0x80 Name characters that
+ # an XML Name still can't start with.
+ }
+
+ Carp::croak "But you must accept \"$new_code\" codes -- it's a builtin!"
+ if grep $new_code eq $_, @Known_formatting_codes;
+
+ delete $this->{'accept_codes'}{$new_code};
+
+ DEBUG > 2 and print "OK, won't accept the code $new_code<...>.\n";
+ }
+
+ return;
+}
+
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub parse_string_document {
+ my $self = shift;
+ my @lines;
+ foreach my $line_group (@_) {
+ next unless defined $line_group and length $line_group;
+ pos($line_group) = 0;
+ while($line_group =~
+ m/([^\n\r]*)((?:\r?\n)?)/g
+ ) {
+ #print(">> $1\n"),
+ $self->parse_lines($1)
+ if length($1) or length($2)
+ or pos($line_group) != length($line_group);
+ # I.e., unless it's a zero-length "empty line" at the very
+ # end of "foo\nbar\n" (i.e., between the \n and the EOS).
+ }
+ }
+ $self->parse_lines(undef); # to signal EOF
+ return $self;
+}
+
+sub _init_fh_source {
+ my($self, $source) = @_;
+
+ #DEBUG > 1 and print "Declaring $source as :raw for starters\n";
+ #$self->_apply_binmode($source, ':raw');
+ #binmode($source, ":raw");
+
+ return;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+#
+
+sub parse_file {
+ my($self, $source) = (@_);
+
+ if(!defined $source) {
+ Carp::croak("Can't use empty-string as a source for parse_file");
+ } elsif(ref(\$source) eq 'GLOB') {
+ $self->{'source_filename'} = '' . ($source);
+ } elsif(ref $source) {
+ $self->{'source_filename'} = '' . ($source);
+ } elsif(!length $source) {
+ Carp::croak("Can't use empty-string as a source for parse_file");
+ } else {
+ {
+ local *PODSOURCE;
+ open(PODSOURCE, "<$source") || Carp::croak("Can't open $source: $!");
+ $self->{'source_filename'} = $source;
+ $source = *PODSOURCE{IO};
+ }
+ $self->_init_fh_source($source);
+ }
+ # By here, $source is a FH.
+
+ $self->{'source_fh'} = $source;
+
+ my($i, @lines);
+ until( $self->{'source_dead'} ) {
+ splice @lines;
+ for($i = MANY_LINES; $i--;) { # read those many lines at a time
+ local $/ = $NL;
+ push @lines, scalar(<$source>); # readline
+ last unless defined $lines[-1];
+ # but pass thru the undef, which will set source_dead to true
+ }
+ $self->parse_lines(@lines);
+ }
+ delete($self->{'source_fh'}); # so it can be GC'd
+ return $self;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub parse_from_file {
+ # An emulation of Pod::Parser's interface, for the sake of Perldoc.
+ # Basically just a wrapper around parse_file.
+
+ my($self, $source, $to) = @_;
+ $self = $self->new unless ref($self); # so we tolerate being a class method
+
+ if(!defined $source) { $source = *STDIN{IO}
+ } elsif(ref(\$source) eq 'GLOB') { # stet
+ } elsif(ref($source) ) { # stet
+ } elsif(!length $source
+ or $source eq '-' or $source =~ m/^<&(STDIN|0)$/i
+ ) {
+ $source = *STDIN{IO};
+ }
+
+ if(!defined $to) { $self->output_fh( *STDOUT{IO} );
+ } elsif(ref(\$to) eq 'GLOB') { $self->output_fh( $to );
+ } elsif(ref($to)) { $self->output_fh( $to );
+ } elsif(!length $to
+ or $to eq '-' or $to =~ m/^>&?(?:STDOUT|1)$/i
+ ) {
+ $self->output_fh( *STDOUT{IO} );
+ } else {
+ require Symbol;
+ my $out_fh = Symbol::gensym();
+ DEBUG and print "Write-opening to $to\n";
+ open($out_fh, ">$to") or Carp::croak "Can't write-open $to: $!";
+ binmode($out_fh)
+ if $self->can('write_with_binmode') and $self->write_with_binmode;
+ $self->output_fh($out_fh);
+ }
+
+ return $self->parse_file($source);
+}
+
+#-----------------------------------------------------------------------------
+
+sub whine {
+ #my($self,$line,$complaint) = @_;
+ my $self = shift(@_);
+ ++$self->{'errors_seen'};
+ if($self->{'no_whining'}) {
+ DEBUG > 9 and print "Discarding complaint (at line $_[0]) $_[1]\n because no_whining is on.\n";
+ return;
+ }
+ return $self->_complain_warn(@_) if $self->{'complain_stderr'};
+ return $self->_complain_errata(@_);
+}
+
+sub scream { # like whine, but not suppressable
+ #my($self,$line,$complaint) = @_;
+ my $self = shift(@_);
+ ++$self->{'errors_seen'};
+ return $self->_complain_warn(@_) if $self->{'complain_stderr'};
+ return $self->_complain_errata(@_);
+}
+
+sub _complain_warn {
+ my($self,$line,$complaint) = @_;
+ return printf STDERR "%s around line %s: %s\n",
+ $self->{'source_filename'} || 'Pod input', $line, $complaint;
+}
+
+sub _complain_errata {
+ my($self,$line,$complaint) = @_;
+ if( $self->{'no_errata_section'} ) {
+ DEBUG > 9 and print "Discarding erratum (at line $line) $complaint\n because no_errata_section is on.\n";
+ } else {
+ DEBUG > 9 and print "Queuing erratum (at line $line) $complaint\n";
+ push @{$self->{'errata'}{$line}}, $complaint
+ # for a report to be generated later!
+ }
+ return 1;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _get_initial_item_type {
+ # A hack-wrapper here for when you have like "=over\n\n=item 456\n\n"
+ my($self, $para) = @_;
+ return $para->[1]{'~type'} if $para->[1]{'~type'};
+
+ return $para->[1]{'~type'} = 'text'
+ if join("\n", @{$para}[2 .. $#$para]) =~ m/^\s*(\d+)\.?\s*$/s and $1 ne '1';
+ # Else fall thru to the general case:
+ return $self->_get_item_type($para);
+}
+
+
+
+sub _get_item_type { # mutates the item!!
+ my($self, $para) = @_;
+ return $para->[1]{'~type'} if $para->[1]{'~type'};
+
+
+ # Otherwise we haven't yet been to this node. Maybe alter it...
+
+ my $content = join "\n", @{$para}[2 .. $#$para];
+
+ if($content =~ m/^\s*\*\s*$/s or $content =~ m/^\s*$/s) {
+ # Like: "=item *", "=item * ", "=item"
+ splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
+ $para->[1]{'~orig_content'} = $content;
+ return $para->[1]{'~type'} = 'bullet';
+
+ } elsif($content =~ m/^\s*\*\s+(.+)/s) { # tolerance
+
+ # Like: "=item * Foo bar baz";
+ $para->[1]{'~orig_content'} = $content;
+ $para->[1]{'~_freaky_para_hack'} = $1;
+ DEBUG > 2 and print " Tolerating $$para[2] as =item *\\n\\n$1\n";
+ splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
+ return $para->[1]{'~type'} = 'bullet';
+
+ } elsif($content =~ m/^\s*(\d+)\.?\s*$/s) {
+ # Like: "=item 1.", "=item 123412"
+
+ $para->[1]{'~orig_content'} = $content;
+ $para->[1]{'number'} = $1; # Yes, stores the number there!
+
+ splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
+ return $para->[1]{'~type'} = 'number';
+
+ } else {
+ # It's anything else.
+ return $para->[1]{'~type'} = 'text';
+
+ }
+}
+
+#-----------------------------------------------------------------------------
+
+sub _make_treelet {
+ my $self = shift; # and ($para, $start_line)
+ my $treelet;
+ if(!@_) {
+ return [''];
+ } if(ref $_[0] and ref $_[0][0] and $_[0][0][0] eq '~Top') {
+ # Hack so we can pass in fake-o pre-cooked paragraphs:
+ # just have the first line be a reference to a ['~Top', {}, ...]
+ # We use this feechure in gen_errata and stuff.
+
+ DEBUG and print "Applying precooked treelet hack to $_[0][0]\n";
+ $treelet = $_[0][0];
+ splice @$treelet, 0, 2; # lop the top off
+ return $treelet;
+ } else {
+ $treelet = $self->_treelet_from_formatting_codes(@_);
+ }
+
+ if( $self->_remap_sequences($treelet) ) {
+ $self->_treat_Zs($treelet); # Might as well nix these first
+ $self->_treat_Ls($treelet); # L has to precede E and S
+ $self->_treat_Es($treelet);
+ $self->_treat_Ss($treelet); # S has to come after E
+
+ $self->_wrap_up($treelet); # Nix X's and merge texties
+
+ } else {
+ DEBUG and print "Formatless treelet gets fast-tracked.\n";
+ # Very common case!
+ }
+
+ splice @$treelet, 0, 2; # lop the top off
+
+ return $treelet;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _wrap_up {
+ my($self, @stack) = @_;
+ my $nixx = $self->{'nix_X_codes'};
+ my $merge = $self->{'merge_text' };
+ return unless $nixx or $merge;
+
+ DEBUG > 2 and print "\nStarting _wrap_up traversal.\n",
+ $merge ? (" Merge mode on\n") : (),
+ $nixx ? (" Nix-X mode on\n") : (),
+ ;
+
+
+ my($i, $treelet);
+ while($treelet = shift @stack) {
+ DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n";
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ DEBUG > 3 and print " Considering child at $i ", pretty($treelet->[$i]), "\n";
+ if($nixx and ref $treelet->[$i] and $treelet->[$i][0] eq 'X') {
+ DEBUG > 3 and print " Nixing X node at $i\n";
+ splice(@$treelet, $i, 1); # just nix this node (and its descendants)
+ # no need to back-update the counter just yet
+ redo;
+
+ } elsif($merge and $i != 2 and # non-initial
+ !ref $treelet->[$i] and !ref $treelet->[$i - 1]
+ ) {
+ DEBUG > 3 and print " Merging ", $i-1,
+ ":[$treelet->[$i-1]] and $i\:[$treelet->[$i]]\n";
+ $treelet->[$i-1] .= ( splice(@$treelet, $i, 1) )[0];
+ DEBUG > 4 and print " Now: ", $i-1, ":[$treelet->[$i-1]]\n";
+ --$i;
+ next;
+ # since we just pulled the possibly last node out from under
+ # ourselves, we can't just redo()
+
+ } elsif( ref $treelet->[$i] ) {
+ DEBUG > 4 and print " Enqueuing ", pretty($treelet->[$i]), " for traversal.\n";
+ push @stack, $treelet->[$i];
+
+ if($treelet->[$i][0] eq 'L') {
+ my $thing;
+ foreach my $attrname ('section', 'to') {
+ if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
+ unshift @stack, $thing;
+ DEBUG > 4 and print " +Enqueuing ",
+ pretty( $treelet->[$i][1]{$attrname} ),
+ " as an attribute value to tweak.\n";
+ }
+ }
+ }
+ }
+ }
+ }
+ DEBUG > 2 and print "End of _wrap_up traversal.\n\n";
+
+ return;
+}
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _remap_sequences {
+ my($self,@stack) = @_;
+
+ if(@stack == 1 and @{ $stack[0] } == 3 and !ref $stack[0][2]) {
+ # VERY common case: abort it.
+ DEBUG and print "Skipping _remap_sequences: formatless treelet.\n";
+ return 0;
+ }
+
+ my $map = ($self->{'accept_codes'} || die "NO accept_codes in $self?!?");
+
+ my $start_line = $stack[0][1]{'start_line'};
+ DEBUG > 2 and printf
+ "\nAbout to start _remap_sequences on treelet from line %s.\n",
+ $start_line || '[?]'
+ ;
+ DEBUG > 3 and print " Map: ",
+ join('; ', map "$_=" . (
+ ref($map->{$_}) ? join(",", @{$map->{$_}}) : $map->{$_}
+ ),
+ sort keys %$map ),
+ ("B~C~E~F~I~L~S~X~Z" eq join '~', sort keys %$map)
+ ? " (all normal)\n" : "\n"
+ ;
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ my($is, $was, $i, $treelet); # scratch
+ while($treelet = shift @stack) {
+ DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n";
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+
+ DEBUG > 4 and print " Noting child $i : $treelet->[$i][0]<...>\n";
+
+ $is = $treelet->[$i][0] = $map->{ $was = $treelet->[$i][0] };
+ if( DEBUG > 3 ) {
+ if(!defined $is) {
+ print " Code $was<> is UNKNOWN!\n";
+ } elsif($is eq $was) {
+ DEBUG > 4 and print " Code $was<> stays the same.\n";
+ } else {
+ print " Code $was<> maps to ",
+ ref($is)
+ ? ( "tags ", map("$_<", @$is), '...', map('>', @$is), "\n" )
+ : "tag $is<...>.\n";
+ }
+ }
+
+ if(!defined $is) {
+ $self->whine($start_line, "Deleting unknown formatting code $was<>");
+ $is = $treelet->[$i][0] = '1'; # But saving the children!
+ # I could also insert a leading "$was<" and tailing ">" as
+ # children of this node, but something about that seems icky.
+ }
+ if(ref $is) {
+ my @dynasty = @$is;
+ DEBUG > 4 and print " Renaming $was node to $dynasty[-1]\n";
+ $treelet->[$i][0] = pop @dynasty;
+ my $nugget;
+ while(@dynasty) {
+ DEBUG > 4 and printf
+ " Grafting a new %s node between %s and %s\n",
+ $dynasty[-1], $treelet->[0], $treelet->[$i][0],
+ ;
+
+ #$nugget = ;
+ splice @$treelet, $i, 1, [pop(@dynasty), {}, $treelet->[$i]];
+ # relace node with a new parent
+ }
+ } elsif($is eq '0') {
+ splice(@$treelet, $i, 1); # just nix this node (and its descendants)
+ --$i; # back-update the counter
+ } elsif($is eq '1') {
+ splice(@$treelet, $i, 1 # replace this node with its children!
+ => splice @{ $treelet->[$i] },2
+ # (not catching its first two (non-child) items)
+ );
+ --$i; # back up for new stuff
+ } else {
+ # otherwise it's unremarkable
+ unshift @stack, $treelet->[$i]; # just recurse
+ }
+ }
+ }
+
+ DEBUG > 2 and print "End of _remap_sequences traversal.\n\n";
+
+ if(@_ == 2 and @{ $_[1] } == 3 and !ref $_[1][2]) {
+ DEBUG and print "Noting that the treelet is now formatless.\n";
+ return 0;
+ }
+ return 1;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _ponder_extend {
+
+ # "Go to an extreme, move back to a more comfortable place"
+ # -- /Oblique Strategies/, Brian Eno and Peter Schmidt
+
+ my($self, $para) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+
+ DEBUG > 2 and print "Ogling extensor: =extend $content\n";
+
+ if($content =~
+ m/^
+ (\S+) # 1 : new item
+ \s+
+ (\S+) # 2 : fallback(s)
+ (?:\s+(\S+))? # 3 : element name(s)
+ \s*
+ $
+ /xs
+ ) {
+ my $new_letter = $1;
+ my $fallbacks_one = $2;
+ my $elements_one;
+ $elements_one = defined($3) ? $3 : $1;
+
+ DEBUG > 2 and print "Extensor has good syntax.\n";
+
+ unless($new_letter =~ m/^[A-Z]$/s or $new_letter) {
+ DEBUG > 2 and print " $new_letter isn't a valid thing to entend.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You can extend only formatting codes A-Z, not like \"$new_letter\""
+ );
+ return;
+ }
+
+ if(grep $new_letter eq $_, @Known_formatting_codes) {
+ DEBUG > 2 and print " $new_letter isn't a good thing to extend, because known.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You can't extend an established code like \"$new_letter\""
+ );
+
+ #TODO: or allow if last bit is same?
+
+ return;
+ }
+
+ unless($fallbacks_one =~ m/^[A-Z](,[A-Z])*$/s # like "B", "M,I", etc.
+ or $fallbacks_one eq '0' or $fallbacks_one eq '1'
+ ) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Format for second =extend parameter must be like"
+ . " M or 1 or 0 or M,N or M,N,O but you have it like "
+ . $fallbacks_one
+ );
+ return;
+ }
+
+ unless($elements_one =~ m/^[^ ,]+(,[^ ,]+)*$/s) { # like "B", "M,I", etc.
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Format for third =extend parameter: like foo or bar,Baz,qu:ux but not like "
+ . $elements_one
+ );
+ return;
+ }
+
+ my @fallbacks = split ',', $fallbacks_one, -1;
+ my @elements = split ',', $elements_one, -1;
+
+ foreach my $f (@fallbacks) {
+ next if exists $Known_formatting_codes{$f} or $f eq '0' or $f eq '1';
+ DEBUG > 2 and print " Can't fall back on unknown code $f\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Can't use unknown formatting code '$f' as a fallback for '$new_letter'"
+ );
+ return;
+ }
+
+ DEBUG > 3 and printf "Extensor: Fallbacks <%s> Elements <%s>.\n",
+ @fallbacks, @elements;
+
+ my $canonical_form;
+ foreach my $e (@elements) {
+ if(exists $self->{'accept_codes'}{$e}) {
+ DEBUG > 1 and print " Mapping '$new_letter' to known extension '$e'\n";
+ $canonical_form = $e;
+ last; # first acceptable elementname wins!
+ } else {
+ DEBUG > 1 and print " Can't map '$new_letter' to unknown extension '$e'\n";
+ }
+ }
+
+
+ if( defined $canonical_form ) {
+ # We found a good N => elementname mapping
+ $self->{'accept_codes'}{$new_letter} = $canonical_form;
+ DEBUG > 2 and print
+ "Extensor maps $new_letter => known element $canonical_form.\n";
+ } else {
+ # We have to use the fallback(s), which might be '0', or '1'.
+ $self->{'accept_codes'}{$new_letter}
+ = (@fallbacks == 1) ? $fallbacks[0] : \@fallbacks;
+ DEBUG > 2 and print
+ "Extensor maps $new_letter => fallbacks @fallbacks.\n";
+ }
+
+ } else {
+ DEBUG > 2 and print "Extensor has bad syntax.\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Unknown =extend syntax: $content"
+ )
+ }
+ return;
+}
+
+
+#:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
+
+sub _treat_Zs { # Nix Z<...>'s
+ my($self,@stack) = @_;
+
+ my($i, $treelet);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ while($treelet = shift @stack) {
+ for($i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ unless($treelet->[$i][0] eq 'Z') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+ DEBUG > 1 and print "Nixing Z node @{$treelet->[$i]}\n";
+
+ # bitch UNLESS it's empty
+ unless( @{$treelet->[$i]} == 2
+ or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ ) {
+ $self->whine( $start_line, "A non-empty Z<>" );
+ } # but kill it anyway
+
+ splice(@$treelet, $i, 1); # thereby just nix this node.
+ --$i;
+
+ }
+ }
+
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+# Quoting perlpodspec:
+
+# In parsing an L<...> code, Pod parsers must distinguish at least four
+# attributes:
+
+############# Not used. Expressed via the element children plus
+############# the value of the "content-implicit" flag.
+# First:
+# The link-text. If there is none, this must be undef. (E.g., in "L<Perl
+# Functions|perlfunc>", the link-text is "Perl Functions". In
+# "L<Time::HiRes>" and even "L<|Time::HiRes>", there is no link text. Note
+# that link text may contain formatting.)
+#
+
+############# The element children
+# Second:
+# The possibly inferred link-text -- i.e., if there was no real link text,
+# then this is the text that we'll infer in its place. (E.g., for
+# "L<Getopt::Std>", the inferred link text is "Getopt::Std".)
+#
+
+############# The "to" attribute (which might be text, or a treelet)
+# Third:
+# The name or URL, or undef if none. (E.g., in "L<Perl
+# Functions|perlfunc>", the name -- also sometimes called the page -- is
+# "perlfunc". In "L</CAVEATS>", the name is undef.)
+#
+
+############# The "section" attribute (which might be next, or a treelet)
+# Fourth:
+# The section (AKA "item" in older perlpods), or undef if none. E.g., in
+# Getopt::Std/DESCRIPTION, "DESCRIPTION" is the section. (Note that this
+# is not the same as a manpage section like the "5" in "man 5 crontab".
+# "Section Foo" in the Pod sense means the part of the text that's
+# introduced by the heading or item whose text is "Foo".)
+#
+# Pod parsers may also note additional attributes including:
+#
+
+############# The "type" attribute.
+# Fifth:
+# A flag for whether item 3 (if present) is a URL (like
+# "http://lists.perl.org" is), in which case there should be no section
+# attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or
+# possibly a man page name (like "crontab(5)" is).
+#
+
+############# Not implemented, I guess.
+# Sixth:
+# The raw original L<...> content, before text is split on "|", "/", etc,
+# and before E<...> codes are expanded.
+
+
+# For L<...> codes without a "name|" part, only E<...> and Z<> codes may
+# occur -- no other formatting codes. That is, authors should not use
+# "L<B<Foo::Bar>>".
+#
+# Note, however, that formatting codes and Z<>'s can occur in any and all
+# parts of an L<...> (i.e., in name, section, text, and url).
+
+sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
+
+ # L<name>
+ # L<name/"sec"> or L<name/sec>
+ # L</"sec"> or L</sec> or L<"sec">
+ # L<text|name>
+ # L<text|name/"sec"> or L<text|name/sec>
+ # L<text|/"sec"> or L<text|/sec> or L<text|"sec">
+ # L<scheme:...>
+ # Ltext|scheme:...>
+
+ my($self,@stack) = @_;
+
+ my($i, $treelet);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+ while($treelet = shift @stack) {
+ for(my $i = 2; $i < @$treelet; ++$i) {
+ # iterate over children of current tree node
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ unless($treelet->[$i][0] eq 'L') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+
+ # By here, $treelet->[$i] is definitely an L node
+ my $ell = $treelet->[$i];
+ DEBUG > 1 and print "Ogling L node $ell\n";
+
+ # bitch if it's empty
+ if( @{$ell} == 2
+ or (@{$ell} == 3 and $ell->[2] eq '')
+ ) {
+ $self->whine( $start_line, "An empty L<>" );
+ $treelet->[$i] = 'L<>'; # just make it a text node
+ next; # and move on
+ }
+
+ # Catch URLs:
+
+ # there are a number of possible cases:
+ # 1) text node containing url: http://foo.com
+ # -> [ 'http://foo.com' ]
+ # 2) text node containing url and text: foo|http://foo.com
+ # -> [ 'foo|http://foo.com' ]
+ # 3) text node containing url start: mailto:xE<at>foo.com
+ # -> [ 'mailto:x', [ E ... ], 'foo.com' ]
+ # 4) text node containing url start and text: foo|mailto:xE<at>foo.com
+ # -> [ 'foo|mailto:x', [ E ... ], 'foo.com' ]
+ # 5) other nodes containing text and url start: OE<39>Malley|http://foo.com
+ # -> [ 'O', [ E ... ], 'Malley', '|http://foo.com' ]
+ # ... etc.
+
+ # anything before the url is part of the text.
+ # anything after it is part of the url.
+ # the url text node itself may contain parts of both.
+
+ if (my ($url_index, $text_part, $url_part) =
+ # grep is no good here; we want to bail out immediately so that we can
+ # use $1, $2, etc. without having to do the match twice.
+ sub {
+ for (2..$#$ell) {
+ next if ref $ell->[$_];
+ next unless $ell->[$_] =~ m/^(?:([^|]*)\|)?(\w+:[^:\s]\S*)$/s;
+ return ($_, $1, $2);
+ }
+ return;
+ }->()
+ ) {
+ $ell->[1]{'type'} = 'url';
+
+ my @text = @{$ell}[2..$url_index-1];
+ push @text, $text_part if defined $text_part;
+
+ my @url = @{$ell}[$url_index+1..$#$ell];
+ unshift @url, $url_part;
+
+ unless (@text) {
+ $ell->[1]{'content-implicit'} = 'yes';
+ @text = @url;
+ }
+
+ $ell->[1]{to} = Pod::Simple::LinkSection->new(
+ @url == 1
+ ? $url[0]
+ : [ '', {}, @url ],
+ );
+
+ splice @$ell, 2, $#$ell, @text;
+
+ next;
+ }
+
+ # Catch some very simple and/or common cases
+ if(@{$ell} == 3 and ! ref $ell->[2]) {
+ my $it = $ell->[2];
+ if($it =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s) { # man sections
+ # Hopefully neither too broad nor too restrictive a RE
+ DEBUG > 1 and print "Catching \"$it\" as manpage link.\n";
+ $ell->[1]{'type'} = 'man';
+ # This's the only place where man links can get made.
+ $ell->[1]{'content-implicit'} = 'yes';
+ $ell->[1]{'to' } =
+ Pod::Simple::LinkSection->new( $it ); # treelet!
+
+ next;
+ }
+ if($it =~ m/^[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+(\:\:[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+)*$/s) {
+ # Extremely forgiving idea of what constitutes a bare
+ # modulename link like L<Foo::Bar> or even L<Thing::1.0::Docs::Tralala>
+ DEBUG > 1 and print "Catching \"$it\" as ho-hum L<Modulename> link.\n";
+ $ell->[1]{'type'} = 'pod';
+ $ell->[1]{'content-implicit'} = 'yes';
+ $ell->[1]{'to' } =
+ Pod::Simple::LinkSection->new( $it ); # treelet!
+ next;
+ }
+ # else fall thru...
+ }
+
+
+
+ # ...Uhoh, here's the real L<...> parsing stuff...
+ # "With the ill behavior, with the ill behavior, with the ill behavior..."
+
+ DEBUG > 1 and print "Running a real parse on this non-trivial L\n";
+
+
+ my $link_text; # set to an arrayref if found
+ my @ell_content = @$ell;
+ splice @ell_content,0,2; # Knock off the 'L' and {} bits
+
+ DEBUG > 3 and print " Ell content to start: ",
+ pretty(@ell_content), "\n";
+
+
+ # Look for the "|" -- only in CHILDREN (not all underlings!)
+ # Like L<I like the strictness|strict>
+ DEBUG > 3 and
+ print " Peering at L content for a '|' ...\n";
+ for(my $j = 0; $j < @ell_content; ++$j) {
+ next if ref $ell_content[$j];
+ DEBUG > 3 and
+ print " Peering at L-content text bit \"$ell_content[$j]\" for a '|'.\n";
+
+ if($ell_content[$j] =~ m/^([^\|]*)\|(.*)$/s) {
+ my @link_text = ($1); # might be 0-length
+ $ell_content[$j] = $2; # might be 0-length
+
+ DEBUG > 3 and
+ print " FOUND a '|' in it. Splitting into [$1] + [$2]\n";
+
+ unshift @link_text, splice @ell_content, 0, $j;
+ # leaving only things at J and after
+ @ell_content = grep ref($_)||length($_), @ell_content ;
+ $link_text = [grep ref($_)||length($_), @link_text ];
+ DEBUG > 3 and printf
+ " So link text is %s\n and remaining ell content is %s\n",
+ pretty($link_text), pretty(@ell_content);
+ last;
+ }
+ }
+
+
+ # Now look for the "/" -- only in CHILDREN (not all underlings!)
+ # And afterward, anything left in @ell_content will be the raw name
+ # Like L<Foo::Bar/Object Methods>
+ my $section_name; # set to arrayref if found
+ DEBUG > 3 and print " Peering at L-content for a '/' ...\n";
+ for(my $j = 0; $j < @ell_content; ++$j) {
+ next if ref $ell_content[$j];
+ DEBUG > 3 and
+ print " Peering at L-content text bit \"$ell_content[$j]\" for a '/'.\n";
+
+ if($ell_content[$j] =~ m/^([^\/]*)\/(.*)$/s) {
+ my @section_name = ($2); # might be 0-length
+ $ell_content[$j] = $1; # might be 0-length
+
+ DEBUG > 3 and
+ print " FOUND a '/' in it.",
+ " Splitting to page [...$1] + section [$2...]\n";
+
+ push @section_name, splice @ell_content, 1+$j;
+ # leaving only things before and including J
+
+ @ell_content = grep ref($_)||length($_), @ell_content ;
+ @section_name = grep ref($_)||length($_), @section_name ;
+
+ # Turn L<.../"foo"> into L<.../foo>
+ if(@section_name
+ and !ref($section_name[0]) and !ref($section_name[-1])
+ and $section_name[ 0] =~ m/^\"/s
+ and $section_name[-1] =~ m/\"$/s
+ and !( # catch weird degenerate case of L<"> !
+ @section_name == 1 and $section_name[0] eq '"'
+ )
+ ) {
+ $section_name[ 0] =~ s/^\"//s;
+ $section_name[-1] =~ s/\"$//s;
+ DEBUG > 3 and
+ print " Quotes removed: ", pretty(@section_name), "\n";
+ } else {
+ DEBUG > 3 and
+ print " No need to remove quotes in ", pretty(@section_name), "\n";
+ }
+
+ $section_name = \@section_name;
+ last;
+ }
+ }
+
+ # Turn L<"Foo Bar"> into L</Foo Bar>
+ if(!$section_name and @ell_content
+ and !ref($ell_content[0]) and !ref($ell_content[-1])
+ and $ell_content[ 0] =~ m/^\"/s
+ and $ell_content[-1] =~ m/\"$/s
+ and !( # catch weird degenerate case of L<"> !
+ @ell_content == 1 and $ell_content[0] eq '"'
+ )
+ ) {
+ $section_name = [splice @ell_content];
+ $section_name->[ 0] =~ s/^\"//s;
+ $section_name->[-1] =~ s/\"$//s;
+ }
+
+ # Turn L<Foo Bar> into L</Foo Bar>.
+ if(!$section_name and !$link_text and @ell_content
+ and grep !ref($_) && m/ /s, @ell_content
+ ) {
+ $section_name = [splice @ell_content];
+ # That's support for the now-deprecated syntax.
+ # (Maybe generate a warning eventually?)
+ # Note that it deliberately won't work on L<...|Foo Bar>
+ }
+
+
+ # Now make up the link_text
+ # L<Foo> -> L<Foo|Foo>
+ # L</Bar> -> L<"Bar"|Bar>
+ # L<Foo/Bar> -> L<"Bar" in Foo/Foo>
+ unless($link_text) {
+ $ell->[1]{'content-implicit'} = 'yes';
+ $link_text = [];
+ push @$link_text, '"', @$section_name, '"' if $section_name;
+
+ if(@ell_content) {
+ $link_text->[-1] .= ' in ' if $section_name;
+ push @$link_text, @ell_content;
+ }
+ }
+
+
+ # And the E resolver will have to deal with all our treeletty things:
+
+ if(@ell_content == 1 and !ref($ell_content[0])
+ and $ell_content[0] =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s
+ ) {
+ $ell->[1]{'type'} = 'man';
+ DEBUG > 3 and print "Considering this ($ell_content[0]) a man link.\n";
+ } else {
+ $ell->[1]{'type'} = 'pod';
+ DEBUG > 3 and print "Considering this a pod link (not man or url).\n";
+ }
+
+ if( defined $section_name ) {
+ $ell->[1]{'section'} = Pod::Simple::LinkSection->new(
+ ['', {}, @$section_name]
+ );
+ DEBUG > 3 and print "L-section content: ", pretty($ell->[1]{'section'}), "\n";
+ }
+
+ if( @ell_content ) {
+ $ell->[1]{'to'} = Pod::Simple::LinkSection->new(
+ ['', {}, @ell_content]
+ );
+ DEBUG > 3 and print "L-to content: ", pretty($ell->[1]{'to'}), "\n";
+ }
+
+ # And update children to be the link-text:
+ @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : '');
+
+ DEBUG > 2 and print "End of L-parsing for this node $treelet->[$i]\n";
+
+ unshift @stack, $treelet->[$i]; # might as well recurse
+ }
+ }
+
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _treat_Es {
+ my($self,@stack) = @_;
+
+ my($i, $treelet, $content, $replacer, $charnum);
+ my $start_line = $stack[0][1]{'start_line'};
+
+ # A recursive algorithm implemented iteratively! Whee!
+
+
+ # Has frightening side effects on L nodes' attributes.
+
+ #my @ells_to_tweak;
+
+ while($treelet = shift @stack) {
+ for(my $i = 2; $i < @$treelet; ++$i) { # iterate over children
+ next unless ref $treelet->[$i]; # text nodes are uninteresting
+ if($treelet->[$i][0] eq 'L') {
+ # SPECIAL STUFF for semi-processed L<>'s
+
+ my $thing;
+ foreach my $attrname ('section', 'to') {
+ if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
+ unshift @stack, $thing;
+ DEBUG > 2 and print " Enqueuing ",
+ pretty( $treelet->[$i][1]{$attrname} ),
+ " as an attribute value to tweak.\n";
+ }
+ }
+
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ } elsif($treelet->[$i][0] ne 'E') {
+ unshift @stack, $treelet->[$i]; # recurse
+ next;
+ }
+
+ DEBUG > 1 and print "Ogling E node ", pretty($treelet->[$i]), "\n";
+
+ # bitch if it's empty
+ if( @{$treelet->[$i]} == 2
+ or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ ) {
+ $self->whine( $start_line, "An empty E<>" );
+ $treelet->[$i] = 'E<>'; # splice in a literal
+ next;
+ }
+
+ # bitch if content is weird
+ unless(@{$treelet->[$i]} == 3 and !ref($content = $treelet->[$i][2])) {
+ $self->whine( $start_line, "An E<...> surrounding strange content" );
+ $replacer = $treelet->[$i]; # scratch
+ splice(@$treelet, $i, 1, # fake out a literal
+ 'E<',
+ splice(@$replacer,2), # promote its content
+ '>'
+ );
+ # Don't need to do --$i, as the 'E<' we just added isn't interesting.
+ next;
+ }
+
+ DEBUG > 1 and print "Ogling E<$content>\n";
+
+ $charnum = Pod::Escapes::e2charnum($content);
+ DEBUG > 1 and print " Considering E<$content> with char ",
+ defined($charnum) ? $charnum : "undef", ".\n";
+
+ if(!defined( $charnum )) {
+ DEBUG > 1 and print "I don't know how to deal with E<$content>.\n";
+ $self->whine( $start_line, "Unknown E content in E<$content>" );
+ $replacer = "E<$content>"; # better than nothing
+ } elsif($charnum >= 255 and !UNICODE) {
+ $replacer = ASCII ? "\xA4" : "?";
+ DEBUG > 1 and print "This Perl version can't handle ",
+ "E<$content> (chr $charnum), so replacing with $replacer\n";
+ } else {
+ $replacer = Pod::Escapes::e2char($content);
+ DEBUG > 1 and print " Replacing E<$content> with $replacer\n";
+ }
+
+ splice(@$treelet, $i, 1, $replacer); # no need to back up $i, tho
+ }
+ }
+
+ return;
+}
+
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _treat_Ss {
+ my($self,$treelet) = @_;
+
+ _change_S_to_nbsp($treelet,0) if $self->{'nbsp_for_S'};
+
+ # TODO: or a change_nbsp_to_S
+ # Normalizing nbsp's to S is harder: for each text node, make S content
+ # out of anything matching m/([^ \xA0]*(?:\xA0+[^ \xA0]*)+)/
+
+
+ return;
+}
+
+
+sub _change_S_to_nbsp { # a recursive function
+ # Sanely assumes that the top node in the excursion won't be an S node.
+ my($treelet, $in_s) = @_;
+
+ my $is_s = ('S' eq $treelet->[0]);
+ $in_s ||= $is_s; # So in_s is on either by this being an S element,
+ # or by an ancestor being an S element.
+
+ for(my $i = 2; $i < @$treelet; ++$i) {
+ if(ref $treelet->[$i]) {
+ if( _change_S_to_nbsp( $treelet->[$i], $in_s ) ) {
+ my $to_pull_up = $treelet->[$i];
+ splice @$to_pull_up,0,2; # ...leaving just its content
+ splice @$treelet, $i, 1, @$to_pull_up; # Pull up content
+ $i += @$to_pull_up - 1; # Make $i skip the pulled-up stuff
+ }
+ } else {
+ $treelet->[$i] =~ s/\s/\xA0/g if ASCII and $in_s;
+ # (If not in ASCIIland, we can't assume that \xA0 == nbsp.)
+
+ # Note that if you apply nbsp_for_S to text, and so turn
+ # "foo S<bar baz> quux" into "foo bar&#160;faz quux", you
+ # end up with something that fails to say "and don't hyphenate
+ # any part of 'bar baz'". However, hyphenation is such a vexing
+ # problem anyway, that most Pod renderers just don't render it
+ # at all. But if you do want to implement hyphenation, I guess
+ # that you'd better have nbsp_for_S off.
+ }
+ }
+
+ return $is_s;
+}
+
+#-----------------------------------------------------------------------------
+
+sub _accessorize { # A simple-minded method-maker
+ no strict 'refs';
+ foreach my $attrname (@_) {
+ next if $attrname =~ m/::/; # a hack
+ *{caller() . '::' . $attrname} = sub {
+ use strict;
+ $Carp::CarpLevel = 1, Carp::croak(
+ "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
+ ) unless (@_ == 1 or @_ == 2) and ref $_[0];
+ (@_ == 1) ? $_[0]->{$attrname}
+ : ($_[0]->{$attrname} = $_[1]);
+ };
+ }
+ # Ya know, they say accessories make the ensemble!
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+#=============================================================================
+
+sub filter {
+ my($class, $source) = @_;
+ my $new = $class->new;
+ $new->output_fh(*STDOUT{IO});
+
+ if(ref($source || '') eq 'SCALAR') {
+ $new->parse_string_document( $$source );
+ } elsif(ref($source)) { # it's a file handle
+ $new->parse_file($source);
+ } else { # it's a filename
+ $new->parse_file($source);
+ }
+
+ return $new;
+}
+
+
+#-----------------------------------------------------------------------------
+
+sub _out {
+ # For use in testing: Class->_out($source)
+ # returns the transformation of $source
+
+ my $class = shift(@_);
+
+ my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
+
+ DEBUG and print "\n\n", '#' x 76,
+ "\nAbout to parse source: {{\n$_[0]\n}}\n\n";
+
+
+ my $parser = ref $class && $class->isa(__PACKAGE__) ? $class : $class->new;
+ $parser->hide_line_numbers(1);
+
+ my $out = '';
+ $parser->output_string( \$out );
+ DEBUG and print " _out to ", \$out, "\n";
+
+ $mutor->($parser) if $mutor;
+
+ $parser->parse_string_document( $_[0] );
+ # use Data::Dumper; print Dumper($parser), "\n";
+ return $out;
+}
+
+
+sub _duo {
+ # For use in testing: Class->_duo($source1, $source2)
+ # returns the parse trees of $source1 and $source2.
+ # Good in things like: &ok( Class->duo(... , ...) );
+
+ my $class = shift(@_);
+
+ Carp::croak "But $class->_duo is useful only in list context!"
+ unless wantarray;
+
+ my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
+
+ Carp::croak "But $class->_duo takes two parameters, not: @_"
+ unless @_ == 2;
+
+ my(@out);
+
+ while( @_ ) {
+ my $parser = $class->new;
+
+ push @out, '';
+ $parser->output_string( \( $out[-1] ) );
+
+ DEBUG and print " _duo out to ", $parser->output_string(),
+ " = $parser->{'output_string'}\n";
+
+ $parser->hide_line_numbers(1);
+ $mutor->($parser) if $mutor;
+ $parser->parse_string_document( shift( @_ ) );
+ # use Data::Dumper; print Dumper($parser), "\n";
+ }
+
+ return @out;
+}
+
+
+
+#-----------------------------------------------------------------------------
+1;
+__END__
+
+TODO:
+A start_formatting_code and end_formatting_code methods, which in the
+base class call start_L, end_L, start_C, end_C, etc., if they are
+defined.
+
+have the POD FORMATTING ERRORS section note the localtime, and the
+version of Pod::Simple.
+
+option to delete all E<shy>s?
+option to scream if under-0x20 literals are found in the input, or
+under-E<32> E codes are found in the tree. And ditto \x7f-\x9f
+
+Option to turn highbit characters into their compromised form? (applies
+to E parsing too)
+
+TODO: BOM/encoding things.
+
+TODO: ascii-compat things in the XML classes?
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple.pod b/ext/Pod-Simple/lib/Pod/Simple.pod
new file mode 100644
index 0000000000..a58217336a
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple.pod
@@ -0,0 +1,226 @@
+
+=head1 NAME
+
+Pod::Simple - framework for parsing Pod
+
+=head1 SYNOPSIS
+
+ TODO
+
+=head1 DESCRIPTION
+
+Pod::Simple is a Perl library for parsing text in the Pod ("plain old
+documentation") markup language that is typically used for writing
+documentation for Perl and for Perl modules. The Pod format is explained
+in the L<perlpod|perlpod> man page; the most common formatter is called
+"perldoc".
+
+Pod formatters can use Pod::Simple to parse Pod documents into produce
+renderings of them in plain ASCII, in HTML, or in any number of other
+formats. Typically, such formatters will be subclasses of Pod::Simple,
+and so they will inherit its methods, like C<parse_file>.
+
+If you're reading this document just because you have a Pod-processing
+subclass that you want to use, this document (plus the documentation for
+the subclass) is probably all you'll need to read.
+
+If you're reading this document because you want to write a formatter
+subclass, continue reading this document, and then read
+L<Pod::Simple::Subclassing>, and then possibly even read L<perlpodspec>
+(some of which is for parser-writers, but much of which is notes to
+formatter-writers).
+
+
+=head1 MAIN METHODS
+
+
+
+=over
+
+=item C<< $parser = I<SomeClass>->new(); >>
+
+This returns a new parser object, where I<C<SomeClass>> is a subclass
+of Pod::Simple.
+
+=item C<< $parser->output_fh( *OUT ); >>
+
+This sets the filehandle that C<$parser>'s output will be written to.
+You can pass C<*STDOUT>, otherwise you should probably do something
+like this:
+
+ my $outfile = "output.txt";
+ open TXTOUT, ">$outfile" or die "Can't write to $outfile: $!";
+ $parser->output_fh(*TXTOUT);
+
+...before you call one of the C<< $parser->parse_I<whatever> >> methods.
+
+=item C<< $parser->output_string( \$somestring ); >>
+
+This sets the string that C<$parser>'s output will be sent to,
+instead of any filehandle.
+
+
+=item C<< $parser->parse_file( I<$some_filename> ); >>
+
+=item C<< $parser->parse_file( *INPUT_FH ); >>
+
+This reads the Pod content of the file (or filehandle) that you specify,
+and processes it with that C<$parser> object, according to however
+C<$parser>'s class works, and according to whatever parser options you
+have set up for this C<$parser> object.
+
+=item C<< $parser->parse_string_document( I<$all_content> ); >>
+
+This works just like C<parse_file> except that it reads the Pod
+content not from a file, but from a string that you have already
+in memory.
+
+=item C<< $parser->parse_lines( I<...@lines...>, undef ); >>
+
+This processes the lines in C<@lines> (where each list item must be a
+defined value, and must contain exactly one line of content -- so no
+items like C<"foo\nbar"> are allowed). The final C<undef> is used to
+indicate the end of document being parsed.
+
+The other C<parser_I<whatever>> methods are meant to be called only once
+per C<$parser> object; but C<parse_lines> can be called as many times per
+C<$parser> object as you want, as long as the last call (and only
+the last call) ends with an C<undef> value.
+
+
+=item C<< $parser->content_seen >>
+
+This returns true only if there has been any real content seen
+for this document.
+
+
+=item C<< I<SomeClass>->filter( I<$filename> ); >>
+
+=item C<< I<SomeClass>->filter( I<*INPUT_FH> ); >>
+
+=item C<< I<SomeClass>->filter( I<\$document_content> ); >>
+
+This is a shortcut method for creating a new parser object, setting the
+output handle to STDOUT, and then processing the specified file (or
+filehandle, or in-memory document). This is handy for one-liners like
+this:
+
+ perl -MPod::Simple::Text -e "Pod::Simple::Text->filter('thingy.pod')"
+
+=back
+
+
+
+=head1 SECONDARY METHODS
+
+Some of these methods might be of interest to general users, as
+well as of interest to formatter-writers.
+
+Note that the general pattern here is that the accessor-methods
+read the attribute's value with C<< $value = $parser->I<attribute> >>
+and set the attribute's value with
+C<< $parser->I<attribute>(I<newvalue>) >>. For each accessor, I typically
+only mention one syntax or another, based on which I think you are actually
+most likely to use.
+
+
+=over
+
+=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, you will suppress the
+parser's complaints about irregularities in the Pod coding. By default,
+this attribute's value is false, meaning that irregularities will
+be reported.
+
+Note that turning this attribute to true won't suppress one or two kinds
+of complaints about rarely occurring unrecoverable errors.
+
+
+=item C<< $parser->no_errata_section( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, you will stop the parser from
+generating a "POD ERRORS" section at the end of the document. By
+default, this attribute's value is false, meaning that an errata section
+will be generated, as necessary.
+
+
+=item C<< $parser->complain_stderr( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, it will send reports of
+parsing errors to STDERR. By default, this attribute's value is false,
+meaning that no output is sent to STDERR.
+
+Note that errors can be noted in an errata section, or sent to STDERR,
+or both, or neither. So don't think that turning on C<complain_stderr>
+will turn off C<no_errata_section> or vice versa -- these are
+independent attributes.
+
+
+=item C<< $parser->source_filename >>
+
+This returns the filename that this parser object was set to read from.
+
+
+=item C<< $parser->doc_has_started >>
+
+This returns true if C<$parser> has read from a source, and has seen
+Pod content in it.
+
+
+=item C<< $parser->source_dead >>
+
+This returns true if C<$parser> has read from a source, and come to the
+end of that source.
+
+=back
+
+
+=head1 CAVEATS
+
+This is just a beta release -- there are a good number of things still
+left to do. Notably, support for EBCDIC platforms is still half-done,
+an untested.
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple::Subclassing>
+
+L<perlpod|perlpod>
+
+L<perlpodspec|perlpodspec>
+
+L<Pod::Escapes|Pod::Escapes>
+
+L<perldoc>
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Original author: Sean M. Burke C<sburke@cpan.org>
+
+Maintained by:
+
+=over
+
+=item * Allison Randal C<allison@perl.org>
+
+=item * Hans Dieter Pearcey C<hdp@cpan.org>
+
+=back
+
+=cut
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/ext/Pod-Simple/lib/Pod/Simple/BlackBox.pm
new file mode 100644
index 0000000000..4804973a2e
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/BlackBox.pm
@@ -0,0 +1,1919 @@
+
+package Pod::Simple::BlackBox;
+#
+# "What's in the box?" "Pain."
+#
+###########################################################################
+#
+# This is where all the scary things happen: parsing lines into
+# paragraphs; and then into directives, verbatims, and then also
+# turning formatting sequences into treelets.
+#
+# Are you really sure you want to read this code?
+#
+#-----------------------------------------------------------------------------
+#
+# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
+# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
+# to call the proper callbacks on the treelets.
+#
+# Every node in a treelet is a ['name', {attrhash}, ...children...]
+
+use integer; # vroom!
+use strict;
+use Carp ();
+BEGIN {
+ require Pod::Simple;
+ *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub parse_line { shift->parse_lines(@_) } # alias
+
+# - - - Turn back now! Run away! - - -
+
+sub parse_lines { # Usage: $parser->parse_lines(@lines)
+ # an undef means end-of-stream
+ my $self = shift;
+
+ my $code_handler = $self->{'code_handler'};
+ my $cut_handler = $self->{'cut_handler'};
+ $self->{'line_count'} ||= 0;
+
+ my $scratch;
+
+ DEBUG > 4 and
+ print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
+
+ DEBUG > 5 and
+ print "# About to parse lines: ",
+ join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
+
+ my $paras = ($self->{'paras'} ||= []);
+ # paragraph buffer. Because we need to defer processing of =over
+ # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
+ # to process this.
+
+ $self->{'pod_para_count'} ||= 0;
+
+ my $line;
+ foreach my $source_line (@_) {
+ if( $self->{'source_dead'} ) {
+ DEBUG > 4 and print "# Source is dead.\n";
+ last;
+ }
+
+ unless( defined $source_line ) {
+ DEBUG > 4 and print "# Undef-line seen.\n";
+
+ push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
+ push @$paras, $paras->[-1], $paras->[-1];
+ # So that it definitely fills the buffer.
+ $self->{'source_dead'} = 1;
+ $self->_ponder_paragraph_buffer;
+ next;
+ }
+
+
+ if( $self->{'line_count'}++ ) {
+ ($line = $source_line) =~ tr/\n\r//d;
+ # If we don't have two vars, we'll end up with that there
+ # tr/// modding the (potentially read-only) original source line!
+
+ } else {
+ DEBUG > 2 and print "First line: [$source_line]\n";
+
+ if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
+ DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n";
+ $self->_handle_encoding_line( "=encode utf8" );
+ $line =~ tr/\n\r//d;
+
+ } elsif( $line =~ s/^\xFE\xFF//s ) {
+ DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
+ $self->scream(
+ $self->{'line_count'},
+ "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
+ );
+ splice @_;
+ push @_, undef;
+ next;
+
+ # TODO: implement somehow?
+
+ } elsif( $line =~ s/^\xFF\xFE//s ) {
+ DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
+ $self->scream(
+ $self->{'line_count'},
+ "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
+ );
+ splice @_;
+ push @_, undef;
+ next;
+
+ # TODO: implement somehow?
+
+ } else {
+ DEBUG > 2 and print "First line is BOM-less.\n";
+ ($line = $source_line) =~ tr/\n\r//d;
+ }
+ }
+
+
+ DEBUG > 5 and print "# Parsing line: [$line]\n";
+
+ if(!$self->{'in_pod'}) {
+ if($line =~ m/^=([a-zA-Z]+)/s) {
+ if($1 eq 'cut') {
+ $self->scream(
+ $self->{'line_count'},
+ "=cut found outside a pod block. Skipping to next block."
+ );
+
+ ## Before there were errata sections in the world, it was
+ ## least-pessimal to abort processing the file. But now we can
+ ## just barrel on thru (but still not start a pod block).
+ #splice @_;
+ #push @_, undef;
+
+ next;
+ } else {
+ $self->{'in_pod'} = $self->{'start_of_pod_block'}
+ = $self->{'last_was_blank'} = 1;
+ # And fall thru to the pod-mode block further down
+ }
+ } else {
+ DEBUG > 5 and print "# It's a code-line.\n";
+ $code_handler->(map $_, $line, $self->{'line_count'}, $self)
+ if $code_handler;
+ # Note: this may cause code to be processed out of order relative
+ # to pods, but in order relative to cuts.
+
+ # Note also that we haven't yet applied the transcoding to $line
+ # by time we call $code_handler!
+
+ if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
+ # That RE is from perlsyn, section "Plain Old Comments (Not!)",
+ #$fname = $2 if defined $2;
+ #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
+ DEBUG > 1 and print "# Setting nextline to $1\n";
+ $self->{'line_count'} = $1 - 1;
+ }
+
+ next;
+ }
+ }
+
+ # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ # Else we're in pod mode:
+
+ # Apply any necessary transcoding:
+ $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
+
+ # HERE WE CATCH =encoding EARLY!
+ if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
+ $line = $self->_handle_encoding_line( $line );
+ }
+
+ if($line =~ m/^=cut/s) {
+ # here ends the pod block, and therefore the previous pod para
+ DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
+ $self->{'in_pod'} = 0;
+ # ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
+ if $cut_handler;
+
+ # TODO: add to docs: Note: this may cause cuts to be processed out
+ # of order relative to pods, but in order relative to code.
+
+ } elsif($line =~ m/^\s*$/s) { # it's a blank line
+ if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
+ DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
+ push @{$paras->[-1]}, $line;
+ } # otherwise it's not interesting
+
+ if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
+ DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
+ }
+
+ $self->{'last_was_blank'} = 1;
+
+ } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
+
+ if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
+ # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
+ my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
+ # Note that in "=head1 foo", the WS is lost.
+ # Example: ['=head1', {'start_line' => 123}, ' foo']
+
+ ++$self->{'pod_para_count'};
+
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+
+ push @$paras, $new; # the new incipient paragraph
+ DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
+
+ } elsif($line =~ m/^\s/s) {
+
+ if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
+ DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
+ push @{$paras->[-1]}, $line;
+ } else {
+ ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
+ push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
+ }
+ } else {
+ ++$self->{'pod_para_count'};
+ $self->_ponder_paragraph_buffer();
+ # by now it's safe to consider the previous paragraph as done.
+ push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
+ DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
+ }
+ $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
+
+ } else {
+ # It's a non-blank line /continuing/ the current para
+ if(@$paras) {
+ DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
+ push @{$paras->[-1]}, $line;
+ } else {
+ # Unexpected case!
+ die "Continuing a paragraph but \@\$paras is empty?";
+ }
+ $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
+ }
+
+ } # ends the big while loop
+
+ DEBUG > 1 and print(pretty(@$paras), "\n");
+ return $self;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _handle_encoding_line {
+ my($self, $line) = @_;
+
+ # The point of this routine is to set $self->{'_transcoder'} as indicated.
+
+ return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
+ DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
+
+ my $e = $1;
+ my $orig = $e;
+ push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
+
+ my $enc_error;
+
+ # Cf. perldoc Encode and perldoc Encode::Supported
+
+ require Pod::Simple::Transcode;
+
+ if( $self->{'encoding'} ) {
+ my $norm_current = $self->{'encoding'};
+ my $norm_e = $e;
+ foreach my $that ($norm_current, $norm_e) {
+ $that = lc($that);
+ $that =~ s/[-_]//g;
+ }
+ if($norm_current eq $norm_e) {
+ DEBUG > 1 and print "The '=encoding $orig' line is ",
+ "redundant. ($norm_current eq $norm_e). Ignoring.\n";
+ $enc_error = '';
+ # But that doesn't necessarily mean that the earlier one went okay
+ } else {
+ $enc_error = "Encoding is already set to " . $self->{'encoding'};
+ DEBUG > 1 and print $enc_error;
+ }
+ } elsif (
+ # OK, let's turn on the encoding
+ do {
+ DEBUG > 1 and print " Setting encoding to $e\n";
+ $self->{'encoding'} = $e;
+ 1;
+ }
+ and $e eq 'HACKRAW'
+ ) {
+ DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
+
+ } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
+
+ die($enc_error = "WHAT? _transcoder is already set?!")
+ if $self->{'_transcoder'}; # should never happen
+ require Pod::Simple::Transcode;
+ $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
+ eval {
+ my @x = ('', "abc", "123");
+ $self->{'_transcoder'}->(@x);
+ };
+ $@ && die( $enc_error =
+ "Really unexpected error setting up encoding $e: $@\nAborting"
+ );
+
+ } else {
+ my @supported = Pod::Simple::Transcode::->all_encodings;
+
+ # Note unsupported, and complain
+ DEBUG and print " Encoding [$e] is unsupported.",
+ "\nSupporteds: @supported\n";
+ my $suggestion = '';
+
+ # Look for a near match:
+ my $norm = lc($e);
+ $norm =~ tr[-_][]d;
+ my $n;
+ foreach my $enc (@supported) {
+ $n = lc($enc);
+ $n =~ tr[-_][]d;
+ next unless $n eq $norm;
+ $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
+ last;
+ }
+ my $encmodver = Pod::Simple::Transcode::->encmodver;
+ $enc_error = join '' =>
+ "This document probably does not appear as it should, because its ",
+ "\"=encoding $e\" line calls for an unsupported encoding.",
+ $suggestion, " [$encmodver\'s supported encodings are: @supported]"
+ ;
+
+ $self->scream( $self->{'line_count'}, $enc_error );
+ }
+ push @{ $self->{'encoding_command_statuses'} }, $enc_error;
+
+ return '=encoding ALREADYDONE';
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _handle_encoding_second_level {
+ # By time this is called, the encoding (if well formed) will already
+ # have been acted one.
+ my($self, $para) = @_;
+ my @x = @$para;
+ my $content = join ' ', splice @x, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+
+ DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
+
+ if($content eq 'ALREADYDONE') {
+ # It's already been handled. Check for errors.
+ if(! $self->{'encoding_command_statuses'} ) {
+ DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
+ } elsif( $self->{'encoding_command_statuses'}[-1] ) {
+ $self->whine( $para->[1]{'start_line'},
+ sprintf "Couldn't do %s: %s",
+ $self->{'encoding_command_reqs' }[-1],
+ $self->{'encoding_command_statuses'}[-1],
+ );
+ } else {
+ DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
+ }
+
+ } else {
+ # Otherwise it's a syntax error
+ $self->whine( $para->[1]{'start_line'},
+ "Invalid =encoding syntax: $content"
+ );
+ }
+
+ return;
+}
+
+#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
+
+{
+my $m = -321; # magic line number
+
+sub _gen_errata {
+ my $self = $_[0];
+ # Return 0 or more fake-o paragraphs explaining the accumulated
+ # errors on this document.
+
+ return() unless $self->{'errata'} and keys %{$self->{'errata'}};
+
+ my @out;
+
+ foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
+ push @out,
+ ['=item', {'start_line' => $m}, "Around line $line:"],
+ map( ['~Para', {'start_line' => $m, '~cooked' => 1},
+ #['~Top', {'start_line' => $m},
+ $_
+ #]
+ ],
+ @{$self->{'errata'}{$line}}
+ )
+ ;
+ }
+
+ # TODO: report of unknown entities? unrenderable characters?
+
+ unshift @out,
+ ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
+ ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
+ "Hey! ",
+ ['B', {},
+ 'The above document had some coding errors, which are explained below:'
+ ]
+ ],
+ ['=over', {'start_line' => $m, 'errata' => 1}, ''],
+ ;
+
+ push @out,
+ ['=back', {'start_line' => $m, 'errata' => 1}, ''],
+ ;
+
+ DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
+
+ return @out;
+}
+
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+##############################################################################
+##
+## stop reading now stop reading now stop reading now stop reading now stop
+##
+## HERE IT BECOMES REALLY SCARY
+##
+## stop reading now stop reading now stop reading now stop reading now stop
+##
+##############################################################################
+
+sub _ponder_paragraph_buffer {
+
+ # Para-token types as found in the buffer.
+ # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
+ # =over, =back, =item
+ # and the null =pod (to be complained about if over one line)
+ #
+ # "~data" paragraphs are something we generate at this level, depending on
+ # a currently open =over region
+
+ # Events fired: Begin and end for:
+ # directivename (like head1 .. head4), item, extend,
+ # for (from =begin...=end, =for),
+ # over-bullet, over-number, over-text, over-block,
+ # item-bullet, item-number, item-text,
+ # Document,
+ # Data, Para, Verbatim
+ # B, C, longdirname (TODO -- wha?), etc. for all directives
+ #
+
+ my $self = $_[0];
+ my $paras;
+ return unless @{$paras = $self->{'paras'}};
+ my $curr_open = ($self->{'curr_open'} ||= []);
+
+ my $scratch;
+
+ DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
+
+ # We have something in our buffer. So apparently the document has started.
+ unless($self->{'doc_has_started'}) {
+ $self->{'doc_has_started'} = 1;
+
+ my $starting_contentless;
+ $starting_contentless =
+ (
+ !@$curr_open
+ and @$paras and ! grep $_->[0] ne '~end', @$paras
+ # i.e., if the paras is all ~ends
+ )
+ ;
+ DEBUG and print "# Starting ",
+ $starting_contentless ? 'contentless' : 'contentful',
+ " document\n"
+ ;
+
+ $self->_handle_element_start(
+ ($scratch = 'Document'),
+ {
+ 'start_line' => $paras->[0][1]{'start_line'},
+ $starting_contentless ? ( 'contentless' => 1 ) : (),
+ },
+ );
+ }
+
+ my($para, $para_type);
+ while(@$paras) {
+ last if @$paras == 1 and
+ ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
+ or $paras->[0][0] eq '=item' )
+ ;
+ # Those're the three kinds of paragraphs that require lookahead.
+ # Actually, an "=item Foo" inside an <over type=text> region
+ # and any =item inside an <over type=block> region (rare)
+ # don't require any lookahead, but all others (bullets
+ # and numbers) do.
+
+# TODO: winge about many kinds of directives in non-resolving =for regions?
+# TODO: many? like what? =head1 etc?
+
+ $para = shift @$paras;
+ $para_type = $para->[0];
+
+ DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
+ $self->_dump_curr_open(), ")\n";
+
+ if($para_type eq '=for') {
+ next if $self->_ponder_for($para,$curr_open,$paras);
+
+ } elsif($para_type eq '=begin') {
+ next if $self->_ponder_begin($para,$curr_open,$paras);
+
+ } elsif($para_type eq '=end') {
+ next if $self->_ponder_end($para,$curr_open,$paras);
+
+ } elsif($para_type eq '~end') { # The virtual end-document signal
+ next if $self->_ponder_doc_end($para,$curr_open,$paras);
+ }
+
+
+ # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+ #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and
+ print "Skipping $para_type paragraph because in ignore mode.\n";
+ next;
+ }
+ #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+ # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+
+ if($para_type eq '=pod') {
+ $self->_ponder_pod($para,$curr_open,$paras);
+
+ } elsif($para_type eq '=over') {
+ next if $self->_ponder_over($para,$curr_open,$paras);
+
+ } elsif($para_type eq '=back') {
+ next if $self->_ponder_back($para,$curr_open,$paras);
+
+ } else {
+
+ # All non-magical codes!!!
+
+ # Here we start using $para_type for our own twisted purposes, to
+ # mean how it should get treated, not as what the element name
+ # should be.
+
+ DEBUG > 1 and print "Pondering non-magical $para_type\n";
+
+ my $i;
+
+ # Enforce some =headN discipline
+ if($para_type =~ m/^=head\d$/s
+ and ! $self->{'accept_heads_anywhere'}
+ and @$curr_open
+ and $curr_open->[-1][0] eq '=over'
+ ) {
+ DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You forgot a '=back' before '$para_type'"
+ );
+ unshift @$paras, ['=back', {}, ''], $para; # close the =over
+ next;
+ }
+
+
+ if($para_type eq '=item') {
+
+ my $over;
+ unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=item' outside of any '=over'"
+ );
+ unshift @$paras,
+ ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
+ $para
+ ;
+ next;
+ }
+
+
+ my $over_type = $over->[1]{'~type'};
+
+ if(!$over_type) {
+ # Shouldn't happen1
+ die "Typeless over in stack, starting at line "
+ . $over->[1]{'start_line'};
+
+ } elsif($over_type eq 'block') {
+ unless($curr_open->[-1][1]{'~bitched_about'}) {
+ $curr_open->[-1][1]{'~bitched_about'} = 1;
+ $self->whine(
+ $curr_open->[-1][1]{'start_line'},
+ "You can't have =items (as at line "
+ . $para->[1]{'start_line'}
+ . ") unless the first thing after the =over is an =item"
+ );
+ }
+ # Just turn it into a paragraph and reconsider it
+ $para->[0] = '~Para';
+ unshift @$paras, $para;
+ next;
+
+ } elsif($over_type eq 'text') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'text') {
+ # Nothing special needs doing for 'text'
+ } elsif($item_type eq 'number' or $item_type eq 'bullet') {
+ die "Unknown item type $item_type"
+ unless $item_type eq 'number' or $item_type eq 'bullet';
+ # Undo our clobbering:
+ push @$para, $para->[1]{'~orig_content'};
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ # =item-text thingies don't need any assimilation, it seems.
+
+ } elsif($over_type eq 'number') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
+
+ if($item_type eq 'bullet') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # restore the bullet, blocking the assimilation of next para
+
+ } elsif($item_type eq 'text') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ # Text content will still be there and will block next ~Para
+
+ } elsif($item_type ne 'number') {
+ die "Unknown item type $item_type"; # should never happen
+
+ } elsif($expected_value == $para->[1]{'number'}) {
+ DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
+
+ } else {
+ DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
+ " instead of the expected value of $expected_value\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You have '=item " . $para->[1]{'number'} .
+ "' instead of the expected '=item $expected_value'"
+ );
+ $para->[1]{'number'} = $expected_value; # correcting!!
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+
+ } elsif($over_type eq 'bullet') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'bullet') {
+ # as expected!
+
+ if( $para->[1]{'~_freaky_para_hack'} ) {
+ DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
+ push @$para, delete $para->[1]{'~_freaky_para_hack'};
+ }
+
+ } elsif($item_type eq 'number') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # and block assimilation of the next paragraph
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } elsif($item_type eq 'text') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ # But doesn't need processing. But it'll block assimilation
+ # of the next para.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+ } else {
+ die "Unhandled =over type \"$over_type\"?";
+ # Shouldn't happen!
+ }
+
+ $para_type = 'Plain';
+ $para->[0] .= '-' . $over_type;
+ # Whew. Now fall thru and process it.
+
+
+ } elsif($para_type eq '=extend') {
+ # Well, might as well implement it here.
+ $self->_ponder_extend($para);
+ next; # and skip
+ } elsif($para_type eq '=encoding') {
+ # Not actually acted on here, but we catch errors here.
+ $self->_handle_encoding_second_level($para);
+
+ next; # and skip
+ } elsif($para_type eq '~Verbatim') {
+ $para->[0] = 'Verbatim';
+ $para_type = '?Verbatim';
+ } elsif($para_type eq '~Para') {
+ $para->[0] = 'Para';
+ $para_type = '?Plain';
+ } elsif($para_type eq 'Data') {
+ $para->[0] = 'Data';
+ $para_type = '?Data';
+ } elsif( $para_type =~ s/^=//s
+ and defined( $para_type = $self->{'accept_directives'}{$para_type} )
+ ) {
+ DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
+ } else {
+ # An unknown directive!
+ DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
+ $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
+ ;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Unknown directive: $para->[0]"
+ );
+
+ # And maybe treat it as text instead of just letting it go?
+ next;
+ }
+
+ if($para_type =~ s/^\?//s) {
+ if(! @$curr_open) { # usual case
+ DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
+ } else {
+ my @fors = grep $_->[0] eq '=for', @$curr_open;
+ DEBUG > 1 and print "Containing fors: ",
+ join(',', map $_->[1]{'target'}, @fors), "\n";
+
+ if(! @fors) {
+ DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
+
+ #} elsif(grep $_->[1]{'~resolve'}, @fors) {
+ #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
+ } elsif( $fors[-1][1]{'~resolve'} ) {
+ # Look to the immediately containing for
+
+ if($para_type eq 'Data') {
+ DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
+ $para->[0] = 'Para';
+ $para_type = 'Plain';
+ } else {
+ DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
+ }
+ } else {
+ DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
+ $para->[0] = $para_type = 'Data';
+ }
+ }
+ }
+
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ if($para_type eq 'Plain') {
+ $self->_ponder_Plain($para);
+ } elsif($para_type eq 'Verbatim') {
+ $self->_ponder_Verbatim($para);
+ } elsif($para_type eq 'Data') {
+ $self->_ponder_Data($para);
+ } else {
+ die "\$para type is $para_type -- how did that happen?";
+ # Shouldn't happen.
+ }
+
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ $para->[0] =~ s/^[~=]//s;
+
+ DEBUG and print "\n", pretty($para), "\n";
+
+ # traverse the treelet (which might well be just one string scalar)
+ $self->{'content_seen'} ||= 1;
+ $self->_traverse_treelet_bit(@$para);
+ }
+ }
+
+ return;
+}
+
+###########################################################################
+# The sub-ponderers...
+
+
+
+sub _ponder_for {
+ my ($self,$para,$curr_open,$paras) = @_;
+
+ # Fake it out as a begin/end
+ my $target;
+
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =for\n";
+ return 1;
+ }
+
+ for(my $i = 2; $i < @$para; ++$i) {
+ if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
+ $target = $1;
+ last;
+ }
+ }
+ unless(defined $target) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=for without a target?"
+ );
+ return 1;
+ }
+ DEBUG > 1 and
+ print "Faking out a =for $target as a =begin $target / =end $target\n";
+
+ $para->[0] = 'Data';
+
+ unshift @$paras,
+ ['=begin',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ $para,
+ ['=end',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ ;
+
+ return 1;
+}
+
+sub _ponder_begin {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=begin without a target?"
+ );
+ DEBUG and print "Ignoring targetless =begin\n";
+ return 1;
+ }
+
+ my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
+ $para->[1]{'title'} = $title if ($title);
+ $para->[1]{'target'} = $target; # without any ':'
+ $content = $target; # strip off the title
+
+ $content =~ s/^:!/!:/s;
+ my $neg; # whether this is a negation-match
+ $neg = 1 if $content =~ s/^!//s;
+ my $to_resolve; # whether to process formatting codes
+ $to_resolve = 1 if $content =~ s/^://s;
+
+ my $dont_ignore; # whether this target matches us
+
+ foreach my $target_name (
+ split(',', $content, -1),
+ $neg ? () : '*'
+ ) {
+ DEBUG > 2 and
+ print " Considering whether =begin $content matches $target_name\n";
+ next unless $self->{'accept_targets'}{$target_name};
+
+ DEBUG > 2 and
+ print " It DOES match the acceptable target $target_name!\n";
+ $to_resolve = 1
+ if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = $target_name;
+ last; # stop looking at other target names
+ }
+
+ if($neg) {
+ if( $dont_ignore ) {
+ $dont_ignore = '';
+ delete $para->[1]{'target_matching'};
+ DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
+ } else {
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = '!';
+ DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
+ }
+ }
+
+ $para->[0] = '=for'; # Just what we happen to call these, internally
+ $para->[1]{'~really'} ||= '=begin';
+ $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
+ $para->[1]{'~resolve'} = $to_resolve || 0;
+
+ DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
+ "ignore contents of this region\n";
+ DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
+ ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
+ DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
+
+ push @$curr_open, $para;
+ if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =begin\n";
+ } else {
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_start((my $scratch='for'), $para->[1]);
+ }
+
+ return 1;
+}
+
+sub _ponder_end {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ DEBUG and print "Ogling '=end $content' directive\n";
+
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end' without a target?" . (
+ ( @$curr_open and $curr_open->[-1][0] eq '=for' )
+ ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
+ : ''
+ )
+ );
+ DEBUG and print "Ignoring targetless =end\n";
+ return 1;
+ }
+
+ unless($content =~ m/^\S+$/) { # i.e., unless it's one word
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end $content' is invalid. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ return 1;
+ }
+
+ unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content without matching =begin. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ return 1;
+ }
+
+ unless($content eq $curr_open->[-1][1]{'target'}) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content doesn't match =begin "
+ . $curr_open->[-1][1]{'target'}
+ . ". (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
+ return 1;
+ }
+
+ # Else it's okay to close...
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
+ # And that may be because of this to-be-closed =for region, or some
+ # other one, but it doesn't matter.
+ } else {
+ $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
+ # what's that for?
+
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_end( my $scratch = 'for' );
+ }
+ DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
+ pop @$curr_open;
+
+ return 1;
+}
+
+sub _ponder_doc_end {
+ my ($self,$para,$curr_open,$paras) = @_;
+ if(@$curr_open) { # Deal with things left open
+ DEBUG and print "Stack is nonempty at end-document: (",
+ $self->_dump_curr_open(), ")\n";
+
+ DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
+ unshift @$paras, $self->_closers_for_all_curr_open;
+ # Make sure there is exactly one ~end in the parastack, at the end:
+ @$paras = grep $_->[0] ne '~end', @$paras;
+ push @$paras, $para, $para;
+ # We need two -- once for the next cycle where we
+ # generate errata, and then another to be at the end
+ # when that loop back around to process the errata.
+ return 1;
+
+ } else {
+ DEBUG and print "Okay, stack is empty now.\n";
+ }
+
+ # Try generating errata section, if applicable
+ unless($self->{'~tried_gen_errata'}) {
+ $self->{'~tried_gen_errata'} = 1;
+ my @extras = $self->_gen_errata();
+ if(@extras) {
+ unshift @$paras, @extras;
+ DEBUG and print "Generated errata... relooping...\n";
+ return 1; # I.e., loop around again to process these fake-o paragraphs
+ }
+ }
+
+ splice @$paras; # Well, that's that for this paragraph buffer.
+ DEBUG and print "Throwing end-document event.\n";
+
+ $self->_handle_element_end( my $scratch = 'Document' );
+ return 1; # Hasta la byebye
+}
+
+sub _ponder_pod {
+ my ($self,$para,$curr_open,$paras) = @_;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=pod directives shouldn't be over one line long! Ignoring all "
+ . (@$para - 2) . " lines of content"
+ ) if @$para > 3;
+ # Content is always ignored.
+ return;
+}
+
+sub _ponder_over {
+ my ($self,$para,$curr_open,$paras) = @_;
+ return 1 unless @$paras;
+ my $list_type;
+
+ if($paras->[0][0] eq '=item') { # most common case
+ $list_type = $self->_get_initial_item_type($paras->[0]);
+
+ } elsif($paras->[0][0] eq '=back') {
+ # Ignore empty lists. TODO: make this an option?
+ shift @$paras;
+ return 1;
+
+ } elsif($paras->[0][0] eq '~end') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=over is the last thing in the document?!"
+ );
+ return 1; # But feh, ignore it.
+ } else {
+ $list_type = 'block';
+ }
+ $para->[1]{'~type'} = $list_type;
+ push @$curr_open, $para;
+ # yes, we reuse the paragraph as a stack item
+
+ my $content = join ' ', splice @$para, 2;
+ my $overness;
+ if($content =~ m/^\s*$/s) {
+ $para->[1]{'indent'} = 4;
+ } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
+ no integer;
+ $para->[1]{'indent'} = $1;
+ if($1 == 0) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Can't have a 0 in =over $content"
+ );
+ $para->[1]{'indent'} = 4;
+ }
+ } else {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=over should be: '=over' or '=over positive_number'"
+ );
+ $para->[1]{'indent'} = 4;
+ }
+ DEBUG > 1 and print "=over found of type $list_type\n";
+
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
+
+ return;
+}
+
+sub _ponder_back {
+ my ($self,$para,$curr_open,$paras) = @_;
+ # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
+
+ my $content = join ' ', splice @$para, 2;
+ if($content =~ m/\S/) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=back doesn't take any parameters, but you said =back $content"
+ );
+ }
+
+ if(@$curr_open and $curr_open->[-1][0] eq '=over') {
+ DEBUG > 1 and print "=back happily closes matching =over\n";
+ # Expected case: we're closing the most recently opened thing
+ #my $over = pop @$curr_open;
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_end( my $scratch =
+ 'over-' . ( (pop @$curr_open)->[1]{'~type'} )
+ );
+ } else {
+ DEBUG > 1 and print "=back found without a matching =over. Stack: (",
+ join(', ', map $_->[0], @$curr_open), ").\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ '=back without =over'
+ );
+ return 1; # and ignore it
+ }
+}
+
+sub _ponder_item {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $over;
+ unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=item' outside of any '=over'"
+ );
+ unshift @$paras,
+ ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
+ $para
+ ;
+ return 1;
+ }
+
+
+ my $over_type = $over->[1]{'~type'};
+
+ if(!$over_type) {
+ # Shouldn't happen1
+ die "Typeless over in stack, starting at line "
+ . $over->[1]{'start_line'};
+
+ } elsif($over_type eq 'block') {
+ unless($curr_open->[-1][1]{'~bitched_about'}) {
+ $curr_open->[-1][1]{'~bitched_about'} = 1;
+ $self->whine(
+ $curr_open->[-1][1]{'start_line'},
+ "You can't have =items (as at line "
+ . $para->[1]{'start_line'}
+ . ") unless the first thing after the =over is an =item"
+ );
+ }
+ # Just turn it into a paragraph and reconsider it
+ $para->[0] = '~Para';
+ unshift @$paras, $para;
+ return 1;
+
+ } elsif($over_type eq 'text') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'text') {
+ # Nothing special needs doing for 'text'
+ } elsif($item_type eq 'number' or $item_type eq 'bullet') {
+ die "Unknown item type $item_type"
+ unless $item_type eq 'number' or $item_type eq 'bullet';
+ # Undo our clobbering:
+ push @$para, $para->[1]{'~orig_content'};
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ # =item-text thingies don't need any assimilation, it seems.
+
+ } elsif($over_type eq 'number') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
+
+ if($item_type eq 'bullet') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # restore the bullet, blocking the assimilation of next para
+
+ } elsif($item_type eq 'text') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ # Text content will still be there and will block next ~Para
+
+ } elsif($item_type ne 'number') {
+ die "Unknown item type $item_type"; # should never happen
+
+ } elsif($expected_value == $para->[1]{'number'}) {
+ DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
+
+ } else {
+ DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
+ " instead of the expected value of $expected_value\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You have '=item " . $para->[1]{'number'} .
+ "' instead of the expected '=item $expected_value'"
+ );
+ $para->[1]{'number'} = $expected_value; # correcting!!
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+
+ } elsif($over_type eq 'bullet') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'bullet') {
+ # as expected!
+
+ if( $para->[1]{'~_freaky_para_hack'} ) {
+ DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
+ push @$para, delete $para->[1]{'~_freaky_para_hack'};
+ }
+
+ } elsif($item_type eq 'number') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # and block assimilation of the next paragraph
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } elsif($item_type eq 'text') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ # But doesn't need processing. But it'll block assimilation
+ # of the next para.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+ } else {
+ die "Unhandled =over type \"$over_type\"?";
+ # Shouldn't happen!
+ }
+ $para->[0] .= '-' . $over_type;
+
+ return;
+}
+
+sub _ponder_Plain {
+ my ($self,$para) = @_;
+ DEBUG and print " giving plain treatment...\n";
+ unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
+ or $para->[1]{'~cooked'}
+ ) {
+ push @$para,
+ @{$self->_make_treelet(
+ join("\n", splice(@$para, 2)),
+ $para->[1]{'start_line'}
+ )};
+ }
+ # Empty paragraphs don't need a treelet for any reason I can see.
+ # And precooked paragraphs already have a treelet.
+ return;
+}
+
+sub _ponder_Verbatim {
+ my ($self,$para) = @_;
+ DEBUG and print " giving verbatim treatment...\n";
+
+ $para->[1]{'xml:space'} = 'preserve';
+ for(my $i = 2; $i < @$para; $i++) {
+ foreach my $line ($para->[$i]) { # just for aliasing
+ while( $line =~
+ # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
+ # tabs are at every EIGHTH column. For portability, it has to be
+ # one setting everywhere, and 8th wins.
+ s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
+ ) {}
+
+ # TODO: whinge about (or otherwise treat) unindented or overlong lines
+
+ }
+ }
+
+ # Now the VerbatimFormatted hoodoo...
+ if( $self->{'accept_codes'} and
+ $self->{'accept_codes'}{'VerbatimFormatted'}
+ ) {
+ while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
+ # Kill any number of terminal newlines
+ $self->_verbatim_format($para);
+ } elsif ($self->{'codes_in_verbatim'}) {
+ push @$para,
+ @{$self->_make_treelet(
+ join("\n", splice(@$para, 2)),
+ $para->[1]{'start_line'}, $para->[1]{'xml:space'}
+ )};
+ $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
+ } else {
+ push @$para, join "\n", splice(@$para, 2) if @$para > 3;
+ $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
+ }
+ return;
+}
+
+sub _ponder_Data {
+ my ($self,$para) = @_;
+ DEBUG and print " giving data treatment...\n";
+ $para->[1]{'xml:space'} = 'preserve';
+ push @$para, join "\n", splice(@$para, 2) if @$para > 3;
+ return;
+}
+
+
+
+
+###########################################################################
+
+sub _traverse_treelet_bit { # for use only by the routine above
+ my($self, $name) = splice @_,0,2;
+
+ my $scratch;
+ $self->_handle_element_start(($scratch=$name), shift @_);
+
+ foreach my $x (@_) {
+ if(ref($x)) {
+ &_traverse_treelet_bit($self, @$x);
+ } else {
+ $self->_handle_text($x);
+ }
+ }
+
+ $self->_handle_element_end($scratch=$name);
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _closers_for_all_curr_open {
+ my $self = $_[0];
+ my @closers;
+ foreach my $still_open (@{ $self->{'curr_open'} || return }) {
+ my @copy = @$still_open;
+ $copy[1] = {%{ $copy[1] }};
+ #$copy[1]{'start_line'} = -1;
+ if($copy[0] eq '=for') {
+ $copy[0] = '=end';
+ } elsif($copy[0] eq '=over') {
+ $copy[0] = '=back';
+ } else {
+ die "I don't know how to auto-close an open $copy[0] region";
+ }
+
+ unless( @copy > 2 ) {
+ push @copy, $copy[1]{'target'};
+ $copy[-1] = '' unless defined $copy[-1];
+ # since =over's don't have targets
+ }
+
+ DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
+ unshift @closers, \@copy;
+ }
+ return @closers;
+}
+
+#--------------------------------------------------------------------------
+
+sub _verbatim_format {
+ my($it, $p) = @_;
+
+ my $formatting;
+
+ for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
+ DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
+ $p->[$i] .= "\n";
+ # Unlike with simple Verbatim blocks, we don't end up just doing
+ # a join("\n", ...) on the contents, so we have to append a
+ # newline to ever line, and then nix the last one later.
+ }
+
+ if( DEBUG > 4 ) {
+ print "<<\n";
+ for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
+ print "_verbatim_format $i: $p->[$i]";
+ }
+ print ">>\n";
+ }
+
+ for(my $i = $#$p; $i > 2; $i--) {
+ # work backwards over the lines, except the first (#2)
+
+ #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
+ # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
+ # look at a formatty line preceding a nonformatty one
+ DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
+ if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
+ DEBUG > 5 and print " It's a formatty line. ",
+ "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
+
+ if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
+ DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
+ next;
+ } else {
+ DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
+ }
+ } else {
+ DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
+ next;
+ }
+
+ # A formatty line has to have #: in the first two columns, and uses
+ # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
+ # Example:
+ # What do you want? i like pie. [or whatever]
+ # #:^^^^^^^^^^^^^^^^^ /////////////
+
+
+ DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
+
+ $formatting = ' ' . $1;
+ $formatting =~ s/\s+$//s; # nix trailing whitespace
+ unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
+ splice @$p,$i,1; # remove this line
+ $i--; # don't consider next line
+ next;
+ }
+
+ if( length($formatting) >= length($p->[$i-1]) ) {
+ $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
+ } else {
+ $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
+ }
+ # Make $formatting and the previous line be exactly the same length,
+ # with $formatting having a " " as the last character.
+
+ DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";
+
+
+ my @new_line;
+ while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
+ #print "Format matches $1\n";
+
+ if($2) {
+ #print "SKIPPING <$2>\n";
+ push @new_line,
+ substr($p->[$i-1], pos($formatting)-length($1), length($1));
+ } else {
+ #print "SNARING $+\n";
+ push @new_line, [
+ (
+ $3 ? 'VerbatimB' :
+ $4 ? 'VerbatimI' :
+ $5 ? 'VerbatimBI' : die("Should never get called")
+ ), {},
+ substr($p->[$i-1], pos($formatting)-length($1), length($1))
+ ];
+ #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
+ }
+ }
+ my @nixed =
+ splice @$p, $i-1, 2, @new_line; # replace myself and the next line
+ DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
+
+ DEBUG > 6 and print "New version of the above line is these tokens (",
+ scalar(@new_line), "):",
+ map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
+ $i--; # So the next line we scrutinize is the line before the one
+ # that we just went and formatted
+ }
+
+ $p->[0] = 'VerbatimFormatted';
+
+ # Collapse adjacent text nodes, just for kicks.
+ for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
+ if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
+ DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
+ $p->[$i] .= splice @$p, $i+1, 1; # merge
+ --$i; # and back up
+ }
+ }
+
+ # Now look for the last text token, and remove the terminal newline
+ for( my $i = $#$p; $i >= 2; $i-- ) {
+ # work backwards over the tokens, even the first
+ if( !ref($p->[$i]) ) {
+ if($p->[$i] =~ s/\n$//s) {
+ DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
+ } else {
+ DEBUG > 5 and print
+ "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
+ }
+ last; # we only want the next one
+ }
+ }
+
+ return;
+}
+
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+
+sub _treelet_from_formatting_codes {
+ # Given a paragraph, returns a treelet. Full of scary tokenizing code.
+ # Like [ '~Top', {'start_line' => $start_line},
+ # "I like ",
+ # [ 'B', {}, "pie" ],
+ # "!"
+ # ]
+
+ my($self, $para, $start_line, $preserve_space) = @_;
+
+ my $treelet = ['~Top', {'start_line' => $start_line},];
+
+ unless ($preserve_space || $self->{'preserve_whitespace'}) {
+ $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'};
+
+ $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
+ $para =~ s/ $//;
+ $para =~ s/^ //;
+ }
+
+ # Only apparent problem the above code is that N<< >> turns into
+ # N<< >>. But then, word wrapping does that too! So don't do that!
+
+ my @stack;
+ my @lineage = ($treelet);
+
+ DEBUG > 4 and print "Paragraph:\n$para\n\n";
+
+ # Here begins our frightening tokenizer RE. The following regex matches
+ # text in four main parts:
+ #
+ # * Start-codes. The first alternative matches C< or C<<, the latter
+ # followed by some whitespace. $1 will hold the entire start code
+ # (including any space following a multiple-angle-bracket delimiter),
+ # and $2 will hold only the additional brackets past the first in a
+ # multiple-bracket delimiter. length($2) + 1 will be the number of
+ # closing brackets we have to find.
+ #
+ # * Closing brackets. Match some amount of whitespace followed by
+ # multiple close brackets. The logic to see if this closes anything
+ # is down below. Note that in order to parse C<< >> correctly, we
+ # have to use look-behind (?<=\s\s), since the match of the starting
+ # code will have consumed the whitespace.
+ #
+ # * A single closing bracket, to close a simple code like C<>.
+ #
+ # * Something that isn't a start or end code. We have to be careful
+ # about accepting whitespace, since perlpodspec says that any whitespace
+ # before a multiple-bracket closing delimiter should be ignored.
+ #
+ while($para =~
+ m/\G
+ (?:
+ # Match starting codes, including the whitespace following a
+ # multiple-delimiter start code. $1 gets the whole start code and
+ # $2 gets all but one of the <s in the multiple-bracket case.
+ ([A-Z]<(?:(<+)\s+)?)
+ |
+ # Match multiple-bracket end codes. $3 gets the whitespace that
+ # should be discarded before an end bracket but kept in other cases
+ # and $4 gets the end brackets themselves.
+ (\s+|(?<=\s\s))(>{2,})
+ |
+ (\s?>) # $5: simple end-codes
+ |
+ ( # $6: stuff containing no start-codes or end-codes
+ (?:
+ [^A-Z\s>]
+ |
+ (?:
+ [A-Z](?!<)
+ )
+ |
+ # whitespace is ok, but we don't want to eat the whitespace before
+ # a multiple-bracket end code.
+ # NOTE: we may still have problems with e.g. S<< >>
+ (?:
+ \s(?!\s*>{2,})
+ )
+ )+
+ )
+ )
+ /xgo
+ ) {
+ DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
+ if(defined $1) {
+ if(defined $2) {
+ DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
+ push @stack, length($2) + 1;
+ # length of the necessary complex end-code string
+ } else {
+ DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
+ push @stack, 0; # signal that we're looking for simple
+ }
+ push @lineage, [ substr($1,0,1), {}, ]; # new node object
+ push @{ $lineage[-2] }, $lineage[-1];
+
+ } elsif(defined $4) {
+ DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
+ # This is where it gets messy...
+ if(! @stack) {
+ # We saw " >>>>" but needed nothing. This is ALL just stuff then.
+ DEBUG > 4 and print " But it's really just stuff.\n";
+ push @{ $lineage[-1] }, $3, $4;
+ next;
+ } elsif(!$stack[-1]) {
+ # We saw " >>>>" but needed only ">". Back pos up.
+ DEBUG > 4 and print " And that's more than we needed to close simple.\n";
+ push @{ $lineage[-1] }, $3; # That was a for-real space, too.
+ pos($para) = pos($para) - length($4) + 1;
+ } elsif($stack[-1] == length($4)) {
+ # We found " >>>>", and it was exactly what we needed. Commonest case.
+ DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
+ } elsif($stack[-1] < length($4)) {
+ # We saw " >>>>" but needed only " >>". Back pos up.
+ DEBUG > 4 and print " And that's more than we needed to close complex.\n";
+ pos($para) = pos($para) - length($4) + $stack[-1];
+ } else {
+ # We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
+ DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
+ push @{ $lineage[-1] }, $3, $4;
+ next;
+ }
+ #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
+
+ push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
+ # Keep the element from being childless
+
+ pop @stack;
+ pop @lineage;
+
+ } elsif(defined $5) {
+ DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n";
+
+ if(@stack and ! $stack[-1]) {
+ # We're indeed expecting a simple end-code
+ DEBUG > 4 and print " It's indeed an end-code.\n";
+
+ if(length($5) == 2) { # There was a space there: " >"
+ push @{ $lineage[-1] }, ' ';
+ } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
+ push @{ $lineage[-1] }, ''; # keep it from being really childless
+ }
+
+ pop @stack;
+ pop @lineage;
+ } else {
+ DEBUG > 4 and print " It's just stuff.\n";
+ push @{ $lineage[-1] }, $5;
+ }
+
+ } elsif(defined $6) {
+ DEBUG > 3 and print "Found stuff \"$6\"\n";
+ push @{ $lineage[-1] }, $6;
+
+ } else {
+ # should never ever ever ever happen
+ DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
+ die "SPORK 512512!";
+ }
+ }
+
+ if(@stack) { # Uhoh, some sequences weren't closed.
+ my $x= "...";
+ while(@stack) {
+ push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
+ # Hmmmmm!
+
+ my $code = (pop @lineage)->[0];
+ my $ender_length = pop @stack;
+ if($ender_length) {
+ --$ender_length;
+ $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
+ } else {
+ $x = $code . "<$x>";
+ }
+ }
+ DEBUG > 1 and print "Unterminated $x sequence\n";
+ $self->whine($start_line,
+ "Unterminated $x sequence",
+ );
+ }
+
+ return $treelet;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
+ return stringify_lol($_[1]);
+}
+
+sub stringify_lol { # function: stringify_lol($lol)
+ my $string_form = '';
+ _stringify_lol( $_[0] => \$string_form );
+ return $string_form;
+}
+
+sub _stringify_lol { # the real recursor
+ my($lol, $to) = @_;
+ use UNIVERSAL ();
+ for(my $i = 2; $i < @$lol; ++$i) {
+ if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
+ _stringify_lol( $lol->[$i], $to); # recurse!
+ } else {
+ $$to .= $lol->[$i];
+ }
+ }
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _dump_curr_open { # return a string representation of the stack
+ my $curr_open = $_[0]{'curr_open'};
+
+ return '[empty]' unless @$curr_open;
+ return join '; ',
+ map {;
+ ($_->[0] eq '=for')
+ ? ( ($_->[1]{'~really'} || '=over')
+ . ' ' . $_->[1]{'target'})
+ : $_->[0]
+ }
+ @$curr_open
+ ;
+}
+
+###########################################################################
+my %pretty_form = (
+ "\a" => '\a', # ding!
+ "\b" => '\b', # BS
+ "\e" => '\e', # ESC
+ "\f" => '\f', # FF
+ "\t" => '\t', # tab
+ "\cm" => '\cm',
+ "\cj" => '\cj',
+ "\n" => '\n', # probably overrides one of either \cm or \cj
+ '"' => '\"',
+ '\\' => '\\\\',
+ '$' => '\\$',
+ '@' => '\\@',
+ '%' => '\\%',
+ '#' => '\\#',
+);
+
+sub pretty { # adopted from Class::Classless
+ # Not the most brilliant routine, but passable.
+ # Don't give it a cyclic data structure!
+ my @stuff = @_; # copy
+ my $x;
+ my $out =
+ # join ",\n" .
+ join ", ",
+ map {;
+ if(!defined($_)) {
+ "undef";
+ } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
+ $x = "[ " . pretty(@$_) . " ]" ;
+ $x;
+ } elsif(ref($_) eq 'SCALAR') {
+ $x = "\\" . pretty($$_) ;
+ $x;
+ } elsif(ref($_) eq 'HASH') {
+ my $hr = $_;
+ $x = "{" . join(", ",
+ map(pretty($_) . '=>' . pretty($hr->{$_}),
+ sort keys %$hr ) ) . "}" ;
+ $x;
+ } elsif(!length($_)) { q{''} # empty string
+ } elsif(
+ $_ eq '0' # very common case
+ or(
+ m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
+ and $_ ne '-0' # the strange case that that RE lets thru
+ )
+ ) { $_;
+ } else {
+ if( chr(65) eq 'A' ) {
+ s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
+ #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
+ <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
+ } else {
+ # We're in some crazy non-ASCII world!
+ s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
+ #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
+ <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
+ }
+ qq{"$_"};
+ }
+ } @stuff;
+ # $out =~ s/\n */ /g if length($out) < 75;
+ return $out;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# A rather unsubtle method of blowing away all the state information
+# from a parser object so it can be reused. Provided as a utility for
+# backward compatibilty in Pod::Man, etc. but not recommended for
+# general use.
+
+sub reinit {
+ my $self = shift;
+ foreach (qw(source_dead source_filename doc_has_started
+start_of_pod_block content_seen last_was_blank paras curr_open
+line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
+Title)) {
+
+ delete $self->{$_};
+ }
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Checker.pm b/ext/Pod-Simple/lib/Pod/Simple/Checker.pm
new file mode 100644
index 0000000000..0d01f50ec2
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Checker.pm
@@ -0,0 +1,171 @@
+
+# A quite dimwitted pod2plaintext that need only know how to format whatever
+# text comes out of Pod::BlackBox's _gen_errata
+
+require 5;
+package Pod::Simple::Checker;
+use strict;
+use Carp ();
+use Pod::Simple::Methody ();
+use Pod::Simple ();
+use vars qw( @ISA $VERSION );
+$VERSION = '2.02';
+@ISA = ('Pod::Simple::Methody');
+BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
+ ? \&Pod::Simple::DEBUG
+ : sub() {0}
+ }
+
+use Text::Wrap 98.112902 (); # was 2001.0131, but I don't think we need that
+$Text::Wrap::wrap = 'overflow';
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub any_errata_seen { # read-only accessor
+ return $_[1]->{'Errata_seen'};
+}
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->nix_X_codes(1);
+ $new->nbsp_for_S(1);
+ $new->{'Thispara'} = '';
+ $new->{'Indent'} = 0;
+ $new->{'Indentstring'} = ' ';
+ $new->{'Errata_seen'} = 0;
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub handle_text { $_[0]{'Errata_seen'} and $_[0]{'Thispara'} .= $_[1] }
+
+sub start_Para { $_[0]{'Thispara'} = '' }
+
+sub start_head1 {
+ if($_[0]{'Errata_seen'}) {
+ $_[0]{'Thispara'} = '';
+ } else {
+ if($_[1]{'errata'}) { # start of errata!
+ $_[0]{'Errata_seen'} = 1;
+ $_[0]{'Thispara'} = $_[0]{'source_filename'} ?
+ "$_[0]{'source_filename'} -- " : ''
+ }
+ }
+}
+sub start_head2 { $_[0]{'Thispara'} = '' }
+sub start_head3 { $_[0]{'Thispara'} = '' }
+sub start_head4 { $_[0]{'Thispara'} = '' }
+
+sub start_Verbatim { $_[0]{'Thispara'} = '' }
+sub start_item_bullet { $_[0]{'Thispara'} = '* ' }
+sub start_item_number { $_[0]{'Thispara'} = "$_[1]{'number'}. " }
+sub start_item_text { $_[0]{'Thispara'} = '' }
+
+sub start_over_bullet { ++$_[0]{'Indent'} }
+sub start_over_number { ++$_[0]{'Indent'} }
+sub start_over_text { ++$_[0]{'Indent'} }
+sub start_over_block { ++$_[0]{'Indent'} }
+
+sub end_over_bullet { --$_[0]{'Indent'} }
+sub end_over_number { --$_[0]{'Indent'} }
+sub end_over_text { --$_[0]{'Indent'} }
+sub end_over_block { --$_[0]{'Indent'} }
+
+
+# . . . . . Now the actual formatters:
+
+sub end_head1 { $_[0]->emit_par(-4) }
+sub end_head2 { $_[0]->emit_par(-3) }
+sub end_head3 { $_[0]->emit_par(-2) }
+sub end_head4 { $_[0]->emit_par(-1) }
+sub end_Para { $_[0]->emit_par( 0) }
+sub end_item_bullet { $_[0]->emit_par( 0) }
+sub end_item_number { $_[0]->emit_par( 0) }
+sub end_item_text { $_[0]->emit_par(-2) }
+
+sub emit_par {
+ return unless $_[0]{'Errata_seen'};
+ my($self, $tweak_indent) = splice(@_,0,2);
+ my $indent = ' ' x ( 2 * $self->{'Indent'} + ($tweak_indent||0) );
+ # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
+
+ $self->{'Thispara'} =~ tr{\xAD}{}d if Pod::Simple::ASCII;
+ my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n");
+ $out =~ tr{\xA0}{ } if Pod::Simple::ASCII;
+ print {$self->{'output_fh'}} $out,
+ #"\n"
+ ;
+ $self->{'Thispara'} = '';
+
+ return;
+}
+
+# . . . . . . . . . . And then off by its lonesome:
+
+sub end_Verbatim {
+ return unless $_[0]{'Errata_seen'};
+ my $self = shift;
+ if(Pod::Simple::ASCII) {
+ $self->{'Thispara'} =~ tr{\xA0}{ };
+ $self->{'Thispara'} =~ tr{\xAD}{}d;
+ }
+
+ my $i = ' ' x ( 2 * $self->{'Indent'} + 4);
+
+ $self->{'Thispara'} =~ s/^/$i/mg;
+
+ print { $self->{'output_fh'} } '',
+ $self->{'Thispara'},
+ "\n\n"
+ ;
+ $self->{'Thispara'} = '';
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::Checker -- check the Pod syntax of a document
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::Checker -e \
+ "exit Pod::Simple::Checker->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+This class is for checking the syntactic validity of Pod.
+It works by basically acting like a simple-minded version of
+L<Pod::Simple::Text> that formats only the "Pod Errors" section
+(if Pod::Simple even generates one for the given document).
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Checker>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Debug.pm b/ext/Pod-Simple/lib/Pod/Simple/Debug.pm
new file mode 100644
index 0000000000..b00e58daba
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Debug.pm
@@ -0,0 +1,151 @@
+
+require 5;
+package Pod::Simple::Debug;
+use strict;
+
+sub import {
+ my($value,$variable);
+
+ if(@_ == 2) {
+ $value = $_[1];
+ } elsif(@_ == 3) {
+ ($variable, $value) = @_[1,2];
+
+ ($variable, $value) = ($value, $variable)
+ if defined $value and ref($value) eq 'SCALAR'
+ and not(defined $variable and ref($variable) eq 'SCALAR')
+ ; # tolerate getting it backwards
+
+ unless( defined $variable and ref($variable) eq 'SCALAR') {
+ require Carp;
+ Carp::croak("Usage:\n use Pod::Simple::Debug (NUMVAL)\nor"
+ . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting");
+ }
+ } else {
+ require Carp;
+ Carp::croak("Usage:\n use Pod::Simple::Debug (NUMVAL)\nor"
+ . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting");
+ }
+
+ if( defined &Pod::Simple::DEBUG ) {
+ require Carp;
+ Carp::croak("It's too late to call Pod::Simple::Debug -- "
+ . "Pod::Simple has already loaded\nAborting");
+ }
+
+ $value = 0 unless defined $value;
+
+ unless($value =~ m/^-?\d+$/) {
+ require Carp;
+ Carp::croak( "$value isn't a numeric value."
+ . "\nUsage:\n use Pod::Simple::Debug (NUMVAL)\nor"
+ . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting");
+ }
+
+ if( defined $variable ) {
+ # make a not-really-constant
+ *Pod::Simple::DEBUG = sub () { $$variable } ;
+ $$variable = $value;
+ print "# Starting Pod::Simple::DEBUG = non-constant $variable with val $value\n";
+ } else {
+ *Pod::Simple::DEBUG = eval " sub () { $value } ";
+ print "# Starting Pod::Simple::DEBUG = $value\n";
+ }
+
+ require Pod::Simple;
+ return;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::Debug -- put Pod::Simple into trace/debug mode
+
+=head1 SYNOPSIS
+
+ use Pod::Simple::Debug (5); # or some integer
+
+Or:
+
+ my $debuglevel;
+ use Pod::Simple::Debug (\$debuglevel, 0);
+ ...some stuff that uses Pod::Simple to do stuff, but which
+ you don't want debug output from...
+
+ $debug_level = 4;
+ ...some stuff that uses Pod::Simple to do stuff, but which
+ you DO want debug output from...
+
+ $debug_level = 0;
+
+=head1 DESCRIPTION
+
+This is an internal module for controlling the debug level (a.k.a. trace
+level) of Pod::Simple. This is of interest only to Pod::Simple
+developers.
+
+
+=head1 CAVEATS
+
+Note that you should load this module I<before> loading Pod::Simple (or
+any Pod::Simple-based class). If you try loading Pod::Simple::Debug
+after &Pod::Simple::DEBUG is already defined, Pod::Simple::Debug will
+throw a fatal error to the effect that
+"it's s too late to call Pod::Simple::Debug".
+
+Note that the C<use Pod::Simple::Debug (\$x, I<somenum>)> mode will make
+Pod::Simple (et al) run rather slower, since &Pod::Simple::DEBUG won't
+be a constant sub anymore, and so Pod::Simple (et al) won't compile with
+constant-folding.
+
+
+=head1 GUTS
+
+Doing this:
+
+ use Pod::Simple::Debug (5); # or some integer
+
+is basically equivalent to:
+
+ BEGIN { sub Pod::Simple::DEBUG () {5} } # or some integer
+ use Pod::Simple ();
+
+And this:
+
+ use Pod::Simple::Debug (\$debug_level,0); # or some integer
+
+is basically equivalent to this:
+
+ my $debug_level;
+ BEGIN { $debug_level = 0 }
+ BEGIN { sub Pod::Simple::DEBUG () { $debug_level }
+ use Pod::Simple ();
+
+=head1 SEE ALSO
+
+L<Pod::Simple>
+
+The article "Constants in Perl", in I<The Perl Journal> issue
+21. See L<http://www.sysadminmag.com/tpj/issues/vol5_5/>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/DumpAsText.pm b/ext/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
new file mode 100644
index 0000000000..e678e42fa1
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
@@ -0,0 +1,130 @@
+
+require 5;
+package Pod::Simple::DumpAsText;
+$VERSION = '2.02';
+use Pod::Simple ();
+BEGIN {@ISA = ('Pod::Simple')}
+
+use strict;
+
+use Carp ();
+
+BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->accept_codes('VerbatimFormatted');
+ return $new;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _handle_element_start {
+ # ($self, $element_name, $attr_hash_r)
+ my $fh = $_[0]{'output_fh'};
+ my($key, $value);
+ DEBUG and print "++ $_[1]\n";
+
+ print $fh ' ' x ($_[0]{'indent'} || 0), "++", $_[1], "\n";
+ $_[0]{'indent'}++;
+ while(($key,$value) = each %{$_[2]}) {
+ unless($key =~ m/^~/s) {
+ next if $key eq 'start_line' and $_[0]{'hide_line_numbers'};
+ _perly_escape($key);
+ _perly_escape($value);
+ printf $fh qq{%s \\ "%s" => "%s"\n},
+ ' ' x ($_[0]{'indent'} || 0), $key, $value;
+ }
+ }
+ return;
+}
+
+sub _handle_text {
+ DEBUG and print "== \"$_[1]\"\n";
+
+ if(length $_[1]) {
+ my $indent = ' ' x $_[0]{'indent'};
+ my $text = $_[1];
+ _perly_escape($text);
+ $text =~ # A not-totally-brilliant wrapping algorithm:
+ s/(
+ [^\n]{55} # Snare some characters from a line
+ [^\n\ ]{0,50} # and finish any current word
+ )
+ \x20{1,10}(?!\n) # capture some spaces not at line-end
+ /$1"\n$indent . "/gx # => line-break here
+ ;
+
+ print {$_[0]{'output_fh'}} $indent, '* "', $text, "\"\n";
+ }
+ return;
+}
+
+sub _handle_element_end {
+ DEBUG and print "-- $_[1]\n";
+ print {$_[0]{'output_fh'}}
+ ' ' x --$_[0]{'indent'}, "--", $_[1], "\n";
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _perly_escape {
+ foreach my $x (@_) {
+ $x =~ s/([^\x00-\xFF])/sprintf'\x{%X}',ord($1)/eg;
+ # Escape things very cautiously:
+ $x =~ s/([^-\n\t \&\<\>\'!\#\%\(\)\*\+,\.\/\:\;=\?\~\[\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf'\x%02X',ord($1)/eg;
+ }
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::DumpAsText -- dump Pod-parsing events as text
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::DumpAsText -e \
+ "exit Pod::Simple::DumpAsText->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+This class is for dumping, as text, the events gotten from parsing a Pod
+document. This class is of interest to people writing Pod formatters
+based on Pod::Simple. It is useful for seeing exactly what events you
+get out of some Pod that you feed in.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+=head1 SEE ALSO
+
+L<Pod::Simple::DumpAsXML>
+
+L<Pod::Simple>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm b/ext/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
new file mode 100644
index 0000000000..fe0c1662e5
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
@@ -0,0 +1,146 @@
+
+require 5;
+package Pod::Simple::DumpAsXML;
+$VERSION = '2.02';
+use Pod::Simple ();
+BEGIN {@ISA = ('Pod::Simple')}
+
+use strict;
+
+use Carp ();
+
+BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->accept_codes('VerbatimFormatted');
+ return $new;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _handle_element_start {
+ # ($self, $element_name, $attr_hash_r)
+ my $fh = $_[0]{'output_fh'};
+ my($key, $value);
+ DEBUG and print "++ $_[1]\n";
+
+ print $fh ' ' x ($_[0]{'indent'} || 0), "<", $_[1];
+
+ foreach my $key (sort keys %{$_[2]}) {
+ unless($key =~ m/^~/s) {
+ next if $key eq 'start_line' and $_[0]{'hide_line_numbers'};
+ _xml_escape($value = $_[2]{$key});
+ print $fh ' ', $key, '="', $value, '"';
+ }
+ }
+
+
+ print $fh ">\n";
+ $_[0]{'indent'}++;
+ return;
+}
+
+sub _handle_text {
+ DEBUG and print "== \"$_[1]\"\n";
+ if(length $_[1]) {
+ my $indent = ' ' x $_[0]{'indent'};
+ my $text = $_[1];
+ _xml_escape($text);
+ $text =~ # A not-totally-brilliant wrapping algorithm:
+ s/(
+ [^\n]{55} # Snare some characters from a line
+ [^\n\ ]{0,50} # and finish any current word
+ )
+ \x20{1,10}(?!\n) # capture some spaces not at line-end
+ /$1\n$indent/gx # => line-break here
+ ;
+
+ print {$_[0]{'output_fh'}} $indent, $text, "\n";
+ }
+ return;
+}
+
+sub _handle_element_end {
+ DEBUG and print "-- $_[1]\n";
+ print {$_[0]{'output_fh'}}
+ ' ' x --$_[0]{'indent'}, "</", $_[1], ">\n";
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub _xml_escape {
+ foreach my $x (@_) {
+ # Escape things very cautiously:
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done about that.
+ }
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::DumpAsXML -- turn Pod into XML
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::DumpAsXML -e \
+ "exit Pod::Simple::DumpAsXML->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+Pod::Simple::DumpAsXML is a subclass of L<Pod::Simple> that parses Pod
+and turns it into indented and wrapped XML. This class is of
+interest to people writing Pod formatters based on Pod::Simple.
+
+Pod::Simple::DumpAsXML inherits methods from
+L<Pod::Simple>.
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple::XMLOutStream> is rather like this class.
+Pod::Simple::XMLOutStream's output is space-padded in a way
+that's better for sending to an XML processor (that is, it has
+no ignoreable whitespace). But
+Pod::Simple::DumpAsXML's output is much more human-readable, being
+(more-or-less) one token per line, with line-wrapping.
+
+L<Pod::Simple::DumpAsText> is rather like this class,
+except that it doesn't dump with XML syntax. Try them and see
+which one you like best!
+
+L<Pod::Simple>, L<Pod::Simple::DumpAsXML>
+
+The older libraries L<Pod::PXML>, L<Pod::XML>, L<Pod::SAX>
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/HTML.pm b/ext/Pod-Simple/lib/Pod/Simple/HTML.pm
new file mode 100644
index 0000000000..a4dbbc17d0
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/HTML.pm
@@ -0,0 +1,889 @@
+
+require 5;
+package Pod::Simple::HTML;
+use strict;
+use Pod::Simple::PullParser ();
+use vars qw(
+ @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
+ $Perldoc_URL_Prefix $Perldoc_URL_Postfix
+ $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
+ $Doctype_decl $Content_decl
+);
+@ISA = ('Pod::Simple::PullParser');
+$VERSION = '3.03';
+
+use UNIVERSAL ();
+BEGIN {
+ if(defined &DEBUG) { } # no-op
+ elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
+ else { *DEBUG = sub () {0}; }
+}
+
+$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
+ # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ # "http://www.w3.org/TR/html4/loose.dtd">\n};
+
+$Content_decl ||=
+ q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
+
+$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
+$Computerese = "" unless defined $Computerese;
+$LamePad = '' unless defined $LamePad;
+
+$Linearization_Limit = 120 unless defined $Linearization_Limit;
+ # headings/items longer than that won't get an <a name="...">
+$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?'
+ unless defined $Perldoc_URL_Prefix;
+$Perldoc_URL_Postfix = ''
+ unless defined $Perldoc_URL_Postfix;
+
+$Title_Prefix = '' unless defined $Title_Prefix;
+$Title_Postfix = '' unless defined $Title_Postfix;
+%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
+ # 'item-text' stuff in the index doesn't quite work, and may
+ # not be a good idea anyhow.
+
+
+__PACKAGE__->_accessorize(
+ 'perldoc_url_prefix',
+ # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
+ # to put before the "Foo%3a%3aBar".
+ # (for singleton mode only?)
+ 'perldoc_url_postfix',
+ # what to put after "Foo%3a%3aBar" in the URL. Normally "".
+
+ 'batch_mode', # whether we're in batch mode
+ 'batch_mode_current_level',
+ # When in batch mode, how deep the current module is: 1 for "LWP",
+ # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
+
+ 'title_prefix', 'title_postfix',
+ # What to put before and after the title in the head.
+ # Should already be &-escaped
+
+ 'html_header_before_title',
+ 'html_header_after_title',
+ 'html_footer',
+
+ 'index', # whether to add an index at the top of each page
+ # (actually it's a table-of-contents, but we'll call it an index,
+ # out of apparently longstanding habit)
+
+ 'html_css', # URL of CSS file to point to
+ 'html_javascript', # URL of CSS file to point to
+
+ 'force_title', # should already be &-escaped
+ 'default_title', # should already be &-escaped
+);
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+my @_to_accept;
+
+%Tagmap = (
+ 'Verbatim' => "\n<pre$Computerese>",
+ '/Verbatim' => "</pre>\n",
+ 'VerbatimFormatted' => "\n<pre$Computerese>",
+ '/VerbatimFormatted' => "</pre>\n",
+ 'VerbatimB' => "<b>",
+ '/VerbatimB' => "</b>",
+ 'VerbatimI' => "<i>",
+ '/VerbatimI' => "</i>",
+ 'VerbatimBI' => "<b><i>",
+ '/VerbatimBI' => "</i></b>",
+
+
+ 'Data' => "\n",
+ '/Data' => "\n",
+
+ 'head1' => "\n<h1>", # And also stick in an <a name="...">
+ 'head2' => "\n<h2>", # ''
+ 'head3' => "\n<h3>", # ''
+ 'head4' => "\n<h4>", # ''
+ '/head1' => "</a></h1>\n",
+ '/head2' => "</a></h2>\n",
+ '/head3' => "</a></h3>\n",
+ '/head4' => "</a></h4>\n",
+
+ 'X' => "<!--\n\tINDEX: ",
+ '/X' => "\n-->",
+
+ changes(qw(
+ Para=p
+ B=b I=i
+ over-bullet=ul
+ over-number=ol
+ over-text=dl
+ over-block=blockquote
+ item-bullet=li
+ item-number=li
+ item-text=dt
+ )),
+ changes2(
+ map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
+ qw[
+ sample=samp
+ definition=dfn
+ kbd=keyboard
+ variable=var
+ citation=cite
+ abbreviation=abbr
+ acronym=acronym
+ subscript=sub
+ superscript=sup
+ big=big
+ small=small
+ underline=u
+ strikethrough=s
+ ] # no point in providing a way to get <q>...</q>, I think
+ ),
+
+ '/item-bullet' => "</li>$LamePad\n",
+ '/item-number' => "</li>$LamePad\n",
+ '/item-text' => "</a></dt>$LamePad\n",
+ 'item-body' => "\n<dd>",
+ '/item-body' => "</dd>\n",
+
+
+ 'B' => "<b>", '/B' => "</b>",
+ 'I' => "<i>", '/I' => "</i>",
+ 'F' => "<em$Computerese>", '/F' => "</em>",
+ 'C' => "<code$Computerese>", '/C' => "</code>",
+ 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
+ '/L' => "</a>",
+);
+
+sub changes {
+ return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
+ ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
+ } @_;
+}
+sub changes2 {
+ return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
+ ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
+ } @_;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
+ # Just so we can run from the command line. No options.
+ # For that, use perldoc!
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub new {
+ my $new = shift->SUPER::new(@_);
+ #$new->nix_X_codes(1);
+ $new->nbsp_for_S(1);
+ $new->accept_targets( 'html', 'HTML' );
+ $new->accept_codes('VerbatimFormatted');
+ $new->accept_codes(@_to_accept);
+ DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
+
+ $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
+ $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
+ $new->title_prefix( $Title_Prefix );
+ $new->title_postfix( $Title_Postfix );
+
+ $new->html_header_before_title(
+ qq[$Doctype_decl<html><head><title>]
+ );
+ $new->html_header_after_title( join "\n" =>
+ "</title>",
+ $Content_decl,
+ "</head>\n<body class='pod'>",
+ $new->version_tag_comment,
+ "<!-- start doc -->\n",
+ );
+ $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
+
+ $new->{'Tagmap'} = {%Tagmap};
+ return $new;
+}
+
+sub batch_mode_page_object_init {
+ my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
+ DEBUG and print "Initting $self\n for $module\n",
+ " in $infile\n out $outfile\n depth $depth\n";
+ $self->batch_mode(1);
+ $self->batch_mode_current_level($depth);
+ return $self;
+}
+
+sub run {
+ my $self = $_[0];
+ return $self->do_middle if $self->bare_output;
+ return
+ $self->do_beginning && $self->do_middle && $self->do_end;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub do_beginning {
+ my $self = $_[0];
+
+ my $title;
+
+ if(defined $self->force_title) {
+ $title = $self->force_title;
+ DEBUG and print "Forcing title to be $title\n";
+ } else {
+ # Actually try looking for the title in the document:
+ $title = $self->get_short_title();
+ unless($self->content_seen) {
+ DEBUG and print "No content seen in search for title.\n";
+ return;
+ }
+ $self->{'Title'} = $title;
+
+ if(defined $title and $title =~ m/\S/) {
+ $title = $self->title_prefix . esc($title) . $self->title_postfix;
+ } else {
+ $title = $self->default_title;
+ $title = '' unless defined $title;
+ DEBUG and print "Title defaults to $title\n";
+ }
+ }
+
+
+ my $after = $self->html_header_after_title || '';
+ if($self->html_css) {
+ my $link =
+ $self->html_css =~ m/</
+ ? $self->html_css # It's a big blob of markup, let's drop it in
+ : sprintf( # It's just a URL, so let's wrap it up
+ qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
+ $self->html_css,
+ );
+ $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind
+ }
+ $self->_add_top_anchor(\$after);
+
+ if($self->html_javascript) {
+ my $link =
+ $self->html_javascript =~ m/</
+ ? $self->html_javascript # It's a big blob of markup, let's drop it in
+ : sprintf( # It's just a URL, so let's wrap it up
+ qq[<script type="text/javascript" src="%s"></script>\n],
+ $self->html_javascript,
+ );
+ $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind
+ }
+
+ print {$self->{'output_fh'}}
+ $self->html_header_before_title || '',
+ $title, # already escaped
+ $after,
+ ;
+
+ DEBUG and print "Returning from do_beginning...\n";
+ return 1;
+}
+
+sub _add_top_anchor {
+ my($self, $text_r) = @_;
+ unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
+ $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n";
+ }
+ return;
+}
+
+sub version_tag_comment {
+ my $self = shift;
+ return sprintf
+ "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
+ esc(
+ ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
+ $], scalar(gmtime),
+ ), $self->_modnote(),
+ ;
+}
+
+sub _modnote {
+ my $class = ref($_[0]) || $_[0];
+ return join "\n " => grep m/\S/, split "\n",
+
+qq{
+If you want to change this HTML document, you probably shouldn't do that
+by changing it directly. Instead, see about changing the calling options
+to $class, and/or subclassing $class,
+then reconverting this document from the Pod source.
+When in doubt, email the author of $class for advice.
+See 'perldoc $class' for more info.
+};
+
+}
+
+sub do_end {
+ my $self = $_[0];
+ print {$self->{'output_fh'}} $self->html_footer || '';
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Normally this would just be a call to _do_middle_main_loop -- but we
+# have to do some elaborate things to emit all the content and then
+# summarize it and output it /before/ the content that it's a summary of.
+
+sub do_middle {
+ my $self = $_[0];
+ return $self->_do_middle_main_loop unless $self->index;
+
+ if( $self->output_string ) {
+ # An efficiency hack
+ my $out = $self->output_string; #it's a reference to it
+ my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
+ $$out .= $sneakytag;
+ $self->_do_middle_main_loop;
+ $sneakytag = quotemeta($sneakytag);
+ my $index = $self->index_as_html();
+ if( $$out =~ s/$sneakytag/$index/s ) {
+ # Expected case
+ DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n";
+ } else {
+ DEBUG and print "Odd, couldn't find where to insert the index in the output!\n";
+ # I don't think this should ever happen.
+ }
+ return 1;
+ }
+
+ unless( $self->output_fh ) {
+ require Carp;
+ Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
+ }
+
+ # If we get here, we're outputting to a FH. So we need to do some magic.
+ # Namely, divert all content to a string, which we output after the index.
+ my $fh = $self->output_fh;
+ my $content = '';
+ {
+ # Our horrible bait and switch:
+ $self->output_string( \$content );
+ $self->_do_middle_main_loop;
+ $self->abandon_output_string();
+ $self->output_fh($fh);
+ }
+ print $fh $self->index_as_html();
+ print $fh $content;
+
+ return 1;
+}
+
+###########################################################################
+
+sub index_as_html {
+ my $self = $_[0];
+ # This is meant to be called AFTER the input document has been parsed!
+
+ my $points = $self->{'PSHTML_index_points'} || [];
+
+ @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
+ # There's no point in having a 0-item or 1-item index, I dare say.
+
+ my(@out) = qq{\n<div class='indexgroup'>};
+ my $level = 0;
+
+ my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
+ foreach my $p (@$points, ['head0', '(end)']) {
+ ($tagname, $text) = @$p;
+ $anchorname = $self->section_escape($text);
+ if( $tagname =~ m{^head(\d+)$} ) {
+ $target_level = 0 + $1;
+ } else { # must be some kinda list item
+ if($previous_tagname =~ m{^head\d+$} ) {
+ $target_level = $level + 1;
+ } else {
+ $target_level = $level; # no change needed
+ }
+ }
+
+ # Get to target_level by opening or closing ULs
+ while($level > $target_level)
+ { --$level; push @out, (" " x $level) . "</ul>"; }
+ while($level < $target_level)
+ { ++$level; push @out, (" " x ($level-1))
+ . "<ul class='indexList indexList$level'>"; }
+
+ $previous_tagname = $tagname;
+ next unless $level;
+
+ $indent = ' ' x $level;
+ push @out, sprintf
+ "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
+ $indent, $level, $anchorname, esc($text)
+ ;
+ }
+ push @out, "</div>\n";
+ return join "\n", @out;
+}
+
+###########################################################################
+
+sub _do_middle_main_loop {
+ my $self = $_[0];
+ my $fh = $self->{'output_fh'};
+ my $tagmap = $self->{'Tagmap'};
+
+ my($token, $type, $tagname, $linkto, $linktype);
+ my @stack;
+ my $dont_wrap = 0;
+
+ while($token = $self->get_token) {
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ if( ($type = $token->type) eq 'start' ) {
+ if(($tagname = $token->tagname) eq 'L') {
+ $linktype = $token->attr('type') || 'insane';
+
+ $linkto = $self->do_link($token);
+
+ if(defined $linkto and length $linkto) {
+ esc($linkto);
+ # (Yes, SGML-escaping applies on top of %-escaping!
+ # But it's rarely noticeable in practice.)
+ print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
+ } else {
+ print $fh "<a>"; # Yes, an 'a' element with no attributes!
+ }
+
+ } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
+ print $fh $tagmap->{$tagname} || next;
+
+ my @to_unget;
+ while(1) {
+ push @to_unget, $self->get_token;
+ last if $to_unget[-1]->is_end
+ and $to_unget[-1]->tagname eq $tagname;
+
+ # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
+ }
+
+ my $name = $self->linearize_tokens(@to_unget);
+
+ print $fh "<a ";
+ print $fh "class='u' href='#___top' title='click to go to top of document'\n"
+ if $tagname =~ m/^head\d$/s;
+
+ if(defined $name) {
+ my $esc = esc( $self->section_name_tidy( $name ) );
+ print $fh qq[name="$esc"];
+ DEBUG and print "Linearized ", scalar(@to_unget),
+ " tokens as \"$name\".\n";
+ push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
+ if $ToIndex{ $tagname };
+ # Obviously, this discards all formatting codes (saving
+ # just their content), but ahwell.
+
+ } else { # ludicrously long, so nevermind
+ DEBUG and print "Linearized ", scalar(@to_unget),
+ " tokens, but it was too long, so nevermind.\n";
+ }
+ print $fh "\n>";
+ $self->unget_token(@to_unget);
+
+ } elsif ($tagname eq 'Data') {
+ my $next = $self->get_token;
+ next unless defined $next;
+ unless( $next->type eq 'text' ) {
+ $self->unget_token($next);
+ next;
+ }
+ DEBUG and print " raw text ", $next->text, "\n";
+ printf $fh "\n" . $next->text . "\n";
+ next;
+
+ } else {
+ if( $tagname =~ m/^over-/s ) {
+ push @stack, '';
+ } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
+ print $fh $stack[-1];
+ $stack[-1] = '';
+ }
+ print $fh $tagmap->{$tagname} || next;
+ ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
+ or $tagname eq 'X';
+ }
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ } elsif( $type eq 'end' ) {
+ if( ($tagname = $token->tagname) =~ m/^over-/s ) {
+ if( my $end = pop @stack ) {
+ print $fh $end;
+ }
+ } elsif( $tagname =~ m/^item-/s and @stack) {
+ $stack[-1] = $tagmap->{"/$tagname"};
+ if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
+ $self->unget_token($next);
+ if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) {
+ print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
+ $stack[-1] = $tagmap->{"/item-body"};
+ }
+ }
+ next;
+ }
+ print $fh $tagmap->{"/$tagname"} || next;
+ --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ } elsif( $type eq 'text' ) {
+ esc($type = $token->text); # reuse $type, why not
+ $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
+ print $fh $type;
+ }
+
+ }
+ return 1;
+}
+
+###########################################################################
+#
+
+sub do_link {
+ my($self, $token) = @_;
+ my $type = $token->attr('type');
+ if(!defined $type) {
+ $self->whine("Typeless L!?", $token->attr('start_line'));
+ } elsif( $type eq 'pod') { return $self->do_pod_link($token);
+ } elsif( $type eq 'url') { return $self->do_url_link($token);
+ } elsif( $type eq 'man') { return $self->do_man_link($token);
+ } else {
+ $self->whine("L of unknown type $type!?", $token->attr('start_line'));
+ }
+ return 'FNORG'; # should never get called
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub do_url_link { return $_[1]->attr('to') }
+
+sub do_man_link { return undef }
+ # But subclasses are welcome to override this if they have man
+ # pages somewhere URL-accessible.
+
+
+sub do_pod_link {
+ # And now things get really messy...
+ my($self, $link) = @_;
+ my $to = $link->attr('to');
+ my $section = $link->attr('section');
+ return undef unless( # should never happen
+ (defined $to and length $to) or
+ (defined $section and length $section)
+ );
+
+ $section = $self->section_escape($section)
+ if defined $section and length($section .= ''); # (stringify)
+
+ DEBUG and printf "Resolving \"%s\" \"%s\"...\n",
+ $to || "(nil)", $section || "(nil)";
+
+ {
+ # An early hack:
+ my $complete_url = $self->resolve_pod_link_by_table($to, $section);
+ if( $complete_url ) {
+ DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ",
+ $complete_url, "\n (Returning that.)\n";
+ return $complete_url;
+ } else {
+ DEBUG > 4 and print " resolve_pod_link_by_table(T,S)",
+ " didn't return anything interesting.\n";
+ }
+ }
+
+ if(defined $to and length $to) {
+ # Give this routine first hack again
+ my $there = $self->resolve_pod_link_by_table($to);
+ if(defined $there and length $there) {
+ DEBUG > 1
+ and print "resolve_pod_link_by_table(T) gives $there\n";
+ } else {
+ $there =
+ $self->resolve_pod_page_link($to, $section);
+ # (I pass it the section value, but I don't see a
+ # particular reason it'd use it.)
+ DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n";
+ unless( defined $there and length $there ) {
+ DEBUG and print "Can't resolve $to\n";
+ return undef;
+ }
+ # resolve_pod_page_link returning undef is how it
+ # can signal that it gives up on making a link
+ }
+ $to = $there;
+ }
+
+ #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n";
+
+ my $out = (defined $to and length $to) ? $to : '';
+ $out .= "#" . $section if defined $section and length $section;
+
+ unless(length $out) { # sanity check
+ DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
+ $to || "(nil)", $section || "(nil)";
+ return undef;
+ }
+
+ DEBUG and print "Resolved to $out\n";
+ return $out;
+}
+
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub section_escape {
+ my($self, $section) = @_;
+ return $self->section_url_escape(
+ $self->section_name_tidy($section)
+ );
+}
+
+sub section_name_tidy {
+ my($self, $section) = @_;
+ $section =~ tr/ /_/;
+ $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
+ $section = $self->unicode_escape_url($section);
+ $section = '_' unless length $section;
+ return $section;
+}
+
+sub section_url_escape { shift->general_url_escape(@_) }
+sub pagepath_url_escape { shift->general_url_escape(@_) }
+
+sub general_url_escape {
+ my($self, $string) = @_;
+
+ $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
+ # express Unicode things as urlencode(utf(orig)).
+
+ # A pretty conservative escaping, behoovey even for query components
+ # of a URL (see RFC 2396)
+
+ $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done
+ # about that, I guess?
+
+ return $string;
+}
+
+#--------------------------------------------------------------------------
+#
+# Oh look, a yawning portal to Hell! Let's play touch football right by it!
+#
+
+sub resolve_pod_page_link {
+ # resolve_pod_page_link must return a properly escaped URL
+ my $self = shift;
+ return $self->batch_mode()
+ ? $self->resolve_pod_page_link_batch_mode(@_)
+ : $self->resolve_pod_page_link_singleton_mode(@_)
+ ;
+}
+
+sub resolve_pod_page_link_singleton_mode {
+ my($self, $it) = @_;
+ return undef unless defined $it and length $it;
+ my $url = $self->pagepath_url_escape($it);
+
+ $url =~ s{::$}{}s; # probably never comes up anyway
+ $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
+
+ return undef unless length $url;
+ return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
+}
+
+sub resolve_pod_page_link_batch_mode {
+ my($self, $to) = @_;
+ DEBUG > 1 and print " During batch mode, resolving $to ...\n";
+ my @path = grep length($_), split m/::/s, $to, -1;
+ unless( @path ) { # sanity
+ DEBUG and print "Very odd! Splitting $to gives (nil)!\n";
+ return undef;
+ }
+ $self->batch_mode_rectify_path(\@path);
+ my $out = join('/', map $self->pagepath_url_escape($_), @path)
+ . $HTML_EXTENSION;
+ DEBUG > 1 and print " => $out\n";
+ return $out;
+}
+
+sub batch_mode_rectify_path {
+ my($self, $pathbits) = @_;
+ my $level = $self->batch_mode_current_level;
+ $level--; # how many levels up to go to get to the root
+ if($level < 1) {
+ unshift @$pathbits, '.'; # just to be pretty
+ } else {
+ unshift @$pathbits, ('..') x $level;
+ }
+ return;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub resolve_pod_link_by_table {
+ # A crazy hack to allow specifying custom L<foo> => URL mappings
+
+ return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
+
+ my($self, $to, $section) = @_;
+
+ # TODO: add a method that actually populates podhtml_LOT from a file?
+
+ if(defined $section) {
+ $to = '' unless defined $to and length $to;
+ return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
+ } else {
+ return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
+ }
+ return;
+}
+
+###########################################################################
+
+sub linearize_tokens { # self, tokens
+ my $self = shift;
+ my $out = '';
+
+ my $t;
+ while($t = shift @_) {
+ if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
+ $out .= $t; # a string, or some insane thing
+ } elsif($t->is_text) {
+ $out .= $t->text;
+ } elsif($t->is_start and $t->tag eq 'X') {
+ # Ignore until the end of this X<...> sequence:
+ my $x_open = 1;
+ while($x_open) {
+ next if( ($t = shift @_)->is_text );
+ if( $t->is_start and $t->tag eq 'X') { ++$x_open }
+ elsif($t->is_end and $t->tag eq 'X') { --$x_open }
+ }
+ }
+ }
+ return undef if length $out > $Linearization_Limit;
+ return $out;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub unicode_escape_url {
+ my($self, $string) = @_;
+ $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
+ # Turn char 1234 into "(1234)"
+ return $string;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub esc { # a function.
+ if(defined wantarray) {
+ if(wantarray) {
+ @_ = splice @_; # break aliasing
+ } else {
+ my $x = shift;
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
+ return $x;
+ }
+ }
+ foreach my $x (@_) {
+ # Escape things very cautiously:
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
+ if defined $x;
+ # Leave out "- so that "--" won't make it thru in X-generated comments
+ # with text in them.
+
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done about that.
+ }
+ return @_;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1;
+__END__
+
+=head1 NAME
+
+Pod::Simple::HTML - convert Pod to HTML
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
+
+
+=head1 DESCRIPTION
+
+This class is for making an HTML rendering of a Pod document.
+
+This is a subclass of L<Pod::Simple::PullParser> and inherits all its
+methods (and options).
+
+Note that if you want to do a batch conversion of a lot of Pod
+documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
+
+
+
+=head1 CALLING FROM THE COMMAND LINE
+
+TODO
+
+ perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
+
+
+
+=head1 CALLING FROM PERL
+
+TODO make a new object, set any options, and use parse_from_file
+
+
+=head1 METHODS
+
+TODO
+all (most?) accessorized methods
+
+
+=head1 SUBCLASSING
+
+TODO
+
+ can just set any of: html_css html_javascript title_prefix
+ 'html_header_before_title',
+ 'html_header_after_title',
+ 'html_footer',
+
+maybe override do_pod_link
+
+maybe override do_beginning do_end
+
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
+
+
+TODO: a corpus of sample Pod input and HTML output? Or common
+idioms?
+
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002-2004 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm b/ext/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
new file mode 100644
index 0000000000..cb26cabf37
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
@@ -0,0 +1,1342 @@
+
+require 5;
+package Pod::Simple::HTMLBatch;
+use strict;
+use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION
+ $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA
+);
+$VERSION = '3.02';
+@ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML!
+
+# TODO: nocontents stylesheets. Strike some of the color variations?
+
+use Pod::Simple::HTML ();
+BEGIN {*esc = \&Pod::Simple::HTML::esc }
+use File::Spec ();
+use UNIVERSAL ();
+ # "Isn't the Universe an amazing place? I wouldn't live anywhere else!"
+
+use Pod::Simple::Search;
+$SEARCH_CLASS ||= 'Pod::Simple::Search';
+
+BEGIN {
+ if(defined &DEBUG) { } # no-op
+ elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
+ else { *DEBUG = sub () {0}; }
+}
+
+$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i;
+# flag to occasionally sleep for $SLEEPY - 1 seconds.
+
+$HTML_RENDER_CLASS ||= "Pod::Simple::HTML";
+
+#
+# Methods beginning with "_" are particularly internal and possibly ugly.
+#
+
+Pod::Simple::_accessorize( __PACKAGE__,
+ 'verbose', # how verbose to be during batch conversion
+ 'html_render_class', # what class to use to render
+ 'contents_file', # If set, should be the name of a file (in current directory)
+ # to write the list of all modules to
+ 'index', # will set $htmlpage->index(...) to this (true or false)
+ 'progress', # progress object
+ 'contents_page_start', 'contents_page_end',
+
+ 'css_flurry', '_css_wad', 'javascript_flurry', '_javascript_wad',
+ 'no_contents_links', # set to true to suppress automatic adding of << links.
+ '_contents',
+);
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Just so we can run from the command line more easily
+sub go {
+ @ARGV == 2 or die sprintf(
+ "Usage: perl -M%s -e %s:go indirs outdir\n (or use \"\@INC\" for indirs)\n",
+ __PACKAGE__, __PACKAGE__,
+ );
+
+ if(defined($ARGV[1]) and length($ARGV[1])) {
+ my $d = $ARGV[1];
+ -e $d or die "I see no output directory named \"$d\"\nAborting";
+ -d $d or die "But \"$d\" isn't a directory!\nAborting";
+ -w $d or die "Directory \"$d\" isn't writeable!\nAborting";
+ }
+
+ __PACKAGE__->batch_convert(@ARGV);
+}
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+sub new {
+ my $new = bless {}, ref($_[0]) || $_[0];
+ $new->html_render_class($HTML_RENDER_CLASS);
+ $new->verbose(1 + DEBUG);
+ $new->_contents([]);
+
+ $new->index(1);
+
+ $new-> _css_wad([]); $new->css_flurry(1);
+ $new->_javascript_wad([]); $new->javascript_flurry(1);
+
+ $new->contents_file(
+ 'index' . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION)
+ );
+
+ $new->contents_page_start( join "\n", grep $_,
+ $Pod::Simple::HTML::Doctype_decl,
+ "<html><head>",
+ "<title>Perl Documentation</title>",
+ $Pod::Simple::HTML::Content_decl,
+ "</head>",
+ "\n<body class='contentspage'>\n<h1>Perl Documentation</h1>\n"
+ ); # override if you need a different title
+
+
+ $new->contents_page_end( sprintf(
+ "\n\n<p class='contentsfooty'>Generated by %s v%s under Perl v%s\n<br >At %s GMT, which is %s local time.</p>\n\n</body></html>\n",
+ esc(
+ ref($new),
+ eval {$new->VERSION} || $VERSION,
+ $], scalar(gmtime), scalar(localtime),
+ )));
+
+ return $new;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub muse {
+ my $self = shift;
+ if($self->verbose) {
+ print 'T+', int(time() - $self->{'_batch_start_time'}), "s: ", @_, "\n";
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub batch_convert {
+ my($self, $dirs, $outdir) = @_;
+ $self ||= __PACKAGE__; # tolerate being called as an optionless function
+ $self = $self->new unless ref $self; # tolerate being used as a class method
+
+ if(!defined($dirs) or $dirs eq '' or $dirs eq '@INC' ) {
+ $dirs = '';
+ } elsif(ref $dirs) {
+ # OK, it's an explicit set of dirs to scan, specified as an arrayref.
+ } else {
+ # OK, it's an explicit set of dirs to scan, specified as a
+ # string like "/thing:/also:/whatever/perl" (":"-delim, as usual)
+ # or, under MSWin, like "c:/thing;d:/also;c:/whatever/perl" (";"-delim!)
+ require Config;
+ my $ps = quotemeta( $Config::Config{'path_sep'} || ":" );
+ $dirs = [ grep length($_), split qr/$ps/, $dirs ];
+ }
+
+ $outdir = $self->filespecsys->curdir
+ unless defined $outdir and length $outdir;
+
+ $self->_batch_convert_main($dirs, $outdir);
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _batch_convert_main {
+ my($self, $dirs, $outdir) = @_;
+ # $dirs is either false, or an arrayref.
+ # $outdir is a pathspec.
+
+ $self->{'_batch_start_time'} ||= time();
+
+ $self->muse( "= ", scalar(localtime) );
+ $self->muse( "Starting batch conversion to \"$outdir\"" );
+
+ my $progress = $self->progress;
+ if(!$progress and $self->verbose > 0 and $self->verbose() <= 5) {
+ require Pod::Simple::Progress;
+ $progress = Pod::Simple::Progress->new(
+ ($self->verbose < 2) ? () # Default omission-delay
+ : ($self->verbose == 2) ? 1 # Reduce the omission-delay
+ : 0 # Eliminate the omission-delay
+ );
+ $self->progress($progress);
+ }
+
+ if($dirs) {
+ $self->muse(scalar(@$dirs), " dirs to scan: @$dirs");
+ } else {
+ $self->muse("Scanning \@INC. This could take a minute or two.");
+ }
+ my $mod2path = $self->find_all_pods($dirs ? $dirs : ());
+ $self->muse("Done scanning.");
+
+ my $total = keys %$mod2path;
+ unless($total) {
+ $self->muse("No pod found. Aborting batch conversion.\n");
+ return $self;
+ }
+
+ $progress and $progress->goal($total);
+ $self->muse("Now converting pod files to HTML.",
+ ($total > 25) ? " This will take a while more." : ()
+ );
+
+ $self->_spray_css( $outdir );
+ $self->_spray_javascript( $outdir );
+
+ $self->_do_all_batch_conversions($mod2path, $outdir);
+
+ $progress and $progress->done(sprintf (
+ "Done converting %d files.", $self->{"__batch_conv_page_count"}
+ ));
+ return $self->_batch_convert_finish($outdir);
+ return $self;
+}
+
+
+sub _do_all_batch_conversions {
+ my($self, $mod2path, $outdir) = @_;
+ $self->{"__batch_conv_page_count"} = 0;
+
+ foreach my $module (sort {lc($a) cmp lc($b)} keys %$mod2path) {
+ $self->_do_one_batch_conversion($module, $mod2path, $outdir);
+ sleep($SLEEPY - 1) if $SLEEPY;
+ }
+
+ return;
+}
+
+sub _batch_convert_finish {
+ my($self, $outdir) = @_;
+ $self->write_contents_file($outdir);
+ $self->muse("Done with batch conversion. $$self{'__batch_conv_page_count'} files done.");
+ $self->muse( "= ", scalar(localtime) );
+ $self->progress and $self->progress->done("All done!");
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _do_one_batch_conversion {
+ my($self, $module, $mod2path, $outdir, $outfile) = @_;
+
+ my $retval;
+ my $total = scalar keys %$mod2path;
+ my $infile = $mod2path->{$module};
+ my @namelets = grep m/\S/, split "::", $module;
+ # this can stick around in the contents LoL
+ my $depth = scalar @namelets;
+ die "Contentless thingie?! $module $infile" unless @namelets; #sanity
+
+ $outfile ||= do {
+ my @n = @namelets;
+ $n[-1] .= $HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION;
+ $self->filespecsys->catfile( $outdir, @n );
+ };
+
+ my $progress = $self->progress;
+
+ my $page = $self->html_render_class->new;
+ if(DEBUG > 5) {
+ $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: ",
+ ref($page), " render ($depth) $module => $outfile");
+ } elsif(DEBUG > 2) {
+ $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: $module => $outfile")
+ }
+
+ # Give each class a chance to init the converter:
+
+ $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth)
+ if $page->can('batch_mode_page_object_init');
+ $self->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth)
+ if $self->can('batch_mode_page_object_init');
+
+ # Now get busy...
+ $self->makepath($outdir => \@namelets);
+
+ $progress and $progress->reach($self->{"__batch_conv_page_count"}, "Rendering $module");
+
+ if( $retval = $page->parse_from_file($infile, $outfile) ) {
+ ++ $self->{"__batch_conv_page_count"} ;
+ $self->note_for_contents_file( \@namelets, $infile, $outfile );
+ } else {
+ $self->muse("Odd, parse_from_file(\"$infile\", \"$outfile\") returned false.");
+ }
+
+ $page->batch_mode_page_object_kill($self, $module, $infile, $outfile, $depth)
+ if $page->can('batch_mode_page_object_kill');
+ # The following isn't a typo. Note that it switches $self and $page.
+ $self->batch_mode_page_object_kill($page, $module, $infile, $outfile, $depth)
+ if $self->can('batch_mode_page_object_kill');
+
+ DEBUG > 4 and printf "%s %sb < $infile %s %sb\n",
+ $outfile, -s $outfile, $infile, -s $infile
+ ;
+
+ undef($page);
+ return $retval;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub filespecsys { $_[0]{'_filespecsys'} || 'File::Spec' }
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub note_for_contents_file {
+ my($self, $namelets, $infile, $outfile) = @_;
+
+ # I think the infile and outfile parts are never used. -- SMB
+ # But it's handy to have them around for debugging.
+
+ if( $self->contents_file ) {
+ my $c = $self->_contents();
+ push @$c,
+ [ join("::", @$namelets), $infile, $outfile, $namelets ]
+ # 0 1 2 3
+ ;
+ DEBUG > 3 and print "Noting @$c[-1]\n";
+ }
+ return;
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub write_contents_file {
+ my($self, $outdir) = @_;
+ my $outfile = $self->_contents_filespec($outdir) || return;
+
+ $self->muse("Preparing list of modules for ToC");
+
+ my($toplevel, # maps toplevelbit => [all submodules]
+ $toplevel_form_freq, # ends up being 'foo' => 'Foo'
+ ) = $self->_prep_contents_breakdown;
+
+ my $Contents = eval { $self->_wopen($outfile) };
+ if( $Contents ) {
+ $self->muse( "Writing contents file $outfile" );
+ } else {
+ warn "Couldn't write-open contents file $outfile: $!\nAbort writing to $outfile at all";
+ return;
+ }
+
+ $self->_write_contents_start( $Contents, $outfile, );
+ $self->_write_contents_middle( $Contents, $outfile, $toplevel, $toplevel_form_freq );
+ $self->_write_contents_end( $Contents, $outfile, );
+ return $outfile;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_start {
+ my($self, $Contents, $outfile) = @_;
+ my $starter = $self->contents_page_start || '';
+
+ {
+ my $css_wad = $self->_css_wad_to_markup(1);
+ if( $css_wad ) {
+ $starter =~ s{(</head>)}{\n$css_wad\n$1}i; # otherwise nevermind
+ }
+
+ my $javascript_wad = $self->_javascript_wad_to_markup(1);
+ if( $javascript_wad ) {
+ $starter =~ s{(</head>)}{\n$javascript_wad\n$1}i; # otherwise nevermind
+ }
+ }
+
+ unless(print $Contents $starter, "<dl class='superindex'>\n" ) {
+ warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Contents);
+ return 0;
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_middle {
+ my($self, $Contents, $outfile, $toplevel2submodules, $toplevel_form_freq) = @_;
+
+ foreach my $t (sort keys %$toplevel2submodules) {
+ my @downlines = sort {$a->[-1] cmp $b->[-1]}
+ @{ $toplevel2submodules->{$t} };
+
+ printf $Contents qq[<dt><a name="%s">%s</a></dt>\n<dd>\n],
+ esc( $t, $toplevel_form_freq->{$t} )
+ ;
+
+ my($path, $name);
+ foreach my $e (@downlines) {
+ $name = $e->[0];
+ $path = join( "/", '.', esc( @{$e->[3]} ) )
+ . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION);
+ print $Contents qq{ <a href="$path">}, esc($name), "</a>&nbsp;&nbsp;\n";
+ }
+ print $Contents "</dd>\n\n";
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_end {
+ my($self, $Contents, $outfile) = @_;
+ unless(
+ print $Contents "</dl>\n",
+ $self->contents_page_end || '',
+ ) {
+ warn "Couldn't write to $outfile: $!";
+ }
+ close($Contents) or warn "Couldn't close $outfile: $!";
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _prep_contents_breakdown {
+ my($self) = @_;
+ my $contents = $self->_contents;
+ my %toplevel; # maps lctoplevelbit => [all submodules]
+ my %toplevel_form_freq; # ends up being 'foo' => 'Foo'
+ # (mapping anycase forms to most freq form)
+
+ foreach my $entry (@$contents) {
+ my $toplevel =
+ $entry->[0] =~ m/^perl\w*$/ ? 'perl_core_docs'
+ # group all the perlwhatever docs together
+ : $entry->[3][0] # normal case
+ ;
+ ++$toplevel_form_freq{ lc $toplevel }{ $toplevel };
+ push @{ $toplevel{ lc $toplevel } }, $entry;
+ push @$entry, lc($entry->[0]); # add a sort-order key to the end
+ }
+
+ foreach my $toplevel (sort keys %toplevel) {
+ my $fgroup = $toplevel_form_freq{$toplevel};
+ $toplevel_form_freq{$toplevel} =
+ (
+ sort { $fgroup->{$b} <=> $fgroup->{$a} or $a cmp $b }
+ keys %$fgroup
+ # This hash is extremely unlikely to have more than 4 members, so this
+ # sort isn't so very wasteful
+ )[0];
+ }
+
+ return(\%toplevel, \%toplevel_form_freq) if wantarray;
+ return \%toplevel;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _contents_filespec {
+ my($self, $outdir) = @_;
+ my $outfile = $self->contents_file;
+ return unless $outfile;
+ return $self->filespecsys->catfile( $outdir, $outfile );
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub makepath {
+ my($self, $outdir, $namelets) = @_;
+ return unless @$namelets > 1;
+ for my $i (0 .. ($#$namelets - 1)) {
+ my $dir = $self->filespecsys->catdir( $outdir, @$namelets[0 .. $i] );
+ if(-e $dir) {
+ die "$dir exists but not as a directory!?" unless -d $dir;
+ next;
+ }
+ DEBUG > 3 and print " Making $dir\n";
+ mkdir $dir, 0777
+ or die "Can't mkdir $dir: $!\nAborting"
+ ;
+ }
+ return;
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub batch_mode_page_object_init {
+ my $self = shift;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+
+ # TODO: any further options to percolate onto this new object here?
+
+ $page->default_title($module);
+ $page->index( $self->index );
+
+ $page->html_css( $self-> _css_wad_to_markup($depth) );
+ $page->html_javascript( $self->_javascript_wad_to_markup($depth) );
+
+ $self->add_header_backlink($page, $module, $infile, $outfile, $depth);
+ $self->add_footer_backlink($page, $module, $infile, $outfile, $depth);
+
+
+ return $self;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub add_header_backlink {
+ my $self = shift;
+ return if $self->no_contents_links;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+ $page->html_header_after_title( join '',
+ $page->html_header_after_title || '',
+
+ qq[<p class="backlinktop"><b><a name="___top" href="],
+ $self->url_up_to_contents($depth),
+ qq[" accesskey="1" title="All Documents">&lt;&lt;</a></b></p>\n],
+ )
+ if $self->contents_file
+ ;
+ return;
+}
+
+sub add_footer_backlink {
+ my $self = shift;
+ return if $self->no_contents_links;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+ $page->html_footer( join '',
+ qq[<p class="backlinkbottom"><b><a name="___bottom" href="],
+ $self->url_up_to_contents($depth),
+ qq[" title="All Documents">&lt;&lt;</a></b></p>\n],
+
+ $page->html_footer || '',
+ )
+ if $self->contents_file
+ ;
+ return;
+}
+
+sub url_up_to_contents {
+ my($self, $depth) = @_;
+ --$depth;
+ return join '/', ('..') x $depth, esc($self->contents_file);
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub find_all_pods {
+ my($self, $dirs) = @_;
+ # You can override find_all_pods in a subclass if you want to
+ # do extra filtering or whatnot. But for the moment, we just
+ # pass to modnames2paths:
+ return $self->modnames2paths($dirs);
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub modnames2paths { # return a hashref mapping modulenames => paths
+ my($self, $dirs) = @_;
+
+ my $m2p;
+ {
+ my $search = $SEARCH_CLASS->new;
+ DEBUG and print "Searching via $search\n";
+ $search->verbose(1) if DEBUG > 10;
+ $search->progress( $self->progress->copy->goal(0) ) if $self->progress;
+ $search->shadows(0); # don't bother noting shadowed files
+ $search->inc( $dirs ? 0 : 1 );
+ $search->survey( $dirs ? @$dirs : () );
+ $m2p = $search->name2path;
+ die "What, no name2path?!" unless $m2p;
+ }
+
+ $self->muse("That's odd... no modules found!") unless keys %$m2p;
+ if( DEBUG > 4 ) {
+ print "Modules found (name => path):\n";
+ foreach my $m (sort {lc($a) cmp lc($b)} keys %$m2p) {
+ print " $m $$m2p{$m}\n";
+ }
+ print "(total ", scalar(keys %$m2p), ")\n\n";
+ } elsif( DEBUG ) {
+ print "Found ", scalar(keys %$m2p), " modules.\n";
+ }
+ $self->muse( "Found ", scalar(keys %$m2p), " modules." );
+
+ # return the Foo::Bar => /whatever/Foo/Bar.pod|pm hashref
+ return $m2p;
+}
+
+#===========================================================================
+
+sub _wopen {
+ # this is abstracted out so that the daemon class can override it
+ my($self, $outpath) = @_;
+ require Symbol;
+ my $out_fh = Symbol::gensym();
+ DEBUG > 5 and print "Write-opening to $outpath\n";
+ return $out_fh if open($out_fh, "> $outpath");
+ require Carp;
+ Carp::croak("Can't write-open $outpath: $!");
+}
+
+#==========================================================================
+
+sub add_css {
+ my($self, $url, $is_default, $name, $content_type, $media, $_code) = @_;
+ return unless $url;
+ unless($name) {
+ # cook up a reasonable name based on the URL
+ $name = $url;
+ if( $name !~ m/\?/ and $name =~ m{([^/]+)$}s ) {
+ $name = $1;
+ $name =~ s/\.css//i;
+ }
+ }
+ $media ||= 'all';
+ $content_type ||= 'text/css';
+
+ my $bunch = [$url, $name, $content_type, $media, $_code];
+ if($is_default) { unshift @{ $self->_css_wad }, $bunch }
+ else { push @{ $self->_css_wad }, $bunch }
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _spray_css {
+ my($self, $outdir) = @_;
+
+ return unless $self->css_flurry();
+ $self->_gen_css_wad();
+
+ my $lol = $self->_css_wad;
+ foreach my $chunk (@$lol) {
+ my $url = $chunk->[0];
+ my $outfile;
+ if( ref($chunk->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.css$)} ) {
+ $outfile = $self->filespecsys->catfile( $outdir, "$1" );
+ DEBUG > 5 and print "Noting $$chunk[0] as a file I'll create.\n";
+ } else {
+ DEBUG > 5 and print "OK, noting $$chunk[0] as an external CSS.\n";
+ # Requires no further attention.
+ next;
+ }
+
+ #$self->muse( "Writing autogenerated CSS file $outfile" );
+ my $Cssout = $self->_wopen($outfile);
+ print $Cssout ${$chunk->[-1]}
+ or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Cssout);
+ DEBUG > 5 and print "Wrote $outfile\n";
+ }
+
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _css_wad_to_markup {
+ my($self, $depth) = @_;
+
+ my @css = @{ $self->_css_wad || return '' };
+ return '' unless @css;
+
+ my $rel = 'stylesheet';
+ my $out = '';
+
+ --$depth;
+ my $uplink = $depth ? ('../' x $depth) : '';
+
+ foreach my $chunk (@css) {
+ next unless $chunk and @$chunk;
+
+ my( $url1, $url2, $title, $type, $media) = (
+ $self->_maybe_uplink( $chunk->[0], $uplink ),
+ esc(grep !ref($_), @$chunk)
+ );
+
+ $out .= qq{<link rel="$rel" title="$title" type="$type" href="$url1$url2" media="$media" >\n};
+
+ $rel = 'alternate stylesheet'; # alternates = all non-first iterations
+ }
+ return $out;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub _maybe_uplink {
+ # if the given URL looks relative, return the given uplink string --
+ # otherwise return emptystring
+ my($self, $url, $uplink) = @_;
+ ($url =~ m{^\./} or $url !~ m{[/\:]} )
+ ? $uplink
+ : ''
+ # qualify it, if/as needed
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub _gen_css_wad {
+ my $self = $_[0];
+ my $css_template = $self->_css_template;
+ foreach my $variation (
+
+ # Commented out for sake of concision:
+ #
+ # 011n=black_with_red_on_white
+ # 001n=black_with_yellow_on_white
+ # 101n=black_with_green_on_white
+ # 110=white_with_yellow_on_black
+ # 010=white_with_green_on_black
+ # 011=white_with_blue_on_black
+ # 100=white_with_red_on_black
+
+ qw[
+ 110n=black_with_blue_on_white
+ 010n=black_with_magenta_on_white
+ 100n=black_with_cyan_on_white
+
+ 101=white_with_purple_on_black
+ 001=white_with_navy_blue_on_black
+
+ 010a=grey_with_green_on_black
+ 010b=white_with_green_on_grey
+ 101an=black_with_green_on_grey
+ 101bn=grey_with_green_on_white
+ ]) {
+
+ my $outname = $variation;
+ my($flipmode, @swap) = ( ($4 || ''), $1,$2,$3)
+ if $outname =~ s/^([012])([012])([[012])([a-z]*)=?//s;
+ @swap = () if '010' eq join '', @swap; # 010 is a swop-no-op!
+
+ my $this_css =
+ "/* This file is autogenerated. Do not edit. $variation */\n\n"
+ . $css_template;
+
+ # Only look at three-digitty colors, for now at least.
+ if( $flipmode =~ m/n/ ) {
+ $this_css =~ s/(#[0-9a-fA-F]{3})\b/_color_negate($1)/eg;
+ $this_css =~ s/\bthin\b/medium/g;
+ }
+ $this_css =~ s<#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])\b>
+ < join '', '#', ($1,$2,$3)[@swap] >eg if @swap;
+
+ if( $flipmode =~ m/a/)
+ { $this_css =~ s/#fff\b/#999/gi } # black -> dark grey
+ elsif($flipmode =~ m/b/)
+ { $this_css =~ s/#000\b/#666/gi } # white -> light grey
+
+ my $name = $outname;
+ $name =~ tr/-_/ /;
+ $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+ }
+
+ # Now a few indexless variations:
+ foreach my $variation (qw[
+ black_with_blue_on_white white_with_purple_on_black
+ white_with_green_on_grey grey_with_green_on_white
+ ]) {
+ my $outname = "indexless_$variation";
+ my $this_css = join "\n",
+ "/* This file is autogenerated. Do not edit. $outname */\n",
+ "\@import url(\"./_$variation.css\");",
+ ".indexgroup { display: none; }",
+ "\n",
+ ;
+ my $name = $outname;
+ $name =~ tr/-_/ /;
+ $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+ }
+
+ return;
+}
+
+sub _color_negate {
+ my $x = lc $_[0];
+ $x =~ tr[0123456789abcdef]
+ [fedcba9876543210];
+ return $x;
+}
+
+#===========================================================================
+
+sub add_javascript {
+ my($self, $url, $content_type, $_code) = @_;
+ return unless $url;
+ push @{ $self->_javascript_wad }, [
+ $url, $content_type || 'text/javascript', $_code
+ ];
+ return;
+}
+
+sub _spray_javascript {
+ my($self, $outdir) = @_;
+ return unless $self->javascript_flurry();
+ $self->_gen_javascript_wad();
+
+ my $lol = $self->_javascript_wad;
+ foreach my $script (@$lol) {
+ my $url = $script->[0];
+ my $outfile;
+
+ if( ref($script->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.js$)} ) {
+ $outfile = $self->filespecsys->catfile( $outdir, "$1" );
+ DEBUG > 5 and print "Noting $$script[0] as a file I'll create.\n";
+ } else {
+ DEBUG > 5 and print "OK, noting $$script[0] as an external JavaScript.\n";
+ next;
+ }
+
+ #$self->muse( "Writing JavaScript file $outfile" );
+ my $Jsout = $self->_wopen($outfile);
+
+ print $Jsout ${$script->[-1]}
+ or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Jsout);
+ DEBUG > 5 and print "Wrote $outfile\n";
+ }
+
+ return;
+}
+
+sub _gen_javascript_wad {
+ my $self = $_[0];
+ my $js_code = $self->_javascript || return;
+ $self->add_javascript( "_podly.js", 0, \$js_code);
+ return;
+}
+
+sub _javascript_wad_to_markup {
+ my($self, $depth) = @_;
+
+ my @scripts = @{ $self->_javascript_wad || return '' };
+ return '' unless @scripts;
+
+ my $out = '';
+
+ --$depth;
+ my $uplink = $depth ? ('../' x $depth) : '';
+
+ foreach my $s (@scripts) {
+ next unless $s and @$s;
+
+ my( $url1, $url2, $type, $media) = (
+ $self->_maybe_uplink( $s->[0], $uplink ),
+ esc(grep !ref($_), @$s)
+ );
+
+ $out .= qq{<script type="$type" src="$url1$url2"></script>\n};
+ }
+ return $out;
+}
+
+#===========================================================================
+
+sub _css_template { return $CSS }
+sub _javascript { return $JAVASCRIPT }
+
+$CSS = <<'EOCSS';
+/* For accessibility reasons, never specify text sizes in px/pt/pc/in/cm/mm */
+
+@media all { .hide { display: none; } }
+
+@media print {
+ .noprint, div.indexgroup, .backlinktop, .backlinkbottom { display: none }
+
+ * {
+ border-color: black !important;
+ color: black !important;
+ background-color: transparent !important;
+ background-image: none !important;
+ }
+
+ dl.superindex > dd {
+ word-spacing: .6em;
+ }
+}
+
+@media aural, braille, embossed {
+ div.indexgroup { display: none; } /* Too noisy, don't you think? */
+ dl.superindex > dt:before { content: "Group "; }
+ dl.superindex > dt:after { content: " contains:"; }
+ .backlinktop a:before { content: "Back to contents"; }
+ .backlinkbottom a:before { content: "Back to contents"; }
+}
+
+@media aural {
+ dl.superindex > dt { pause-before: 600ms; }
+}
+
+@media screen, tty, tv, projection {
+ .noscreen { display: none; }
+
+ a:link { color: #7070ff; text-decoration: underline; }
+ a:visited { color: #e030ff; text-decoration: underline; }
+ a:active { color: #800000; text-decoration: underline; }
+ body.contentspage a { text-decoration: none; }
+ a.u { color: #fff !important; text-decoration: none; }
+
+ body.pod {
+ margin: 0 5px;
+ color: #fff;
+ background-color: #000;
+ }
+
+ body.pod h1, body.pod h2, body.pod h3, body.pod h4 {
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ margin-top: 1.2em;
+ margin-bottom: .1em;
+ border-top: thin solid transparent;
+ /* margin-left: -5px; border-left: 2px #7070ff solid; padding-left: 3px; */
+ }
+
+ body.pod h1 { border-top-color: #0a0; }
+ body.pod h2 { border-top-color: #080; }
+ body.pod h3 { border-top-color: #040; }
+ body.pod h4 { border-top-color: #010; }
+
+ p.backlinktop + h1 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h2 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h3 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h4 { border-top: none; margin-top: 0em; }
+
+ body.pod dt {
+ font-size: 105%; /* just a wee bit more than normal */
+ }
+
+ .indexgroup { font-size: 80%; }
+
+ .backlinktop, .backlinkbottom {
+ margin-left: -5px;
+ margin-right: -5px;
+ background-color: #040;
+ border-top: thin solid #050;
+ border-bottom: thin solid #050;
+ }
+
+ .backlinktop a, .backlinkbottom a {
+ text-decoration: none;
+ color: #080;
+ background-color: #000;
+ border: thin solid #0d0;
+ }
+ .backlinkbottom { margin-bottom: 0; padding-bottom: 0; }
+ .backlinktop { margin-top: 0; padding-top: 0; }
+
+ body.contentspage {
+ color: #fff;
+ background-color: #000;
+ }
+
+ body.contentspage h1 {
+ color: #0d0;
+ margin-left: 1em;
+ margin-right: 1em;
+ text-indent: -.9em;
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ border-top: thin solid #fff;
+ border-bottom: thin solid #fff;
+ text-align: center;
+ }
+
+ dl.superindex > dt {
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ font-size: 90%;
+ margin-top: .45em;
+ /* margin-bottom: -.15em; */
+ }
+ dl.superindex > dd {
+ word-spacing: .6em; /* most important rule here! */
+ }
+ dl.superindex > a:link {
+ text-decoration: none;
+ color: #fff;
+ }
+
+ .contentsfooty {
+ border-top: thin solid #999;
+ font-size: 90%;
+ }
+
+}
+
+/* The End */
+
+EOCSS
+
+#==========================================================================
+
+$JAVASCRIPT = <<'EOJAVASCRIPT';
+
+// From http://www.alistapart.com/articles/alternate/
+
+function setActiveStyleSheet(title) {
+ var i, a, main;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if(a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("title")) {
+ a.disabled = true;
+ if(a.getAttribute("title") == title) a.disabled = false;
+ }
+ }
+}
+
+function getActiveStyleSheet() {
+ var i, a;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if( a.getAttribute("rel").indexOf("style") != -1
+ && a.getAttribute("title")
+ && !a.disabled
+ ) return a.getAttribute("title");
+ }
+ return null;
+}
+
+function getPreferredStyleSheet() {
+ var i, a;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if( a.getAttribute("rel").indexOf("style") != -1
+ && a.getAttribute("rel").indexOf("alt") == -1
+ && a.getAttribute("title")
+ ) return a.getAttribute("title");
+ }
+ return null;
+}
+
+function createCookie(name,value,days) {
+ if (days) {
+ var date = new Date();
+ date.setTime(date.getTime()+(days*24*60*60*1000));
+ var expires = "; expires="+date.toGMTString();
+ }
+ else expires = "";
+ document.cookie = name+"="+value+expires+"; path=/";
+}
+
+function readCookie(name) {
+ var nameEQ = name + "=";
+ var ca = document.cookie.split(';');
+ for(var i=0 ; i < ca.length ; i++) {
+ var c = ca[i];
+ while (c.charAt(0)==' ') c = c.substring(1,c.length);
+ if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length,c.length);
+ }
+ return null;
+}
+
+window.onload = function(e) {
+ var cookie = readCookie("style");
+ var title = cookie ? cookie : getPreferredStyleSheet();
+ setActiveStyleSheet(title);
+}
+
+window.onunload = function(e) {
+ var title = getActiveStyleSheet();
+ createCookie("style", title, 365);
+}
+
+var cookie = readCookie("style");
+var title = cookie ? cookie : getPreferredStyleSheet();
+setActiveStyleSheet(title);
+
+// The End
+
+EOJAVASCRIPT
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+1;
+__END__
+
+
+=head1 NAME
+
+Pod::Simple::HTMLBatch - convert several Pod files to several HTML files
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' in out
+
+
+=head1 DESCRIPTION
+
+This module is used for running batch-conversions of a lot of HTML
+documents
+
+This class is NOT a subclass of Pod::Simple::HTML
+(nor of bad old Pod::Html) -- although it uses
+Pod::Simple::HTML for doing the conversion of each document.
+
+The normal use of this class is like so:
+
+ use Pod::Simple::HTMLBatch;
+ my $batchconv = Pod::Simple::HTMLBatch->new;
+ $batchconv->some_option( some_value );
+ $batchconv->some_other_option( some_other_value );
+ $batchconv->batch_convert( \@search_dirs, $output_dir );
+
+=head2 FROM THE COMMAND LINE
+
+Note that this class also provides
+(but does not export) the function Pod::Simple::HTMLBatch::go.
+This is basically just a shortcut for C<<
+Pod::Simple::HTMLBatch->batch_convert(@ARGV) >>.
+It's meant to be handy for calling from the command line.
+
+However, the shortcut requires that you specify exactly two command-line
+arguments, C<indirs> and C<outdir>.
+
+Example:
+
+ % mkdir out_html
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go @INC out_html
+ (to convert the pod from Perl's @INC
+ files under the directory ../htmlversion)
+
+(Note that the command line there contains a literal atsign-I-N-C. This
+is handled as a special case by batch_convert, in order to save you having
+to enter the odd-looking "" as the first command-line parameter when you
+mean "just use whatever's in @INC".)
+
+Example:
+
+ % mkdir ../seekrut
+ % chmod og-rx ../seekrut
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go . ../htmlversion
+ (to convert the pod under the current dir into HTML
+ files under the directory ../htmlversion)
+
+Example:
+
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go happydocs .
+ (to convert all pod from happydocs into the current directory)
+
+
+
+=head1 MAIN METHODS
+
+=over
+
+=item $batchconv = Pod::Simple::HTMLBatch->new;
+
+This TODO
+
+
+=item $batchconv->batch_convert( I<indirs>, I<outdir> );
+
+this TODO
+
+=item $batchconv->batch_convert( undef , ...);
+
+=item $batchconv->batch_convert( q{@INC}, ...);
+
+These two values for I<indirs> specify that the normal Perl @INC
+
+=item $batchconv->batch_convert( \@dirs , ...);
+
+This specifies that the input directories are the items in
+the arrayref C<\@dirs>.
+
+=item $batchconv->batch_convert( "somedir" , ...);
+
+This specifies that the director "somedir" is the input.
+(This can be an absolute or relative path, it doesn't matter.)
+
+A common value you might want would be just "." for the current
+directory:
+
+ $batchconv->batch_convert( "." , ...);
+
+
+=item $batchconv->batch_convert( 'somedir:someother:also' , ...);
+
+This specifies that you want the dirs "somedir", "somother", and "also"
+scanned, just as if you'd passed the arrayref
+C<[qw( somedir someother also)]>. Note that a ":"-separator is normal
+under Unix, but Under MSWin, you'll need C<'somedir;someother;also'>
+instead, since the pathsep on MSWin is ";" instead of ":". (And
+I<that> is because ":" often comes up in paths, like
+C<"c:/perl/lib">.)
+
+(Exactly what separator character should be used, is gotten from
+C<$Config::Config{'path_sep'}>, via the L<Config> module.)
+
+=item $batchconv->batch_convert( ... , undef );
+
+This specifies that you want the HTML output to go into the current
+directory.
+
+(Note that a missing or undefined value means a different thing in
+the first slot than in the second. That's so that C<batch_convert()>
+with no arguments (or undef arguments) means "go from @INC, into
+the current directory.)
+
+=item $batchconv->batch_convert( ... , 'somedir' );
+
+This specifies that you want the HTML output to go into the
+directory 'somedir'.
+(This can be an absolute or relative path, it doesn't matter.)
+
+=back
+
+
+Note that you can also call C<batch_convert> as a class method,
+like so:
+
+ Pod::Simple::HTMLBatch->batch_convert( ... );
+
+That is just short for this:
+
+ Pod::Simple::HTMLBatch-> new-> batch_convert(...);
+
+That is, it runs a conversion with default options, for
+whatever inputdirs and output dir you specify.
+
+
+=head2 ACCESSOR METHODS
+
+The following are all accessor methods -- that is, they don't do anything
+on their own, but just alter the contents of the conversion object,
+which comprises the options for this particular batch conversion.
+
+We show the "put" form of the accessors below (i.e., the syntax you use
+for setting the accessor to a specific value). But you can also
+call each method with no parameters to get its current value. For
+example, C<< $self->contents_file() >> returns the current value of
+the contents_file attribute.
+
+=over
+
+
+=item $batchconv->verbose( I<nonnegative_integer> );
+
+This controls how verbose to be during batch conversion, as far as
+notes to STDOUT (or whatever is C<select>'d) about how the conversion
+is going. If 0, no progress information is printed.
+If 1 (the default value), some progress information is printed.
+Higher values print more information.
+
+
+=item $batchconv->index( I<true-or-false> );
+
+This controls whether or not each HTML page is liable to have a little
+table of contents at the top (which we call an "index" for historical
+reasons). This is true by default.
+
+
+=item $batchconv->contents_file( I<filename> );
+
+If set, should be the name of a file (in the output directory)
+to write the HTML index to. The default value is "index.html".
+If you set this to a false value, no contents file will be written.
+
+=item $batchconv->contents_page_start( I<HTML_string> );
+
+This specifies what string should be put at the beginning of
+the contents page.
+The default is a string more or less like this:
+
+ <html>
+ <head><title>Perl Documentation</title></head>
+ <body class='contentspage'>
+ <h1>Perl Documentation</h1>
+
+=item $batchconv->contents_page_end( I<HTML_string> );
+
+This specifies what string should be put at the end of the contents page.
+The default is a string more or less like this:
+
+ <p class='contentsfooty'>Generated by
+ Pod::Simple::HTMLBatch v3.01 under Perl v5.008
+ <br >At Fri May 14 22:26:42 2004 GMT,
+ which is Fri May 14 14:26:42 2004 local time.</p>
+
+
+
+=item $batchconv->add_css( $url );
+
+TODO
+
+=item $batchconv->add_javascript( $url );
+
+TODO
+
+=item $batchconv->css_flurry( I<true-or-false> );
+
+If true (the default value), we autogenerate some CSS files in the
+output directory, and set our HTML files to use those.
+TODO: continue
+
+=item $batchconv->javascript_flurry( I<true-or-false> );
+
+If true (the default value), we autogenerate a JavaScript in the
+output directory, and set our HTML files to use it. Currently,
+the JavaScript is used only to get the browser to remember what
+stylesheet it prefers.
+TODO: continue
+
+=item $batchconv->no_contents_links( I<true-or-false> );
+
+TODO
+
+=item $batchconv->html_render_class( I<classname> );
+
+This sets what class is used for rendering the files.
+The default is "Pod::Simple::Search". If you set it to something else,
+it should probably be a subclass of Pod::Simple::Search, and you should
+C<require> or C<use> that class so that's it's loaded before
+Pod::Simple::HTMLBatch tries loading it.
+
+=back
+
+
+
+
+=head1 NOTES ON CUSTOMIZATION
+
+TODO
+
+ call add_css($someurl) to add stylesheet as alternate
+ call add_css($someurl,1) to add as primary stylesheet
+
+ call add_javascript
+
+ subclass Pod::Simple::HTML and set $batchconv->html_render_class to
+ that classname
+ and maybe override
+ $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth)
+ or maybe override
+ $batchconv->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth)
+
+
+
+=head1 ASK ME!
+
+If you want to do some kind of big pod-to-HTML version with some
+particular kind of option that you don't see how to achieve using this
+module, email me (C<sburke@cpan.org>) and I'll probably have a good idea
+how to do it. For reasons of concision and energetic laziness, some
+methods and options in this module (and the dozen modules it depends on)
+are undocumented; but one of those undocumented bits might be just what
+you're looking for.
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::HTMLBatch>, L<perlpod>, L<perlpodspec>
+
+
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2004 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm b/ext/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm
new file mode 100644
index 0000000000..f78de90144
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm
@@ -0,0 +1,104 @@
+
+require 5;
+package Pod::Simple::HTMLLegacy;
+use strict;
+
+use vars qw($VERSION);
+use Getopt::Long;
+
+$VERSION = "5.01";
+
+#--------------------------------------------------------------------------
+#
+# This class is meant to thinly emulate bad old Pod::Html
+#
+# TODO: some basic docs
+
+sub pod2html {
+ my @args = (@_);
+
+ my( $verbose, $infile, $outfile, $title );
+ my $index = 1;
+
+ {
+ my($help);
+
+ my($netscape); # dummy
+ local @ARGV = @args;
+ GetOptions(
+ "help" => \$help,
+ "verbose!" => \$verbose,
+ "infile=s" => \$infile,
+ "outfile=s" => \$outfile,
+ "title=s" => \$title,
+ "index!" => \$index,
+
+ "netscape!" => \$netscape,
+ ) or return bad_opts(@args);
+ bad_opts(@args) if @ARGV; # it should be all switches!
+ return help_message() if $help;
+ }
+
+ for($infile, $outfile) { $_ = undef unless defined and length }
+
+ if($verbose) {
+ warn sprintf "%s version %s\n", __PACKAGE__, $VERSION;
+ warn "OK, processed args [@args] ...\n";
+ warn sprintf
+ " Verbose: %s\n Index: %s\n Infile: %s\n Outfile: %s\n Title: %s\n",
+ map defined($_) ? $_ : "(nil)",
+ $verbose, $index, $infile, $outfile, $title,
+ ;
+ *Pod::Simple::HTML::DEBUG = sub(){1};
+ }
+ require Pod::Simple::HTML;
+ Pod::Simple::HTML->VERSION(3);
+
+ die "No such input file as $infile\n"
+ if defined $infile and ! -e $infile;
+
+
+ my $pod = Pod::Simple::HTML->new;
+ $pod->force_title($title) if defined $title;
+ $pod->index($index);
+ return $pod->parse_from_file($infile, $outfile);
+}
+
+#--------------------------------------------------------------------------
+
+sub bad_opts { die _help_message(); }
+sub help_message { print STDOUT _help_message() }
+
+#--------------------------------------------------------------------------
+
+sub _help_message {
+
+ join '',
+
+"[", __PACKAGE__, " version ", $VERSION, qq~]
+Usage: pod2html --help --infile=<name> --outfile=<name>
+ --verbose --index --noindex
+
+Options:
+ --help - prints this message.
+ --[no]index - generate an index at the top of the resulting html
+ (default behavior).
+ --infile - filename for the pod to convert (input taken from stdin
+ by default).
+ --outfile - filename for the resulting html file (output sent to
+ stdout by default).
+ --title - title that will appear in resulting html file.
+ --[no]verbose - self-explanatory (off by default).
+
+Note that pod2html is DEPRECATED, and this version implements only
+ some of the options known to older versions.
+For more information, see 'perldoc pod2html'.
+~;
+
+}
+
+1;
+__END__
+
+OVER the underpass! UNDER the overpass! Around the FUTURE and BEYOND REPAIR!!
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/LinkSection.pm b/ext/Pod-Simple/lib/Pod/Simple/LinkSection.pm
new file mode 100644
index 0000000000..14c3ba85d2
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/LinkSection.pm
@@ -0,0 +1,145 @@
+
+require 5;
+package Pod::Simple::LinkSection;
+ # Based somewhat dimly on Array::Autojoin
+
+use strict;
+use Pod::Simple::BlackBox;
+
+use overload( # So it'll stringify nice
+ '""' => \&Pod::Simple::BlackBox::stringify_lol,
+ 'bool' => \&Pod::Simple::BlackBox::stringify_lol,
+ # '.=' => \&tack_on, # grudgingly support
+
+ 'fallback' => 1, # turn on cleverness
+);
+
+sub tack_on {
+ $_[0] = ['', {}, "$_[0]" ];
+ return $_[0][2] .= $_[1];
+}
+
+sub as_string {
+ goto &Pod::Simple::BlackBox::stringify_lol;
+}
+sub stringify {
+ goto &Pod::Simple::BlackBox::stringify_lol;
+}
+
+sub new {
+ my $class = shift;
+ $class = ref($class) || $class;
+ my $new;
+ if(@_ == 1) {
+ if (!ref($_[0] || '')) { # most common case: one bare string
+ return bless ['', {}, $_[0] ], $class;
+ } elsif( ref($_[0] || '') eq 'ARRAY') {
+ $new = [ @{ $_[0] } ];
+ } else {
+ Carp::croak( "$class new() doesn't know to clone $new" );
+ }
+ } else { # misc stuff
+ $new = [ '', {}, @_ ];
+ }
+
+ # By now it's a treelet: [ 'foo', {}, ... ]
+ foreach my $x (@$new) {
+ if(ref($x || '') eq 'ARRAY') {
+ $x = $class->new($x); # recurse
+ } elsif(ref($x || '') eq 'HASH') {
+ $x = { %$x };
+ }
+ # otherwise leave it.
+ }
+
+ return bless $new, $class;
+}
+
+# Not much in this class is likely to be link-section specific --
+# but it just so happens that link-sections are about the only treelets
+# that are exposed to the user.
+
+1;
+
+__END__
+
+# TODO: let it be an option whether a given subclass even wants little treelets?
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::LinkSection -- represent "section" attributes of L codes
+
+=head1 SYNOPSIS
+
+ # a long story
+
+=head1 DESCRIPTION
+
+This class is not of interest to general users.
+
+Pod::Simple uses this class for representing the value of the
+"section" attribute of "L" start-element events. Most applications
+can just use the normal stringification of objects of this class;
+they stringify to just the text content of the section,
+such as "foo" for
+C<< LZ<><Stuff/foo> >>, and "bar" for
+C<< LZ<><Stuff/bIZ<><ar>> >>.
+
+However, anyone particularly interested in getting the full value of
+the treelet, can just traverse the content of the treeleet
+@$treelet_object. To wit:
+
+
+ % perl -MData::Dumper -e
+ "use base qw(Pod::Simple::Methody);
+ sub start_L { print Dumper($_[1]{'section'} ) }
+ __PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>')
+ "
+Output:
+ $VAR1 = bless( [
+ '',
+ {},
+ 'b',
+ bless( [
+ 'I',
+ {},
+ 'ar'
+ ], 'Pod::Simple::LinkSection' ),
+ 'baz'
+ ], 'Pod::Simple::LinkSection' );
+
+But stringify it and you get just the text content:
+
+ % perl -MData::Dumper -e
+ "use base qw(Pod::Simple::Methody);
+ sub start_L { print Dumper( '' . $_[1]{'section'} ) }
+ __PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>')
+ "
+Output:
+ $VAR1 = 'barbaz';
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Methody.pm b/ext/Pod-Simple/lib/Pod/Simple/Methody.pm
new file mode 100644
index 0000000000..2ad607e61b
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Methody.pm
@@ -0,0 +1,127 @@
+
+require 5;
+package Pod::Simple::Methody;
+use strict;
+use Pod::Simple ();
+use vars qw(@ISA $VERSION);
+$VERSION = '2.02';
+@ISA = ('Pod::Simple');
+
+# Yes, we could use named variables, but I want this to be impose
+# as little an additional performance hit as possible.
+
+sub _handle_element_start {
+ $_[1] =~ tr/-:./__/;
+ ( $_[0]->can( 'start_' . $_[1] )
+ || return
+ )->(
+ $_[0], $_[2]
+ );
+}
+
+sub _handle_text {
+ ( $_[0]->can( 'handle_text' )
+ || return
+ )->(
+ @_
+ );
+}
+
+sub _handle_element_end {
+ $_[1] =~ tr/-:./__/;
+ ( $_[0]->can( 'end_' . $_[1] )
+ || return
+ )->(
+ $_[0]
+ );
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::Methody -- turn Pod::Simple events into method calls
+
+=head1 SYNOPSIS
+
+ require 5;
+ use strict;
+ package SomePodFormatter;
+ use base qw(Pod::Simple::Methody);
+
+ sub handle_text {
+ my($self, $text) = @_;
+ ...
+ }
+
+ sub start_head1 {
+ my($self, $attrs) = @_;
+ ...
+ }
+ sub end_head1 {
+ my($self) = @_;
+ ...
+ }
+
+...and start_/end_ methods for whatever other events you want to catch.
+
+=head1 DESCRIPTION
+
+This class is of
+interest to people writing Pod formatters based on Pod::Simple.
+
+This class (which is very small -- read the source) overrides
+Pod::Simple's _handle_element_start, _handle_text, and
+_handle_element_end methods so that parser events are turned into method
+calls. (Otherwise, this is a subclass of L<Pod::Simple> and inherits all
+its methods.)
+
+You can use this class as the base class for a Pod formatter/processor.
+
+=head1 METHOD CALLING
+
+When Pod::Simple sees a "=head1 Hi there", for example, it basically does
+this:
+
+ $parser->_handle_element_start( "head1", \%attributes );
+ $parser->_handle_text( "Hi there" );
+ $parser->_handle_element_end( "head1" );
+
+But if you subclass Pod::Simple::Methody, it will instead do this
+when it sees a "=head1 Hi there":
+
+ $parser->start_head1( \%attributes ) if $parser->can('start_head1');
+ $parser->handle_text( "Hi there" ) if $parser->can('handle_text');
+ $parser->end_head1() if $parser->can('end_head1');
+
+If Pod::Simple sends an event where the element name has a dash,
+period, or colon, the corresponding method name will have a underscore
+in its place. For example, "foo.bar:baz" becomes start_foo_bar_baz
+and end_foo_bar_baz.
+
+See the source for Pod::Simple::Text for an example of using this class.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::Subclassing>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Progress.pm b/ext/Pod-Simple/lib/Pod/Simple/Progress.pm
new file mode 100644
index 0000000000..bc42a952dc
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Progress.pm
@@ -0,0 +1,93 @@
+
+require 5;
+package Pod::Simple::Progress;
+$VERSION = "1.01";
+use strict;
+
+# Objects of this class are used for noting progress of an
+# operation every so often. Messages delivered more often than that
+# are suppressed.
+#
+# There's actually nothing in here that's specific to Pod processing;
+# but it's ad-hoc enough that I'm not willing to give it a name that
+# implies that it's generally useful, like "IO::Progress" or something.
+#
+# -- sburke
+#
+#--------------------------------------------------------------------------
+
+sub new {
+ my($class,$delay) = @_;
+ my $self = bless {'quiet_until' => 1}, ref($class) || $class;
+ $self->to(*STDOUT{IO});
+ $self->delay(defined($delay) ? $delay : 5);
+ return $self;
+}
+
+sub copy {
+ my $orig = shift;
+ bless {%$orig, 'quiet_until' => 1}, ref($orig);
+}
+#--------------------------------------------------------------------------
+
+sub reach {
+ my($self, $point, $note) = @_;
+ if( (my $now = time) >= $self->{'quiet_until'}) {
+ my $goal;
+ my $to = $self->{'to'};
+ print $to join('',
+ ($self->{'quiet_until'} == 1) ? () : '... ',
+ (defined $point) ? (
+ '#',
+ ($goal = $self->{'goal'}) ? (
+ ' ' x (length($goal) - length($point)),
+ $point, '/', $goal,
+ ) : $point,
+ $note ? ': ' : (),
+ ) : (),
+ $note || '',
+ "\n"
+ );
+ $self->{'quiet_until'} = $now + $self->{'delay'};
+ }
+ return $self;
+}
+
+#--------------------------------------------------------------------------
+
+sub done {
+ my($self, $note) = @_;
+ $self->{'quiet_until'} = 1;
+ return $self->reach( undef, $note );
+}
+
+#--------------------------------------------------------------------------
+# Simple accessors:
+
+sub delay {
+ return $_[0]{'delay'} if @_ == 1; $_[0]{'delay'} = $_[1]; return $_[0] }
+sub goal {
+ return $_[0]{'goal' } if @_ == 1; $_[0]{'goal' } = $_[1]; return $_[0] }
+sub to {
+ return $_[0]{'to' } if @_ == 1; $_[0]{'to' } = $_[1]; return $_[0] }
+
+#--------------------------------------------------------------------------
+
+unless(caller) { # Simple self-test:
+ my $p = __PACKAGE__->new->goal(5);
+ $p->reach(1, "Primus!");
+ sleep 1;
+ $p->reach(2, "Secundus!");
+ sleep 3;
+ $p->reach(3, "Tertius!");
+ sleep 5;
+ $p->reach(4);
+ $p->reach(5, "Quintus!");
+ sleep 1;
+ $p->done("All done");
+}
+
+#--------------------------------------------------------------------------
+1;
+__END__
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/PullParser.pm b/ext/Pod-Simple/lib/Pod/Simple/PullParser.pm
new file mode 100644
index 0000000000..15d973134c
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/PullParser.pm
@@ -0,0 +1,795 @@
+
+require 5;
+package Pod::Simple::PullParser;
+$VERSION = '2.02';
+use Pod::Simple ();
+BEGIN {@ISA = ('Pod::Simple')}
+
+use strict;
+use Carp ();
+
+use Pod::Simple::PullParserStartToken;
+use Pod::Simple::PullParserEndToken;
+use Pod::Simple::PullParserTextToken;
+
+BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
+
+__PACKAGE__->_accessorize(
+ 'source_fh', # the filehandle we're reading from
+ 'source_scalar_ref', # the scalarref we're reading from
+ 'source_arrayref', # the arrayref we're reading from
+);
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# And here is how we implement a pull-parser on top of a push-parser...
+
+sub filter {
+ my($self, $source) = @_;
+ $self = $self->new unless ref $self;
+
+ $source = *STDIN{IO} unless defined $source;
+ $self->set_source($source);
+ $self->output_fh(*STDOUT{IO});
+
+ $self->run; # define run() in a subclass if you want to use filter()!
+ return $self;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub parse_string_document {
+ my $this = shift;
+ $this->set_source(\ $_[0]);
+ $this->run;
+}
+
+sub parse_file {
+ my($this, $filename) = @_;
+ $this->set_source($filename);
+ $this->run;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# In case anyone tries to use them:
+
+sub run {
+ use Carp ();
+ if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
+ Carp::croak "You can call run() only on subclasses of "
+ . __PACKAGE__;
+ } else {
+ Carp::croak join '',
+ "You can't call run() because ",
+ ref($_[0]) || $_[0], " didn't define a run() method";
+ }
+}
+
+sub parse_lines {
+ use Carp ();
+ Carp::croak "Use set_source with ", __PACKAGE__,
+ " and subclasses, not parse_lines";
+}
+
+sub parse_line {
+ use Carp ();
+ Carp::croak "Use set_source with ", __PACKAGE__,
+ " and subclasses, not parse_line";
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ die "Couldn't construct for $class" unless $self;
+
+ $self->{'token_buffer'} ||= [];
+ $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
+ $self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken';
+ $self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken';
+
+ DEBUG > 1 and print "New pullparser object: $self\n";
+
+ return $self;
+}
+
+# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+
+sub get_token {
+ my $self = shift;
+ DEBUG > 1 and print "\nget_token starting up on $self.\n";
+ DEBUG > 2 and print " Items in token-buffer (",
+ scalar( @{ $self->{'token_buffer'} } ) ,
+ ") :\n", map(
+ " " . $_->dump . "\n", @{ $self->{'token_buffer'} }
+ ),
+ @{ $self->{'token_buffer'} } ? '' : ' (no tokens)',
+ "\n"
+ ;
+
+ until( @{ $self->{'token_buffer'} } ) {
+ DEBUG > 3 and print "I need to get something into my empty token buffer...\n";
+ if($self->{'source_dead'}) {
+ DEBUG and print "$self 's source is dead.\n";
+ push @{ $self->{'token_buffer'} }, undef;
+ } elsif(exists $self->{'source_fh'}) {
+ my @lines;
+ my $fh = $self->{'source_fh'}
+ || Carp::croak('You have to call set_source before you can call get_token');
+
+ DEBUG and print "$self 's source is filehandle $fh.\n";
+ # Read those many lines at a time
+ for(my $i = Pod::Simple::MANY_LINES; $i--;) {
+ DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";
+ local $/ = $Pod::Simple::NL;
+ push @lines, scalar(<$fh>); # readline
+ DEBUG > 3 and print " Line is: ",
+ defined($lines[-1]) ? $lines[-1] : "<undef>\n";
+ unless( defined $lines[-1] ) {
+ DEBUG and print "That's it for that source fh! Killing.\n";
+ delete $self->{'source_fh'}; # so it can be GC'd
+ last;
+ }
+ # but pass thru the undef, which will set source_dead to true
+
+ # TODO: look to see if $lines[-1] is =encoding, and if so,
+ # do horribly magic things
+
+ }
+
+ if(DEBUG > 8) {
+ print "* I've gotten ", scalar(@lines), " lines:\n";
+ foreach my $l (@lines) {
+ if(defined $l) {
+ print " line {$l}\n";
+ } else {
+ print " line undef\n";
+ }
+ }
+ print "* end of ", scalar(@lines), " lines\n";
+ }
+
+ $self->SUPER::parse_lines(@lines);
+
+ } elsif(exists $self->{'source_arrayref'}) {
+ DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",
+ scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
+
+ DEBUG > 3 and print " Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
+ $self->SUPER::parse_lines(
+ splice @{ $self->{'source_arrayref'} },
+ 0,
+ Pod::Simple::MANY_LINES
+ );
+ unless( @{ $self->{'source_arrayref'} } ) {
+ DEBUG and print "That's it for that source arrayref! Killing.\n";
+ $self->SUPER::parse_lines(undef);
+ delete $self->{'source_arrayref'}; # so it can be GC'd
+ }
+ # to make sure that an undef is always sent to signal end-of-stream
+
+ } elsif(exists $self->{'source_scalar_ref'}) {
+
+ DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
+ length(${ $self->{'source_scalar_ref'} }) -
+ (pos(${ $self->{'source_scalar_ref'} }) || 0),
+ " characters left to parse.\n";
+
+ DEBUG > 3 and print " Fetching a line from source-string...\n";
+ if( ${ $self->{'source_scalar_ref'} } =~
+ m/([^\n\r]*)((?:\r?\n)?)/g
+ ) {
+ #print(">> $1\n"),
+ $self->SUPER::parse_lines($1)
+ if length($1) or length($2)
+ or pos( ${ $self->{'source_scalar_ref'} })
+ != length( ${ $self->{'source_scalar_ref'} });
+ # I.e., unless it's a zero-length "empty line" at the very
+ # end of "foo\nbar\n" (i.e., between the \n and the EOS).
+ } else { # that's the end. Byebye
+ $self->SUPER::parse_lines(undef);
+ delete $self->{'source_scalar_ref'};
+ DEBUG and print "That's it for that source scalarref! Killing.\n";
+ }
+
+
+ } else {
+ die "What source??";
+ }
+ }
+ DEBUG and print "get_token about to return ",
+ Pod::Simple::pretty( @{$self->{'token_buffer'}}
+ ? $self->{'token_buffer'}[-1] : undef
+ ), "\n";
+ return shift @{$self->{'token_buffer'}}; # that's an undef if empty
+}
+
+use UNIVERSAL ();
+sub unget_token {
+ my $self = shift;
+ DEBUG and print "Ungetting ", scalar(@_), " tokens: ",
+ @_ ? "@_\n" : "().\n";
+ foreach my $t (@_) {
+ Carp::croak "Can't unget that, because it's not a token -- it's undef!"
+ unless defined $t;
+ Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
+ unless ref $t;
+ Carp::croak "Can't unget $t, because it's not a token object!"
+ unless UNIVERSAL::can($t, 'type');
+ }
+
+ unshift @{$self->{'token_buffer'}}, @_;
+ DEBUG > 1 and print "Token buffer now has ",
+ scalar(@{$self->{'token_buffer'}}), " items in it.\n";
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# $self->{'source_filename'} = $source;
+
+sub set_source {
+ my $self = shift @_;
+ return $self->{'source_fh'} unless @_;
+ my $handle;
+ if(!defined $_[0]) {
+ Carp::croak("Can't use empty-string as a source for set_source");
+ } elsif(ref(\( $_[0] )) eq 'GLOB') {
+ $self->{'source_filename'} = '' . ($handle = $_[0]);
+ DEBUG and print "$self 's source is glob $_[0]\n";
+ # and fall thru
+ } elsif(ref( $_[0] ) eq 'SCALAR') {
+ $self->{'source_scalar_ref'} = $_[0];
+ DEBUG and print "$self 's source is scalar ref $_[0]\n";
+ return;
+ } elsif(ref( $_[0] ) eq 'ARRAY') {
+ $self->{'source_arrayref'} = $_[0];
+ DEBUG and print "$self 's source is array ref $_[0]\n";
+ return;
+ } elsif(ref $_[0]) {
+ $self->{'source_filename'} = '' . ($handle = $_[0]);
+ DEBUG and print "$self 's source is fh-obj $_[0]\n";
+ } elsif(!length $_[0]) {
+ Carp::croak("Can't use empty-string as a source for set_source");
+ } else { # It's a filename!
+ DEBUG and print "$self 's source is filename $_[0]\n";
+ {
+ local *PODSOURCE;
+ open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
+ $handle = *PODSOURCE{IO};
+ }
+ $self->{'source_filename'} = $_[0];
+ DEBUG and print " Its name is $_[0].\n";
+
+ # TODO: file-discipline things here!
+ }
+
+ $self->{'source_fh'} = $handle;
+ DEBUG and print " Its handle is $handle\n";
+ return 1;
+}
+
+# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+
+sub get_title_short { shift->get_short_title(@_) } # alias
+
+sub get_short_title {
+ my $title = shift->get_title(@_);
+ $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
+ # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
+ return $title;
+}
+
+sub get_title { shift->_get_titled_section(
+ 'NAME', max_token => 50, desperate => 1, @_)
+}
+sub get_version { shift->_get_titled_section(
+ 'VERSION',
+ max_token => 400,
+ accept_verbatim => 1,
+ max_content_length => 3_000,
+ @_,
+ );
+}
+sub get_description { shift->_get_titled_section(
+ 'DESCRIPTION',
+ max_token => 400,
+ max_content_length => 3_000,
+ @_,
+) }
+
+sub get_authors { shift->get_author(@_) } # a harmless alias
+
+sub get_author {
+ my $this = shift;
+ # Max_token is so high because these are
+ # typically at the end of the document:
+ $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
+ $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
+}
+
+#--------------------------------------------------------------------------
+
+sub _get_titled_section {
+ # Based on a get_title originally contributed by Graham Barr
+ my($self, $titlename, %options) = (@_);
+
+ my $max_token = delete $options{'max_token'};
+ my $desperate_for_title = delete $options{'desperate'};
+ my $accept_verbatim = delete $options{'accept_verbatim'};
+ my $max_content_length = delete $options{'max_content_length'};
+ $max_content_length = 120 unless defined $max_content_length;
+
+ Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
+ . join " ", map "[$_]", sort keys %options
+ )
+ if keys %options;
+
+ my %content_containers;
+ $content_containers{'Para'} = 1;
+ if($accept_verbatim) {
+ $content_containers{'Verbatim'} = 1;
+ $content_containers{'VerbatimFormatted'} = 1;
+ }
+
+ my $token_count = 0;
+ my $title;
+ my @to_unget;
+ my $state = 0;
+ my $depth = 0;
+
+ Carp::croak "What kind of titlename is \"$titlename\"?!" unless
+ defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
+ my $titlename_re = quotemeta($titlename);
+
+ my $head1_text_content;
+ my $para_text_content;
+
+ while(
+ ++$token_count <= ($max_token || 1_000_000)
+ and defined(my $token = $self->get_token)
+ ) {
+ push @to_unget, $token;
+
+ if ($state == 0) { # seeking =head1
+ if( $token->is_start and $token->tagname eq 'head1' ) {
+ DEBUG and print " Found head1. Seeking content...\n";
+ ++$state;
+ $head1_text_content = '';
+ }
+ }
+
+ elsif($state == 1) { # accumulating text until end of head1
+ if( $token->is_text ) {
+ DEBUG and print " Adding \"", $token->text, "\" to head1-content.\n";
+ $head1_text_content .= $token->text;
+ } elsif( $token->is_end and $token->tagname eq 'head1' ) {
+ DEBUG and print " Found end of head1. Considering content...\n";
+ if($head1_text_content eq $titlename
+ or $head1_text_content =~ m/\($titlename_re\)/s
+ # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
+ ) {
+ DEBUG and print " Yup, it was $titlename. Seeking next para-content...\n";
+ ++$state;
+ } elsif(
+ $desperate_for_title
+ # if we're so desperate we'll take the first
+ # =head1's content as a title
+ and $head1_text_content =~ m/\S/
+ and $head1_text_content !~ m/^[ A-Z]+$/s
+ and $head1_text_content !~
+ m/\((?:
+ NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
+ | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
+ | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
+ )\)/sx
+ # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
+ and ($max_content_length
+ ? (length($head1_text_content) <= $max_content_length) # sanity
+ : 1)
+ ) {
+ DEBUG and print " It looks titular: \"$head1_text_content\".\n",
+ "\n Using that.\n";
+ $title = $head1_text_content;
+ last;
+ } else {
+ --$state;
+ DEBUG and print " Didn't look titular ($head1_text_content).\n",
+ "\n Dropping back to seeking-head1-content mode...\n";
+ }
+ }
+ }
+
+ elsif($state == 2) {
+ # seeking start of para (which must immediately follow)
+ if($token->is_start and $content_containers{ $token->tagname }) {
+ DEBUG and print " Found start of Para. Accumulating content...\n";
+ $para_text_content = '';
+ ++$state;
+ } else {
+ DEBUG and print
+ " Didn't see an immediately subsequent start-Para. Reseeking H1\n";
+ $state = 0;
+ }
+ }
+
+ elsif($state == 3) {
+ # accumulating text until end of Para
+ if( $token->is_text ) {
+ DEBUG and print " Adding \"", $token->text, "\" to para-content.\n";
+ $para_text_content .= $token->text;
+ # and keep looking
+
+ } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
+ DEBUG and print " Found end of Para. Considering content: ",
+ $para_text_content, "\n";
+
+ if( $para_text_content =~ m/\S/
+ and ($max_content_length
+ ? (length($para_text_content) <= $max_content_length)
+ : 1)
+ ) {
+ # Some minimal sanity constraints, I think.
+ DEBUG and print " It looks contentworthy, I guess. Using it.\n";
+ $title = $para_text_content;
+ last;
+ } else {
+ DEBUG and print " Doesn't look at all contentworthy!\n Giving up.\n";
+ undef $title;
+ last;
+ }
+ }
+ }
+
+ else {
+ die "IMPOSSIBLE STATE $state!\n"; # should never happen
+ }
+
+ }
+
+ # Put it all back!
+ $self->unget_token(@to_unget);
+
+ if(DEBUG) {
+ if(defined $title) { print " Returing title <$title>\n" }
+ else { print "Returning title <>\n" }
+ }
+
+ return '' unless defined $title;
+ $title =~ s/^\s+//;
+ return $title;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# Methods that actually do work at parse-time:
+
+sub _handle_element_start {
+ my $self = shift; # leaving ($element_name, $attr_hash_r)
+ DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
+
+ push @{ $self->{'token_buffer'} },
+ $self->{'start_token_class'}->new(@_);
+ return;
+}
+
+sub _handle_text {
+ my $self = shift; # leaving ($text)
+ DEBUG > 2 and print "== $_[0]\n";
+ push @{ $self->{'token_buffer'} },
+ $self->{'text_token_class'}->new(@_);
+ return;
+}
+
+sub _handle_element_end {
+ my $self = shift; # leaving ($element_name);
+ DEBUG > 2 and print "-- $_[0]\n";
+ push @{ $self->{'token_buffer'} },
+ $self->{'end_token_class'}->new(@_);
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
+
+=head1 SYNOPSIS
+
+ my $parser = SomePodProcessor->new;
+ $parser->set_source( "whatever.pod" );
+ $parser->run;
+
+Or:
+
+ my $parser = SomePodProcessor->new;
+ $parser->set_source( $some_filehandle_object );
+ $parser->run;
+
+Or:
+
+ my $parser = SomePodProcessor->new;
+ $parser->set_source( \$document_source );
+ $parser->run;
+
+Or:
+
+ my $parser = SomePodProcessor->new;
+ $parser->set_source( \@document_lines );
+ $parser->run;
+
+And elsewhere:
+
+ require 5;
+ package SomePodProcessor;
+ use strict;
+ use base qw(Pod::Simple::PullParser);
+
+ sub run {
+ my $self = shift;
+ Token:
+ while(my $token = $self->get_token) {
+ ...process each token...
+ }
+ }
+
+=head1 DESCRIPTION
+
+This class is for using Pod::Simple to build a Pod processor -- but
+one that uses an interface based on a stream of token objects,
+instead of based on events.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+A subclass of Pod::Simple::PullParser should define a C<run> method
+that calls C<< $token = $parser->get_token >> to pull tokens.
+
+See the source for Pod::Simple::RTF for an example of a formatter
+that uses Pod::Simple::PullParser.
+
+=head1 METHODS
+
+=over
+
+=item my $token = $parser->get_token
+
+This returns the next token object (which will be of a subclass of
+L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
+the end of the document.
+
+=item $parser->unget_token( $token )
+
+=item $parser->unget_token( $token1, $token2, ... )
+
+This restores the token object(s) to the front of the parser stream.
+
+=back
+
+The source has to be set before you can parse anything. The lowest-level
+way is to call C<set_source>:
+
+=over
+
+=item $parser->set_source( $filename )
+
+=item $parser->set_source( $filehandle_object )
+
+=item $parser->set_source( \$document_source )
+
+=item $parser->set_source( \@document_lines )
+
+=back
+
+Or you can call these methods, which Pod::Simple::PullParser has defined
+to work just like Pod::Simple's same-named methods:
+
+=over
+
+=item $parser->parse_file(...)
+
+=item $parser->parse_string_document(...)
+
+=item $parser->filter(...)
+
+=item $parser->parse_from_file(...)
+
+=back
+
+For those to work, the Pod-processing subclass of
+Pod::Simple::PullParser has to have defined a $parser->run method --
+so it is advised that all Pod::Simple::PullParser subclasses do so.
+See the Synopsis above, or the source for Pod::Simple::RTF.
+
+Authors of formatter subclasses might find these methods useful to
+call on a parser object that you haven't started pulling tokens
+from yet:
+
+=over
+
+=item my $title_string = $parser->get_title
+
+This tries to get the title string out of $parser, by getting some tokens,
+and scanning them for the title, and then ungetting them so that you can
+process the token-stream from the beginning.
+
+For example, suppose you have a document that starts out:
+
+ =head1 NAME
+
+ Hoo::Boy::Wowza -- Stuff B<wow> yeah!
+
+$parser->get_title on that document will return "Hoo::Boy::Wowza --
+Stuff wow yeah!".
+
+In cases where get_title can't find the title, it will return empty-string
+("").
+
+=item my $title_string = $parser->get_short_title
+
+This is just like get_title, except that it returns just the modulename, if
+the title seems to be of the form "SomeModuleName -- description".
+
+For example, suppose you have a document that starts out:
+
+ =head1 NAME
+
+ Hoo::Boy::Wowza -- Stuff B<wow> yeah!
+
+then $parser->get_short_title on that document will return
+"Hoo::Boy::Wowza".
+
+But if the document starts out:
+
+ =head1 NAME
+
+ Hooboy, stuff B<wow> yeah!
+
+then $parser->get_short_title on that document will return "Hooboy,
+stuff wow yeah!".
+
+If the title can't be found, then get_short_title returns empty-string
+("").
+
+=item $author_name = $parser->get_author
+
+This works like get_title except that it returns the contents of the
+"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
+isn't terribly long.
+
+(This method tolerates "AUTHORS" instead of "AUTHOR" too.)
+
+=item $description_name = $parser->get_description
+
+This works like get_title except that it returns the contents of the
+"=head1 PARAGRAPH\n\nParagraph...\n" section, assuming that that section
+isn't terribly long.
+
+=item $version_block = $parser->get_version
+
+This works like get_title except that it returns the contents of
+the "=head1 VERSION\n\n[BIG BLOCK]\n" block. Note that this does NOT
+return the module's C<$VERSION>!!
+
+
+=back
+
+=head1 NOTE
+
+You don't actually I<have> to define a C<run> method. If you're
+writing a Pod-formatter class, you should define a C<run> just so
+that users can call C<parse_file> etc, but you don't I<have> to.
+
+And if you're not writing a formatter class, but are instead just
+writing a program that does something simple with a Pod::PullParser
+object (and not an object of a subclass), then there's no reason to
+bother subclassing to add a C<run> method.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>
+
+L<Pod::Simple::PullParserToken> -- and its subclasses
+L<Pod::Simple::PullParserStartToken>,
+L<Pod::Simple::PullParserTextToken>, and
+L<Pod::Simple::PullParserEndToken>.
+
+L<HTML::TokeParser>, which inspired this.
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
+
+
+JUNK:
+
+sub _old_get_title { # some witchery in here
+ my $self = $_[0];
+ my $title;
+ my @to_unget;
+
+ while(1) {
+ push @to_unget, $self->get_token;
+ unless(defined $to_unget[-1]) { # whoops, short doc!
+ pop @to_unget;
+ last;
+ }
+
+ DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n";
+
+ (DEBUG and print "Too much in the buffer.\n"),
+ last if @to_unget > 25; # sanity
+
+ my $pattern = '';
+ if( #$to_unget[-1]->type eq 'end'
+ #and $to_unget[-1]->tagname eq 'Para'
+ #and
+ ($pattern = join('',
+ map {;
+ ($_->type eq 'start') ? ("<" . $_->tagname .">")
+ : ($_->type eq 'end' ) ? ("</". $_->tagname .">")
+ : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
+ : "BLORP"
+ } @to_unget
+ )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
+ ) {
+ # Whee, it fits the pattern
+ DEBUG and print "Seems to match =head1 NAME pattern.\n";
+ $title = '';
+ foreach my $t (reverse @to_unget) {
+ last if $t->type eq 'start' and $t->tagname eq 'Para';
+ $title = $t->text . $title if $t->type eq 'text';
+ }
+ undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
+ last;
+
+ } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
+ and !( $1 eq '1' and $2 eq 'NAME' )
+ ) {
+ # Well, it fits a fallback pattern
+ DEBUG and print "Seems to match NAMEless pattern.\n";
+ $title = '';
+ foreach my $t (reverse @to_unget) {
+ last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
+ $title = $t->text . $title if $t->type eq 'text';
+ }
+ undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
+ last;
+
+ } else {
+ DEBUG and $pattern and print "Leading pattern: $pattern\n";
+ }
+ }
+
+ # Put it all back:
+ $self->unget_token(@to_unget);
+
+ if(DEBUG) {
+ if(defined $title) { print " Returing title <$title>\n" }
+ else { print "Returning title <>\n" }
+ }
+
+ return '' unless defined $title;
+ return $title;
+}
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm b/ext/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm
new file mode 100644
index 0000000000..7b219f8660
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm
@@ -0,0 +1,93 @@
+
+require 5;
+package Pod::Simple::PullParserEndToken;
+use Pod::Simple::PullParserToken ();
+@ISA = ('Pod::Simple::PullParserToken');
+use strict;
+
+sub new { # Class->new(tagname);
+ my $class = shift;
+ return bless ['end', @_], ref($class) || $class;
+}
+
+# Purely accessors:
+
+sub tagname { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] }
+sub tag { shift->tagname(@_) }
+
+# shortcut:
+sub is_tagname { $_[0][1] eq $_[1] }
+sub is_tag { shift->is_tagname(@_) }
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::PullParserEndToken -- end-tokens from Pod::Simple::PullParser
+
+=head1 SYNOPSIS
+
+(See L<Pod::Simple::PullParser>)
+
+=head1 DESCRIPTION
+
+When you do $parser->get_token on a L<Pod::Simple::PullParser>, you might
+get an object of this class.
+
+This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods,
+and adds these methods:
+
+=over
+
+=item $token->tagname
+
+This returns the tagname for this end-token object.
+For example, parsing a "=head1 ..." line will give you
+a start-token with the tagname of "head1", token(s) for its
+content, and then an end-token with the tagname of "head1".
+
+=item $token->tagname(I<somestring>)
+
+This changes the tagname for this end-token object.
+You probably won't need to do this.
+
+=item $token->tag(...)
+
+A shortcut for $token->tagname(...)
+
+=item $token->is_tag(I<somestring>) or $token->is_tagname(I<somestring>)
+
+These are shortcuts for C<< $token->tag() eq I<somestring> >>
+
+=back
+
+You're unlikely to ever need to construct an object of this class for
+yourself, but if you want to, call
+C<<
+Pod::Simple::PullParserEndToken->new( I<tagname> )
+>>
+
+=head1 SEE ALSO
+
+L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm b/ext/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm
new file mode 100644
index 0000000000..9ead50d96e
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm
@@ -0,0 +1,130 @@
+
+require 5;
+package Pod::Simple::PullParserStartToken;
+use Pod::Simple::PullParserToken ();
+@ISA = ('Pod::Simple::PullParserToken');
+use strict;
+
+sub new { # Class->new(tagname, optional_attrhash);
+ my $class = shift;
+ return bless ['start', @_], ref($class) || $class;
+}
+
+# Purely accessors:
+
+sub tagname { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] }
+sub tag { shift->tagname(@_) }
+
+sub is_tagname { $_[0][1] eq $_[1] }
+sub is_tag { shift->is_tagname(@_) }
+
+
+sub attr_hash { $_[0][2] ||= {} }
+
+sub attr {
+ if(@_ == 2) { # Reading: $token->attr('attrname')
+ ${$_[0][2] || return undef}{ $_[1] };
+ } elsif(@_ > 2) { # Writing: $token->attr('attrname', 'newval')
+ ${$_[0][2] ||= {}}{ $_[1] } = $_[2];
+ } else {
+ require Carp;
+ Carp::croak(
+ 'usage: $object->attr("val") or $object->attr("key", "newval")');
+ return undef;
+ }
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::PullParserStartToken -- start-tokens from Pod::Simple::PullParser
+
+=head1 SYNOPSIS
+
+(See L<Pod::Simple::PullParser>)
+
+=head1 DESCRIPTION
+
+When you do $parser->get_token on a L<Pod::Simple::PullParser> object, you might
+get an object of this class.
+
+This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods,
+and adds these methods:
+
+=over
+
+=item $token->tagname
+
+This returns the tagname for this start-token object.
+For example, parsing a "=head1 ..." line will give you
+a start-token with the tagname of "head1", token(s) for its
+content, and then an end-token with the tagname of "head1".
+
+=item $token->tagname(I<somestring>)
+
+This changes the tagname for this start-token object.
+You probably won't need
+to do this.
+
+=item $token->tag(...)
+
+A shortcut for $token->tagname(...)
+
+=item $token->is_tag(I<somestring>) or $token->is_tagname(I<somestring>)
+
+These are shortcuts for C<< $token->tag() eq I<somestring> >>
+
+=item $token->attr(I<attrname>)
+
+This returns the value of the I<attrname> attribute for this start-token
+object, or undef.
+
+For example, parsing a LZ<><Foo/"Bar"> link will produce a start-token
+with a "to" attribute with the value "Foo", a "type" attribute with the
+value "pod", and a "section" attribute with the value "Bar".
+
+=item $token->attr(I<attrname>, I<newvalue>)
+
+This sets the I<attrname> attribute for this start-token object to
+I<newvalue>. You probably won't need to do this.
+
+=item $token->attr_hash
+
+This returns the hashref that is the attribute set for this start-token.
+This is useful if (for example) you want to ask what all the attributes
+are -- you can just do C<< keys %{$token->attr_hash} >>
+
+=back
+
+
+You're unlikely to ever need to construct an object of this class for
+yourself, but if you want to, call
+C<<
+Pod::Simple::PullParserStartToken->new( I<tagname>, I<attrhash> )
+>>
+
+=head1 SEE ALSO
+
+L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm b/ext/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm
new file mode 100644
index 0000000000..2d1a1d7dc4
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm
@@ -0,0 +1,101 @@
+
+require 5;
+package Pod::Simple::PullParserTextToken;
+use Pod::Simple::PullParserToken ();
+@ISA = ('Pod::Simple::PullParserToken');
+use strict;
+
+sub new { # Class->new(text);
+ my $class = shift;
+ return bless ['text', @_], ref($class) || $class;
+}
+
+# Purely accessors:
+
+sub text { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] }
+
+sub text_r { \ $_[0][1] }
+
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::PullParserTextToken -- text-tokens from Pod::Simple::PullParser
+
+=head1 SYNOPSIS
+
+(See L<Pod::Simple::PullParser>)
+
+=head1 DESCRIPTION
+
+When you do $parser->get_token on a L<Pod::Simple::PullParser>, you might
+get an object of this class.
+
+This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods,
+and adds these methods:
+
+=over
+
+=item $token->text
+
+This returns the text that this token holds. For example, parsing
+CZ<><foo> will return a C start-token, a text-token, and a C end-token. And
+if you want to get the "foo" out of the text-token, call C<< $token->text >>
+
+=item $token->text(I<somestring>)
+
+This changes the string that this token holds. You probably won't need
+to do this.
+
+=item $token->text_r()
+
+This returns a scalar reference to the string that this token holds.
+This can be useful if you don't want to memory-copy the potentially
+large text value (well, as large as a paragraph or a verbatim block)
+as calling $token->text would do.
+
+Or, if you want to alter the value, you can even do things like this:
+
+ for ( ${ $token->text_r } ) { # Aliases it with $_ !!
+
+ s/ The / the /g; # just for example
+
+ if( 'A' eq chr(65) ) { # (if in an ASCII world)
+ tr/\xA0/ /;
+ tr/\xAD//d;
+ }
+
+ ...or however you want to alter the value...
+ }
+
+=back
+
+You're unlikely to ever need to construct an object of this class for
+yourself, but if you want to, call
+C<<
+Pod::Simple::PullParserTextToken->new( I<text> )
+>>
+
+=head1 SEE ALSO
+
+L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/PullParserToken.pm b/ext/Pod-Simple/lib/Pod/Simple/PullParserToken.pm
new file mode 100644
index 0000000000..9ec3659f4e
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/PullParserToken.pm
@@ -0,0 +1,138 @@
+
+require 5;
+package Pod::Simple::PullParserToken;
+ # Base class for tokens gotten from Pod::Simple::PullParser's $parser->get_token
+@ISA = ();
+$VERSION = '2.02';
+use strict;
+
+sub new { # Class->new('type', stuff...); ## Overridden in derived classes anyway
+ my $class = shift;
+ return bless [@_], ref($class) || $class;
+}
+
+sub type { $_[0][0] } # Can't change the type of an object
+sub dump { Pod::Simple::pretty( [ @{ $_[0] } ] ) }
+
+sub is_start { $_[0][0] eq 'start' }
+sub is_end { $_[0][0] eq 'end' }
+sub is_text { $_[0][0] eq 'text' }
+
+1;
+__END__
+
+sub dump { '[' . _esc( @{ $_[0] } ) . ']' }
+
+# JUNK:
+
+sub _esc {
+ return '' unless @_;
+ my @out;
+ foreach my $in (@_) {
+ push @out, '"' . $in . '"';
+ $out[-1] =~ s/([^- \:\:\.\,\'\>\<\"\/\=\?\+\|\[\]\{\}\_a-zA-Z0-9_\`\~\!\#\%\^\&\*\(\)])/
+ sprintf( (ord($1) < 256) ? "\\x%02X" : "\\x{%X}", ord($1))
+ /eg;
+ }
+ return join ', ', @out;
+}
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::PullParserToken -- tokens from Pod::Simple::PullParser
+
+=head1 SYNOPSIS
+
+Given a $parser that's an object of class Pod::Simple::PullParser
+(or a subclass)...
+
+ while(my $token = $parser->get_token) {
+ $DEBUG and print "Token: ", $token->dump, "\n";
+ if($token->is_start) {
+ ...access $token->tagname, $token->attr, etc...
+
+ } elsif($token->is_text) {
+ ...access $token->text, $token->text_r, etc...
+
+ } elsif($token->is_end) {
+ ...access $token->tagname...
+
+ }
+ }
+
+(Also see L<Pod::Simple::PullParser>)
+
+=head1 DESCRIPTION
+
+When you do $parser->get_token on a L<Pod::Simple::PullParser>, you should
+get an object of a subclass of Pod::Simple::PullParserToken.
+
+Subclasses will add methods, and will also inherit these methods:
+
+=over
+
+=item $token->type
+
+This returns the type of the token. This will be either the string
+"start", the string "text", or the string "end".
+
+Once you know what the type of an object is, you then know what
+subclass it belongs to, and therefore what methods it supports.
+
+Yes, you could probably do the same thing with code like
+$token->isa('Pod::Simple::PullParserEndToken'), but that's not so
+pretty as using just $token->type, or even the following shortcuts:
+
+=item $token->is_start
+
+This is a shortcut for C<< $token->type() eq "start" >>
+
+=item $token->is_text
+
+This is a shortcut for C<< $token->type() eq "text" >>
+
+=item $token->is_end
+
+This is a shortcut for C<< $token->type() eq "end" >>
+
+=item $token->dump
+
+This returns a handy stringified value of this object. This
+is useful for debugging, as in:
+
+ while(my $token = $parser->get_token) {
+ $DEBUG and print "Token: ", $token->dump, "\n";
+ ...
+ }
+
+=back
+
+=head1 SEE ALSO
+
+My subclasses:
+L<Pod::Simple::PullParserStartToken>,
+L<Pod::Simple::PullParserTextToken>, and
+L<Pod::Simple::PullParserEndToken>.
+
+L<Pod::Simple::PullParser> and L<Pod::Simple>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/RTF.pm b/ext/Pod-Simple/lib/Pod/Simple/RTF.pm
new file mode 100644
index 0000000000..de2a7b32d6
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/RTF.pm
@@ -0,0 +1,674 @@
+
+require 5;
+package Pod::Simple::RTF;
+
+#sub DEBUG () {4};
+#sub Pod::Simple::DEBUG () {4};
+#sub Pod::Simple::PullParser::DEBUG () {4};
+
+use strict;
+use vars qw($VERSION @ISA %Escape $WRAP %Tagmap);
+$VERSION = '2.02';
+use Pod::Simple::PullParser ();
+BEGIN {@ISA = ('Pod::Simple::PullParser')}
+
+use Carp ();
+BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
+
+$WRAP = 1 unless defined $WRAP;
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub _openclose {
+ return map {;
+ m/^([-A-Za-z]+)=(\w[^\=]*)$/s or die "what's <$_>?";
+ ( $1, "{\\$2\n", "/$1", "}" );
+ } @_;
+}
+
+my @_to_accept;
+
+%Tagmap = (
+ # 'foo=bar' means ('foo' => '{\bar'."\n", '/foo' => '}')
+ _openclose(
+ 'B=cs18\b',
+ 'I=cs16\i',
+ 'C=cs19\f1\lang1024\noproof',
+ 'F=cs17\i\lang1024\noproof',
+
+ 'VerbatimI=cs26\i',
+ 'VerbatimB=cs27\b',
+ 'VerbatimBI=cs28\b\i',
+
+ map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
+ qw[
+ underline=ul smallcaps=scaps shadow=shad
+ superscript=super subscript=sub strikethrough=strike
+ outline=outl emboss=embo engrave=impr
+ dotted-underline=uld dash-underline=uldash
+ dot-dash-underline=uldashd dot-dot-dash-underline=uldashdd
+ double-underline=uldb thick-underline=ulth
+ word-underline=ulw wave-underline=ulwave
+ ]
+ # But no double-strikethrough, because MSWord can't agree with the
+ # RTF spec on whether it's supposed to be \strikedl or \striked1 (!!!)
+ ),
+
+ # Bit of a hack here:
+ 'L=pod' => '{\cs22\i'."\n",
+ 'L=url' => '{\cs23\i'."\n",
+ 'L=man' => '{\cs24\i'."\n",
+ '/L' => '}',
+
+ 'Data' => "\n",
+ '/Data' => "\n",
+
+ 'Verbatim' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
+ '/Verbatim' => "\n\\par}\n",
+ 'VerbatimFormatted' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
+ '/VerbatimFormatted' => "\n\\par}\n",
+ 'Para' => "\n{\\pard\\li#rtfindent#\\sa180\n",
+ '/Para' => "\n\\par}\n",
+ 'head1' => "\n{\\pard\\li#rtfindent#\\s31\\keepn\\sb90\\sa180\\f2\\fs#head1_halfpoint_size#\\ul{\n",
+ '/head1' => "\n}\\par}\n",
+ 'head2' => "\n{\\pard\\li#rtfindent#\\s32\\keepn\\sb90\\sa180\\f2\\fs#head2_halfpoint_size#\\ul{\n",
+ '/head2' => "\n}\\par}\n",
+ 'head3' => "\n{\\pard\\li#rtfindent#\\s33\\keepn\\sb90\\sa180\\f2\\fs#head3_halfpoint_size#\\ul{\n",
+ '/head3' => "\n}\\par}\n",
+ 'head4' => "\n{\\pard\\li#rtfindent#\\s34\\keepn\\sb90\\sa180\\f2\\fs#head4_halfpoint_size#\\ul{\n",
+ '/head4' => "\n}\\par}\n",
+ # wordpad borks on \tc\tcl1, or I'd put that in =head1 and =head2
+
+ 'item-bullet' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
+ '/item-bullet' => "\n\\par}\n",
+ 'item-number' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
+ '/item-number' => "\n\\par}\n",
+ 'item-text' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
+ '/item-text' => "\n\\par}\n",
+
+ # we don't need any styles for over-* and /over-*
+);
+
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub new {
+ my $new = shift->SUPER::new(@_);
+ $new->nix_X_codes(1);
+ $new->nbsp_for_S(1);
+ $new->accept_targets( 'rtf', 'RTF' );
+
+ $new->{'Tagmap'} = {%Tagmap};
+
+ $new->accept_codes(@_to_accept);
+ $new->accept_codes('VerbatimFormatted');
+ DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
+ $new->doc_lang(
+ ( $ENV{'RTFDEFLANG'} || '') =~ m/^(\d{1,10})$/s ? $1
+ : ($ENV{'RTFDEFLANG'} || '') =~ m/^0?x([a-fA-F0-9]{1,10})$/s ? hex($1)
+ # yes, tolerate hex!
+ : ($ENV{'RTFDEFLANG'} || '') =~ m/^([a-fA-F0-9]{4})$/s ? hex($1)
+ # yes, tolerate even more hex!
+ : '1033'
+ );
+
+ $new->head1_halfpoint_size(32);
+ $new->head2_halfpoint_size(28);
+ $new->head3_halfpoint_size(25);
+ $new->head4_halfpoint_size(22);
+ $new->codeblock_halfpoint_size(18);
+ $new->header_halfpoint_size(17);
+ $new->normal_halfpoint_size(25);
+
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+__PACKAGE__->_accessorize(
+ 'doc_lang',
+ 'head1_halfpoint_size',
+ 'head2_halfpoint_size',
+ 'head3_halfpoint_size',
+ 'head4_halfpoint_size',
+ 'codeblock_halfpoint_size',
+ 'header_halfpoint_size',
+ 'normal_halfpoint_size',
+ 'no_proofing_exemptions',
+);
+
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub run {
+ my $self = $_[0];
+ return $self->do_middle if $self->bare_output;
+ return
+ $self->do_beginning && $self->do_middle && $self->do_end;
+}
+
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub do_middle { # the main work
+ my $self = $_[0];
+ my $fh = $self->{'output_fh'};
+
+ my($token, $type, $tagname, $scratch);
+ my @stack;
+ my @indent_stack;
+ $self->{'rtfindent'} = 0 unless defined $self->{'rtfindent'};
+
+ while($token = $self->get_token) {
+
+ if( ($type = $token->type) eq 'text' ) {
+ if( $self->{'rtfverbatim'} ) {
+ DEBUG > 1 and print " $type " , $token->text, " in verbatim!\n";
+ rtf_esc_codely($scratch = $token->text);
+ print $fh $scratch;
+ next;
+ }
+
+ DEBUG > 1 and print " $type " , $token->text, "\n";
+
+ $scratch = $token->text;
+ $scratch =~ tr/\t\cb\cc/ /d;
+
+ $self->{'no_proofing_exemptions'} or $scratch =~
+ s/(?:
+ ^
+ |
+ (?<=[\cm\cj\t "\[\<\(])
+ ) # start on whitespace, sequence-start, or quote
+ ( # something looking like a Perl token:
+ (?:
+ [\$\@\:\<\*\\_]\S+ # either starting with a sigil, etc.
+ )
+ |
+ # or starting alpha, but containing anything strange:
+ (?:
+ [a-zA-Z'\x80-\xFF]+[\$\@\:_<>\(\\\*]\S+
+ )
+ )
+ /\cb$1\cc/xsg
+ ;
+
+ rtf_esc($scratch);
+ $scratch =~
+ s/(
+ [^\cm\cj\n]{65} # Snare 65 characters from a line
+ [^\cm\cj\n\x20]{0,50} # and finish any current word
+ )
+ (\x20{1,10})(?![\cm\cj\n]) # capture some spaces not at line-end
+ /$1$2\n/gx # and put a NL before those spaces
+ if $WRAP;
+ # This may wrap at well past the 65th column, but not past the 120th.
+
+ print $fh $scratch;
+
+ } elsif( $type eq 'start' ) {
+ DEBUG > 1 and print " +$type ",$token->tagname,
+ " (", map("<$_> ", %{$token->attr_hash}), ")\n";
+
+ if( ($tagname = $token->tagname) eq 'Verbatim'
+ or $tagname eq 'VerbatimFormatted'
+ ) {
+ ++$self->{'rtfverbatim'};
+ my $next = $self->get_token;
+ next unless defined $next;
+ my $line_count = 1;
+ if($next->type eq 'text') {
+ my $t = $next->text_r;
+ while( $$t =~ m/$/mg ) {
+ last if ++$line_count > 15; # no point in counting further
+ }
+ DEBUG > 3 and print " verbatim line count: $line_count\n";
+ }
+ $self->unget_token($next);
+ $self->{'rtfkeep'} = ($line_count > 15) ? '' : '\keepn' ;
+
+ } elsif( $tagname =~ m/^item-/s ) {
+ my @to_unget;
+ my $text_count_here = 0;
+ $self->{'rtfitemkeepn'} = '';
+ # Some heuristics to stop item-*'s functioning as subheadings
+ # from getting split from the things they're subheadings for.
+ #
+ # It's not terribly pretty, but it really does make things pretty.
+ #
+ while(1) {
+ push @to_unget, $self->get_token;
+ pop(@to_unget), last unless defined $to_unget[-1];
+ # Erroneously used to be "unshift" instead of pop! Adds instead
+ # of removes, and operates on the beginning instead of the end!
+
+ if($to_unget[-1]->type eq 'text') {
+ if( ($text_count_here += length ${$to_unget[-1]->text_r}) > 150 ){
+ DEBUG > 1 and print " item-* is too long to be keepn'd.\n";
+ last;
+ }
+ } elsif (@to_unget > 1 and
+ $to_unget[-2]->type eq 'end' and
+ $to_unget[-2]->tagname =~ m/^item-/s
+ ) {
+ # Bail out here, after setting rtfitemkeepn yea or nay.
+ $self->{'rtfitemkeepn'} = '\keepn' if
+ $to_unget[-1]->type eq 'start' and
+ $to_unget[-1]->tagname eq 'Para';
+
+ DEBUG > 1 and printf " item-* before %s(%s) %s keepn'd.\n",
+ $to_unget[-1]->type,
+ $to_unget[-1]->can('tagname') ? $to_unget[-1]->tagname : '',
+ $self->{'rtfitemkeepn'} ? "gets" : "doesn't get";
+ last;
+ } elsif (@to_unget > 40) {
+ DEBUG > 1 and print " item-* now has too many tokens (",
+ scalar(@to_unget),
+ (DEBUG > 4) ? (q<: >, map($_->dump, @to_unget)) : (),
+ ") to be keepn'd.\n";
+ last; # give up
+ }
+ # else keep while'ing along
+ }
+ # Now put it aaaaall back...
+ $self->unget_token(@to_unget);
+
+ } elsif( $tagname =~ m/^over-/s ) {
+ push @stack, $1;
+ push @indent_stack,
+ int($token->attr('indent') * 4 * $self->normal_halfpoint_size);
+ DEBUG and print "Indenting over $indent_stack[-1] twips.\n";
+ $self->{'rtfindent'} += $indent_stack[-1];
+
+ } elsif ($tagname eq 'L') {
+ $tagname .= '=' . ($token->attr('type') || 'pod');
+
+ } elsif ($tagname eq 'Data') {
+ my $next = $self->get_token;
+ next unless defined $next;
+ unless( $next->type eq 'text' ) {
+ $self->unget_token($next);
+ next;
+ }
+ DEBUG and print " raw text ", $next->text, "\n";
+ printf $fh "\n" . $next->text . "\n";
+ next;
+ }
+
+ defined($scratch = $self->{'Tagmap'}{$tagname}) or next;
+ $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
+ print $fh $scratch;
+
+ if ($tagname eq 'item-number') {
+ print $fh $token->attr('number'), ". \n";
+ } elsif ($tagname eq 'item-bullet') {
+ print $fh "\\'95 \n";
+ #for funky testing: print $fh '', rtf_esc("\x{4E4B}\x{9053}");
+ }
+
+ } elsif( $type eq 'end' ) {
+ DEBUG > 1 and print " -$type ",$token->tagname,"\n";
+ if( ($tagname = $token->tagname) =~ m/^over-/s ) {
+ DEBUG and print "Indenting back $indent_stack[-1] twips.\n";
+ $self->{'rtfindent'} -= pop @indent_stack;
+ pop @stack;
+ } elsif( $tagname eq 'Verbatim' or $tagname eq 'VerbatimFormatted') {
+ --$self->{'rtfverbatim'};
+ }
+ defined($scratch = $self->{'Tagmap'}{"/$tagname"}) or next;
+ $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
+ print $fh $scratch;
+ }
+ }
+ return 1;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub do_beginning {
+ my $self = $_[0];
+ my $fh = $self->{'output_fh'};
+ return print $fh join '',
+ $self->doc_init,
+ $self->font_table,
+ $self->stylesheet,
+ $self->color_table,
+ $self->doc_info,
+ $self->doc_start,
+ "\n"
+ ;
+}
+
+sub do_end {
+ my $self = $_[0];
+ my $fh = $self->{'output_fh'};
+ return print $fh '}'; # that should do it
+}
+
+###########################################################################
+
+sub stylesheet {
+ return sprintf <<'END',
+{\stylesheet
+{\snext0 Normal;}
+{\*\cs10 \additive Default Paragraph Font;}
+{\*\cs16 \additive \i \sbasedon10 pod-I;}
+{\*\cs17 \additive \i\lang1024\noproof \sbasedon10 pod-F;}
+{\*\cs18 \additive \b \sbasedon10 pod-B;}
+{\*\cs19 \additive \f1\lang1024\noproof\sbasedon10 pod-C;}
+{\s20\ql \li0\ri0\sa180\widctlpar\f1\fs%s\lang1024\noproof\sbasedon0 \snext0 pod-codeblock;}
+{\*\cs21 \additive \lang1024\noproof \sbasedon10 pod-computerese;}
+{\*\cs22 \additive \i\lang1024\noproof\sbasedon10 pod-L-pod;}
+{\*\cs23 \additive \i\lang1024\noproof\sbasedon10 pod-L-url;}
+{\*\cs24 \additive \i\lang1024\noproof\sbasedon10 pod-L-man;}
+
+{\*\cs25 \additive \f1\lang1024\noproof\sbasedon0 pod-codelbock-plain;}
+{\*\cs26 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-ital;}
+{\*\cs27 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold;}
+{\*\cs28 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold-ital;}
+
+{\s31\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head1;}
+{\s32\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head2;}
+{\s33\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head3;}
+{\s34\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head4;}
+}
+
+END
+
+ $_[0]->codeblock_halfpoint_size(),
+ $_[0]->head1_halfpoint_size(),
+ $_[0]->head2_halfpoint_size(),
+ $_[0]->head3_halfpoint_size(),
+ $_[0]->head4_halfpoint_size(),
+ ;
+}
+
+###########################################################################
+# Override these as necessary for further customization
+
+sub font_table {
+ return <<'END'; # text font, code font, heading font
+{\fonttbl
+{\f0\froman Times New Roman;}
+{\f1\fmodern Courier New;}
+{\f2\fswiss Arial;}
+}
+
+END
+}
+
+sub doc_init {
+ return <<'END';
+{\rtf1\ansi\deff0
+
+END
+}
+
+sub color_table {
+ return <<'END';
+{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
+END
+}
+
+
+sub doc_info {
+ my $self = $_[0];
+
+ my $class = ref($self) || $self;
+
+ my $tag = __PACKAGE__ . ' ' . $VERSION;
+
+ unless($class eq __PACKAGE__) {
+ $tag = " ($tag)";
+ $tag = " v" . $self->VERSION . $tag if defined $self->VERSION;
+ $tag = $class . $tag;
+ }
+
+ return sprintf <<'END',
+{\info{\doccomm
+%s
+ using %s v%s
+ under Perl v%s at %s GMT}
+{\author [see doc]}{\company [see doc]}{\operator [see doc]}
+}
+
+END
+
+ # None of the following things should need escaping, I dare say!
+ $tag,
+ $ISA[0], $ISA[0]->VERSION(),
+ $], scalar(gmtime),
+ ;
+}
+
+sub doc_start {
+ my $self = $_[0];
+ my $title = $self->get_short_title();
+ DEBUG and print "Short Title: <$title>\n";
+ $title .= ' ' if length $title;
+
+ $title =~ s/ *$/ /s;
+ $title =~ s/^ //s;
+ $title =~ s/ $/, /s;
+ # make sure it ends in a comma and a space, unless it's 0-length
+
+ my $is_obviously_module_name;
+ $is_obviously_module_name = 1
+ if $title =~ m/^\S+$/s and $title =~ m/::/s;
+ # catches the most common case, at least
+
+ DEBUG and print "Title0: <$title>\n";
+ $title = rtf_esc($title);
+ DEBUG and print "Title1: <$title>\n";
+ $title = '\lang1024\noproof ' . $title
+ if $is_obviously_module_name;
+
+ return sprintf <<'END',
+\deflang%s\plain\lang%s\widowctrl
+{\header\pard\qr\plain\f2\fs%s
+%s
+p.\chpgn\par}
+\fs%s
+
+END
+ ($self->doc_lang) x 2,
+ $self->header_halfpoint_size,
+ $title,
+ $self->normal_halfpoint_size,
+ ;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#-------------------------------------------------------------------------
+
+use integer;
+sub rtf_esc {
+ my $x; # scratch
+ if(!defined wantarray) { # void context: alter in-place!
+ for(@_) {
+ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ }
+ return;
+ } elsif(wantarray) { # return an array
+ return map {; ($x = $_) =~
+ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ $x;
+ } @_;
+ } else { # return a single scalar
+ ($x = ((@_ == 1) ? $_[0] : join '', @_)
+ ) =~ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ # Escape \, {, }, -, control chars, and 7f-ff.
+ $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ return $x;
+ }
+}
+
+sub rtf_esc_codely {
+ # Doesn't change "-" to hard-hyphen, nor apply computerese style-smarts.
+ # We don't want to change the "-" to hard-hyphen, because we want to
+ # be able to paste this into a file and run it without there being
+ # dire screaming about the mysterious hard-hyphen character (which
+ # looks just like a normal dash character).
+
+ my $x; # scratch
+ if(!defined wantarray) { # void context: alter in-place!
+ for(@_) {
+ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ }
+ return;
+ } elsif(wantarray) { # return an array
+ return map {; ($x = $_) =~
+ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ $x;
+ } @_;
+ } else { # return a single scalar
+ ($x = ((@_ == 1) ? $_[0] : join '', @_)
+ ) =~ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g; # ESCAPER
+ # Escape \, {, }, -, control chars, and 7f-ff.
+ $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ return $x;
+ }
+}
+
+%Escape = (
+ map( (chr($_),chr($_)), # things not apparently needing escaping
+ 0x20 .. 0x7E ),
+ map( (chr($_),sprintf("\\'%02x", $_)), # apparently escapeworthy things
+ 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46),
+
+ # We get to escape out 'F' so that we can send RTF files thru the mail
+ # without the slightest worry that paragraphs beginning with "From"
+ # will get munged.
+
+ # And some refinements:
+ "\cm" => "\n",
+ "\cj" => "\n",
+ "\n" => "\n\\line ",
+
+ "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay)
+ "\f" => "\n\\page\n", # Formfeed
+ "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen
+ "\xA0" => "\\~", # Latin-1 non-breaking space
+ "\xAD" => "\\-", # Latin-1 soft (optional) hyphen
+
+ # CRAZY HACKS:
+ "\n" => "\\line\n",
+ "\r" => "\n",
+ "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1
+ "\cc" => "}",
+);
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::RTF -- format Pod as RTF
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::RTF -e \
+ "exit Pod::Simple::RTF->filter(shift)->any_errata_seen" \
+ thingy.pod > thingy.rtf
+
+=head1 DESCRIPTION
+
+This class is a formatter that takes Pod and renders it as RTF, good for
+viewing/printing in MSWord, WordPad/write.exe, TextEdit, etc.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+=head1 FORMAT CONTROL ATTRIBUTES
+
+You can set these attributes on the parser object before you
+call C<parse_file> (or a similar method) on it:
+
+=over
+
+=item $parser->head1_halfpoint_size( I<halfpoint_integer> );
+
+=item $parser->head2_halfpoint_size( I<halfpoint_integer> );
+
+=item $parser->head3_halfpoint_size( I<halfpoint_integer> );
+
+=item $parser->head4_halfpoint_size( I<halfpoint_integer> );
+
+These methods set the size (in half-points, like 52 for 26-point)
+that these heading levels will appear as.
+
+=item $parser->codeblock_halfpoint_size( I<halfpoint_integer> );
+
+This method sets the size (in half-points, like 21 for 10.5-point)
+that codeblocks ("verbatim sections") will appear as.
+
+=item $parser->header_halfpoint_size( I<halfpoint_integer> );
+
+This method sets the size (in half-points, like 15 for 7.5-point)
+that the header on each page will appear in. The header
+is usually just "I<modulename> p. I<pagenumber>".
+
+=item $parser->normal_halfpoint_size( I<halfpoint_integer> );
+
+This method sets the size (in half-points, like 26 for 13-point)
+that normal paragraphic text will appear in.
+
+=item $parser->no_proofing_exemptions( I<true_or_false> );
+
+Set this value to true if you don't want the formatter to try
+putting a hidden code on all Perl symbols (as best as it can
+notice them) that labels them as being not in English, and
+so not worth spellchecking.
+
+=item $parser->doc_lang( I<microsoft_decimal_language_code> )
+
+This sets the language code to tag this document as being in. By
+default, it is currently the value of the environment variable
+C<RTFDEFLANG>, or if that's not set, then the value
+1033 (for US English).
+
+Setting this appropriately is useful if you want to use the RTF
+to spellcheck, and/or if you want it to hyphenate right.
+
+Here are some notable values:
+
+ 1033 US English
+ 2057 UK English
+ 3081 Australia English
+ 4105 Canada English
+ 1034 Spain Spanish
+ 2058 Mexico Spanish
+ 1031 Germany German
+ 1036 France French
+ 3084 Canada French
+ 1035 Finnish
+ 1044 Norwegian (Bokmal)
+ 2068 Norwegian (Nynorsk)
+
+=back
+
+If you are particularly interested in customizing this module's output
+even more, see the source and/or write to me.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<RTF::Writer>, L<RTF::Cookbook>, L<RTF::Document>,
+L<RTF::Generator>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Search.pm b/ext/Pod-Simple/lib/Pod/Simple/Search.pm
new file mode 100644
index 0000000000..980b3b7739
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Search.pm
@@ -0,0 +1,1016 @@
+
+require 5.005;
+package Pod::Simple::Search;
+use strict;
+
+use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY);
+$VERSION = 3.04; ## Current version of this package
+
+BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level
+use Carp ();
+
+$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i;
+ # flag to occasionally sleep for $SLEEPY - 1 seconds.
+
+$MAX_VERSION_WITHIN ||= 60;
+
+#############################################################################
+
+#use diagnostics;
+use File::Spec ();
+use File::Basename qw( basename );
+use Config ();
+use Cwd qw( cwd );
+
+#==========================================================================
+__PACKAGE__->_accessorize( # Make my dumb accessor methods
+ 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob',
+ 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name',
+);
+#==========================================================================
+
+sub new {
+ my $class = shift;
+ my $self = bless {}, ref($class) || $class;
+ $self->init;
+ return $self;
+}
+
+sub init {
+ my $self = shift;
+ $self->inc(1);
+ $self->verbose(DEBUG);
+ return $self;
+}
+
+#--------------------------------------------------------------------------
+
+sub survey {
+ my($self, @search_dirs) = @_;
+ $self = $self->new unless ref $self; # tolerate being a class method
+
+ $self->_expand_inc( \@search_dirs );
+
+
+ $self->{'_scan_count'} = 0;
+ $self->{'_dirs_visited'} = {};
+ $self->path2name( {} );
+ $self->name2path( {} );
+ $self->limit_re( $self->_limit_glob_to_limit_re ) if $self->{'limit_glob'};
+ my $cwd = cwd();
+ my $verbose = $self->verbose;
+ local $_; # don't clobber the caller's $_ !
+
+ foreach my $try (@search_dirs) {
+ unless( File::Spec->file_name_is_absolute($try) ) {
+ # make path absolute
+ $try = File::Spec->catfile( $cwd ,$try);
+ }
+ # simplify path
+ $try = File::Spec->canonpath($try);
+
+ my $start_in;
+ my $modname_prefix;
+ if($self->{'dir_prefix'}) {
+ $start_in = File::Spec->catdir(
+ $try,
+ grep length($_), split '[\\/:]+', $self->{'dir_prefix'}
+ );
+ $modname_prefix = [grep length($_), split m{[:/\\]}, $self->{'dir_prefix'}];
+ $verbose and print "Appending \"$self->{'dir_prefix'}\" to $try, ",
+ "giving $start_in (= @$modname_prefix)\n";
+ } else {
+ $start_in = $try;
+ }
+
+ if( $self->{'_dirs_visited'}{$start_in} ) {
+ $verbose and print "Directory '$start_in' already seen, skipping.\n";
+ next;
+ } else {
+ $self->{'_dirs_visited'}{$start_in} = 1;
+ }
+
+ unless(-e $start_in) {
+ $verbose and print "Skipping non-existent $start_in\n";
+ next;
+ }
+
+ my $closure = $self->_make_search_callback;
+
+ if(-d $start_in) {
+ # Normal case:
+ $verbose and print "Beginning excursion under $start_in\n";
+ $self->_recurse_dir( $start_in, $closure, $modname_prefix );
+ $verbose and print "Back from excursion under $start_in\n\n";
+
+ } elsif(-f _) {
+ # A excursion consisting of just one file!
+ $_ = basename($start_in);
+ $verbose and print "Pondering $start_in ($_)\n";
+ $closure->($start_in, $_, 0, []);
+
+ } else {
+ $verbose and print "Skipping mysterious $start_in\n";
+ }
+ }
+ $self->progress and $self->progress->done(
+ "Noted $$self{'_scan_count'} Pod files total");
+
+ return unless defined wantarray; # void
+ return $self->name2path unless wantarray; # scalar
+ return $self->name2path, $self->path2name; # list
+}
+
+
+#==========================================================================
+sub _make_search_callback {
+ my $self = $_[0];
+
+ # Put the options in variables, for easy access
+ my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress,$path2name,$name2path) =
+ map scalar($self->$_()),
+ qw(laborious verbose shadows limit_re callback progress path2name name2path);
+
+ my($file, $shortname, $isdir, $modname_bits);
+ return sub {
+ ($file, $shortname, $isdir, $modname_bits) = @_;
+
+ if($isdir) { # this never gets called on the startdir itself, just subdirs
+
+ if( $self->{'_dirs_visited'}{$file} ) {
+ $verbose and print "Directory '$file' already seen, skipping.\n";
+ return 'PRUNE';
+ }
+
+ print "Looking in dir $file\n" if $verbose;
+
+ unless ($laborious) { # $laborious overrides pruning
+ if( m/^(\d+\.[\d_]{3,})\z/s
+ and do { my $x = $1; $x =~ tr/_//d; $x != $] }
+ ) {
+ $verbose and print "Perl $] version mismatch on $_, skipping.\n";
+ return 'PRUNE';
+ }
+
+ if( m/^([A-Za-z][a-zA-Z0-9_]*)\z/s ) {
+ $verbose and print "$_ is a well-named module subdir. Looking....\n";
+ } else {
+ $verbose and print "$_ is a fishy directory name. Skipping.\n";
+ return 'PRUNE';
+ }
+ } # end unless $laborious
+
+ $self->{'_dirs_visited'}{$file} = 1;
+ return; # (not pruning);
+ }
+
+
+ # Make sure it's a file even worth even considering
+ if($laborious) {
+ unless(
+ m/\.(pod|pm|plx?)\z/i || -x _ and -T _
+ # Note that the cheapest operation (the RE) is run first.
+ ) {
+ $verbose > 1 and print " Brushing off uninteresting $file\n";
+ return;
+ }
+ } else {
+ unless( m/^[-_a-zA-Z0-9]+\.(?:pod|pm|plx?)\z/is ) {
+ $verbose > 1 and print " Brushing off oddly-named $file\n";
+ return;
+ }
+ }
+
+ $verbose and print "Considering item $file\n";
+ my $name = $self->_path2modname( $file, $shortname, $modname_bits );
+ $verbose > 0.01 and print " Nominating $file as $name\n";
+
+ if($limit_re and $name !~ m/$limit_re/i) {
+ $verbose and print "Shunning $name as not matching $limit_re\n";
+ return;
+ }
+
+ if( !$shadows and $name2path->{$name} ) {
+ $verbose and print "Not worth considering $file ",
+ "-- already saw $name as ",
+ join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n";
+ return;
+ }
+
+ # Put off until as late as possible the expense of
+ # actually reading the file:
+ if( m/\.pod\z/is ) {
+ # just assume it has pod, okay?
+ } else {
+ $progress and $progress->reach($self->{'_scan_count'}, "Scanning $file");
+ return unless $self->contains_pod( $file );
+ }
+ ++ $self->{'_scan_count'};
+
+ # Or finally take note of it:
+ if( $name2path->{$name} ) {
+ $verbose and print
+ "Duplicate POD found (shadowing?): $name ($file)\n",
+ " Already seen in ",
+ join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n";
+ } else {
+ $name2path->{$name} = $file; # Noting just the first occurrence
+ }
+ $verbose and print " Noting $name = $file\n";
+ if( $callback ) {
+ local $_ = $_; # insulate from changes, just in case
+ $callback->($file, $name);
+ }
+ $path2name->{$file} = $name;
+ return;
+ }
+}
+
+#==========================================================================
+
+sub _path2modname {
+ my($self, $file, $shortname, $modname_bits) = @_;
+
+ # this code simplifies the POD name for Perl modules:
+ # * remove "site_perl"
+ # * remove e.g. "i586-linux" (from 'archname')
+ # * remove e.g. 5.00503
+ # * remove pod/ if followed by perl*.pod (e.g. in pod/perlfunc.pod)
+ # * dig into the file for case-preserved name if not already mixed case
+
+ my @m = @$modname_bits;
+ my $x;
+ my $verbose = $self->verbose;
+
+ # Shaving off leading naughty-bits
+ while(@m
+ and defined($x = lc( $m[0] ))
+ and( $x eq 'site_perl'
+ or($x eq 'pod' and @m == 1 and $shortname =~ m{^perl.*\.pod$}s )
+ or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum
+ or $x eq lc( $Config::Config{'archname'} )
+ )) { shift @m }
+
+ my $name = join '::', @m, $shortname;
+ $self->_simplify_base($name);
+
+ # On VMS, case-preserved document names can't be constructed from
+ # filenames, so try to extract them from the "=head1 NAME" tag in the
+ # file instead.
+ if ($^O eq 'VMS' && ($name eq lc($name) || $name eq uc($name))) {
+ open PODFILE, "<$file" or die "_path2modname: Can't open $file: $!";
+ my $in_pod = 0;
+ my $in_name = 0;
+ my $line;
+ while ($line = <PODFILE>) {
+ chomp $line;
+ $in_pod = 1 if ($line =~ m/^=\w/);
+ $in_pod = 0 if ($line =~ m/^=cut/);
+ next unless $in_pod; # skip non-pod text
+ next if ($line =~ m/^\s*\z/); # and blank lines
+ next if ($in_pod && ($line =~ m/^X</)); # and commands
+ if ($in_name) {
+ if ($line =~ m/(\w+::)?(\w+)/) {
+ # substitute case-preserved version of name
+ my $podname = $2;
+ my $prefix = $1 || '';
+ $verbose and print "Attempting case restore of '$name' from '$prefix$podname'\n";
+ unless ($name =~ s/$prefix$podname/$prefix$podname/i) {
+ $verbose and print "Attempting case restore of '$name' from '$podname'\n";
+ $name =~ s/$podname/$podname/i;
+ }
+ last;
+ }
+ }
+ $in_name = 1 if ($line =~ m/^=head1 NAME/);
+ }
+ close PODFILE;
+ }
+
+ return $name;
+}
+
+#==========================================================================
+
+sub _recurse_dir {
+ my($self, $startdir, $callback, $modname_bits) = @_;
+
+ my $maxdepth = $self->{'fs_recursion_maxdepth'} || 10;
+ my $verbose = $self->verbose;
+
+ my $here_string = File::Spec->curdir;
+ my $up_string = File::Spec->updir;
+ $modname_bits ||= [];
+
+ my $recursor;
+ $recursor = sub {
+ my($dir_long, $dir_bare) = @_;
+ if( @$modname_bits >= 10 ) {
+ $verbose and print "Too deep! [@$modname_bits]\n";
+ return;
+ }
+
+ unless(-d $dir_long) {
+ $verbose > 2 and print "But it's not a dir! $dir_long\n";
+ return;
+ }
+ unless( opendir(INDIR, $dir_long) ) {
+ $verbose > 2 and print "Can't opendir $dir_long : $!\n";
+ closedir(INDIR);
+ return
+ }
+ my @items = sort readdir(INDIR);
+ closedir(INDIR);
+
+ push @$modname_bits, $dir_bare unless $dir_bare eq '';
+
+ my $i_full;
+ foreach my $i (@items) {
+ next if $i eq $here_string or $i eq $up_string or $i eq '';
+ $i_full = File::Spec->catfile( $dir_long, $i );
+
+ if(!-r $i_full) {
+ $verbose and print "Skipping unreadable $i_full\n";
+
+ } elsif(-f $i_full) {
+ $_ = $i;
+ $callback->( $i_full, $i, 0, $modname_bits );
+
+ } elsif(-d _) {
+ $i =~ s/\.DIR\z//i if $^O eq 'VMS';
+ $_ = $i;
+ my $rv = $callback->( $i_full, $i, 1, $modname_bits ) || '';
+
+ if($rv eq 'PRUNE') {
+ $verbose > 1 and print "OK, pruning";
+ } else {
+ # Otherwise, recurse into it
+ $recursor->( File::Spec->catdir($dir_long, $i) , $i);
+ }
+ } else {
+ $verbose > 1 and print "Skipping oddity $i_full\n";
+ }
+ }
+ pop @$modname_bits;
+ return;
+ };;
+
+ local $_;
+ $recursor->($startdir, '');
+
+ undef $recursor; # allow it to be GC'd
+
+ return;
+}
+
+
+#==========================================================================
+
+sub run {
+ # A function, useful in one-liners
+
+ my $self = __PACKAGE__->new;
+ $self->limit_glob($ARGV[0]) if @ARGV;
+ $self->callback( sub {
+ my($file, $name) = @_;
+ my $version = '';
+
+ # Yes, I know we won't catch the version in like a File/Thing.pm
+ # if we see File/Thing.pod first. That's just the way the
+ # cookie crumbles. -- SMB
+
+ if($file =~ m/\.pod$/i) {
+ # Don't bother looking for $VERSION in .pod files
+ DEBUG and print "Not looking for \$VERSION in .pod $file\n";
+ } elsif( !open(INPOD, $file) ) {
+ DEBUG and print "Couldn't open $file: $!\n";
+ close(INPOD);
+ } else {
+ # Sane case: file is readable
+ my $lines = 0;
+ while(<INPOD>) {
+ last if $lines++ > $MAX_VERSION_WITHIN; # some degree of sanity
+ if( s/^\s*\$VERSION\s*=\s*//s and m/\d/ ) {
+ DEBUG and print "Found version line (#$lines): $_";
+ s/\s*\#.*//s;
+ s/\;\s*$//s;
+ s/\s+$//s;
+ s/\t+/ /s; # nix tabs
+ # Optimize the most common cases:
+ $_ = "v$1"
+ if m{^v?["']?([0-9_]+(\.[0-9_]+)*)["']?$}s
+ # like in $VERSION = "3.14159";
+ or m{\$Revision:\s*([0-9_]+(?:\.[0-9_]+)*)\s*\$}s
+ # like in sprintf("%d.%02d", q$Revision: 4.13 $ =~ /(\d+)\.(\d+)/);
+ ;
+
+ # Like in sprintf("%d.%s", map {s/_//g; $_} q$Name: release-0_55-public $ =~ /-(\d+)_([\d_]+)/)
+ $_ = sprintf("v%d.%s",
+ map {s/_//g; $_}
+ $1 =~ m/-(\d+)_([\d_]+)/) # snare just the numeric part
+ if m{\$Name:\s*([^\$]+)\$}s
+ ;
+ $version = $_;
+ DEBUG and print "Noting $version as version\n";
+ last;
+ }
+ }
+ close(INPOD);
+ }
+ print "$name\t$version\t$file\n";
+ return;
+ # End of callback!
+ });
+
+ $self->survey;
+}
+
+#==========================================================================
+
+sub simplify_name {
+ my($self, $str) = @_;
+
+ # Remove all path components
+ # XXX Why not just use basename()? -- SMB
+
+ if ($^O eq 'MacOS') { $str =~ s{^.*:+}{}s }
+ else { $str =~ s{^.*/+}{}s }
+
+ $self->_simplify_base($str);
+ return $str;
+}
+
+#==========================================================================
+
+sub _simplify_base { # Internal method only
+
+ # strip Perl's own extensions
+ $_[1] =~ s/\.(pod|pm|plx?)\z//i;
+
+ # strip meaningless extensions on Win32 and OS/2
+ $_[1] =~ s/\.(bat|exe|cmd)\z//i if $^O =~ /mswin|os2/i;
+
+ # strip meaningless extensions on VMS
+ $_[1] =~ s/\.(com)\z//i if $^O eq 'VMS';
+
+ return;
+}
+
+#==========================================================================
+
+sub _expand_inc {
+ my($self, $search_dirs) = @_;
+
+ return unless $self->{'inc'};
+
+ if ($^O eq 'MacOS') {
+ push @$search_dirs,
+ grep $_ ne File::Spec->curdir, $self->_mac_whammy(@INC);
+ # Any other OSs need custom handling here?
+ } else {
+ push @$search_dirs, grep $_ ne File::Spec->curdir, @INC;
+ }
+
+ $self->{'laborious'} = 0; # Since inc said to use INC
+ return;
+}
+
+#==========================================================================
+
+sub _mac_whammy { # Tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
+ my @them;
+ (undef,@them) = @_;
+ for $_ (@them) {
+ if ( $_ eq '.' ) {
+ $_ = ':';
+ } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
+ $_ = ':'. $_;
+ } else {
+ $_ =~ s|^\./|:|;
+ }
+ }
+ return @them;
+}
+
+#==========================================================================
+
+sub _limit_glob_to_limit_re {
+ my $self = $_[0];
+ my $limit_glob = $self->{'limit_glob'} || return;
+
+ my $limit_re = '^' . quotemeta($limit_glob) . '$';
+ $limit_re =~ s/\\\?/./g; # glob "?" => "."
+ $limit_re =~ s/\\\*/.*?/g; # glob "*" => ".*?"
+ $limit_re =~ s/\.\*\?\$$//s; # final glob "*" => ".*?$" => ""
+
+ $self->{'verbose'} and print "Turning limit_glob $limit_glob into re $limit_re\n";
+
+ # A common optimization:
+ if(!exists($self->{'dir_prefix'})
+ and $limit_glob =~ m/^(?:\w+\:\:)+/s # like "File::*" or "File::Thing*"
+ # Optimize for sane and common cases (but not things like "*::File")
+ ) {
+ $self->{'dir_prefix'} = join "::", $limit_glob =~ m/^(?:\w+::)+/sg;
+ $self->{'verbose'} and print " and setting dir_prefix to $self->{'dir_prefix'}\n";
+ }
+
+ return $limit_re;
+}
+
+#==========================================================================
+
+# contribution mostly from Tim Jenness <t.jenness@jach.hawaii.edu>
+
+sub find {
+ my($self, $pod, @search_dirs) = @_;
+ $self = $self->new unless ref $self; # tolerate being a class method
+
+ # Check usage
+ Carp::carp 'Usage: \$self->find($podname, ...)'
+ unless defined $pod and length $pod;
+
+ my $verbose = $self->verbose;
+
+ # Split on :: and then join the name together using File::Spec
+ my @parts = split /::/, $pod;
+ $verbose and print "Chomping {$pod} => {@parts}\n";
+
+ #@search_dirs = File::Spec->curdir unless @search_dirs;
+
+ if( $self->inc ) {
+ if( $^O eq 'MacOS' ) {
+ push @search_dirs, $self->_mac_whammy(@INC);
+ } else {
+ push @search_dirs, @INC;
+ }
+
+ # Add location of pod documentation for perl man pages (eg perlfunc)
+ # This is a pod directory in the private install tree
+ #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'},
+ # 'pod');
+ #push (@search_dirs, $perlpoddir)
+ # if -d $perlpoddir;
+
+ # Add location of binaries such as pod2text:
+ push @search_dirs, $Config::Config{'scriptdir'};
+ # and if that's undef or q{} or nonexistent, we just ignore it later
+ }
+
+ my %seen_dir;
+ Dir:
+ foreach my $dir ( @search_dirs ) {
+ next unless defined $dir and length $dir;
+ next if $seen_dir{$dir};
+ $seen_dir{$dir} = 1;
+ unless(-d $dir) {
+ print "Directory $dir does not exist\n" if $verbose;
+ next Dir;
+ }
+
+ print "Looking in directory $dir\n" if $verbose;
+ my $fullname = File::Spec->catfile( $dir, @parts );
+ print "Filename is now $fullname\n" if $verbose;
+
+ foreach my $ext ('', '.pod', '.pm', '.pl') { # possible extensions
+ my $fullext = $fullname . $ext;
+ if( -f $fullext and $self->contains_pod( $fullext ) ){
+ print "FOUND: $fullext\n" if $verbose;
+ return $fullext;
+ }
+ }
+ my $subdir = File::Spec->catdir($dir,'pod');
+ if(-d $subdir) { # slip in the ./pod dir too
+ $verbose and print "Noticing $subdir and stopping there...\n";
+ $dir = $subdir;
+ redo Dir;
+ }
+ }
+
+ return undef;
+}
+
+#==========================================================================
+
+sub contains_pod {
+ my($self, $file) = @_;
+ my $verbose = $self->{'verbose'};
+
+ # check for one line of POD
+ $verbose > 1 and print " Scanning $file for pod...\n";
+ unless( open(MAYBEPOD,"<$file") ) {
+ print "Error: $file is unreadable: $!\n";
+ return undef;
+ }
+
+ sleep($SLEEPY - 1) if $SLEEPY;
+ # avoid totally hogging the processor on OSs with poor process control
+
+ local $_;
+ while( <MAYBEPOD> ) {
+ if(m/^=(head\d|pod|over|item)\b/s) {
+ close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting";
+ chomp;
+ $verbose > 1 and print " Found some pod ($_) in $file\n";
+ return 1;
+ }
+ }
+ close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting";
+ $verbose > 1 and print " No POD in $file, skipping.\n";
+ return 0;
+}
+
+#==========================================================================
+
+sub _accessorize { # A simple-minded method-maker
+ shift;
+ no strict 'refs';
+ foreach my $attrname (@_) {
+ *{caller() . '::' . $attrname} = sub {
+ use strict;
+ $Carp::CarpLevel = 1, Carp::croak(
+ "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
+ ) unless (@_ == 1 or @_ == 2) and ref $_[0];
+
+ # Read access:
+ return $_[0]->{$attrname} if @_ == 1;
+
+ # Write access:
+ $_[0]->{$attrname} = $_[1];
+ return $_[0]; # RETURNS MYSELF!
+ };
+ }
+ # Ya know, they say accessories make the ensemble!
+ return;
+}
+
+#==========================================================================
+sub _state_as_string {
+ my $self = $_[0];
+ return '' unless ref $self;
+ my @out = "{\n # State of $self ...\n";
+ foreach my $k (sort keys %$self) {
+ push @out, " ", _esc($k), " => ", _esc($self->{$k}), ",\n";
+ }
+ push @out, "}\n";
+ my $x = join '', @out;
+ $x =~ s/^/#/mg;
+ return $x;
+}
+
+sub _esc {
+ my $in = $_[0];
+ return 'undef' unless defined $in;
+ $in =~
+ s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
+ <'\\x'.(unpack("H2",$1))>eg;
+ return qq{"$in"};
+}
+
+#==========================================================================
+
+run() unless caller; # run if "perl whatever/Search.pm"
+
+1;
+
+#==========================================================================
+
+__END__
+
+
+=head1 NAME
+
+Pod::Simple::Search - find POD documents in directory trees
+
+=head1 SYNOPSIS
+
+ use Pod::Simple::Search;
+ my $name2path = Pod::Simple::Search->new->limit_glob('LWP::*')->survey;
+ print "Looky see what I found: ",
+ join(' ', sort keys %$name2path), "\n";
+
+ print "LWPUA docs = ",
+ Pod::Simple::Search->new->find('LWP::UserAgent') || "?",
+ "\n";
+
+=head1 DESCRIPTION
+
+B<Pod::Simple::Search> is a class that you use for running searches
+for Pod files. An object of this class has several attributes
+(mostly options for controlling search options), and some methods
+for searching based on those attributes.
+
+The way to use this class is to make a new object of this class,
+set any options, and then call one of the search options
+(probably C<survey> or C<find>). The sections below discuss the
+syntaxes for doing all that.
+
+
+=head1 CONSTRUCTOR
+
+This class provides the one constructor, called C<new>.
+It takes no parameters:
+
+ use Pod::Simple::Search;
+ my $search = Pod::Simple::Search->new;
+
+=head1 ACCESSORS
+
+This class defines several methods for setting (and, occasionally,
+reading) the contents of an object. With two exceptions (discussed at
+the end of this section), these attributes are just for controlling the
+way searches are carried out.
+
+Note that each of these return C<$self> when you call them as
+C<< $self->I<whatever(value)> >>. That's so that you can chain
+together set-attribute calls like this:
+
+ my $name2path =
+ Pod::Simple::Search->new
+ -> inc(0) -> verbose(1) -> callback(\&blab)
+ ->survey(@there);
+
+...which works exactly as if you'd done this:
+
+ my $search = Pod::Simple::Search->new;
+ $search->inc(0);
+ $search->verbose(1);
+ $search->callback(\&blab);
+ my $name2path = $search->survey(@there);
+
+=over
+
+=item $search->inc( I<true-or-false> );
+
+This attribute, if set to a true value, means that searches should
+implicitly add perl's I<@INC> paths. This
+automatically considers paths specified in the C<PERL5LIB> environment
+as this is prepended to I<@INC> by the Perl interpreter itself.
+This attribute's default value is B<TRUE>. If you want to search
+only specific directories, set $self->inc(0) before calling
+$inc->survey or $inc->find.
+
+
+=item $search->verbose( I<nonnegative-number> );
+
+This attribute, if set to a nonzero positive value, will make searches output
+(via C<warn>) notes about what they're doing as they do it.
+This option may be useful for debugging a pod-related module.
+This attribute's default value is zero, meaning that no C<warn> messages
+are produced. (Setting verbose to 1 turns on some messages, and setting
+it to 2 turns on even more messages, i.e., makes the following search(es)
+even more verbose than 1 would make them.)
+
+
+=item $search->limit_glob( I<some-glob-string> );
+
+This option means that you want to limit the results just to items whose
+podnames match the given glob/wildcard expression. For example, you
+might limit your search to just "LWP::*", to search only for modules
+starting with "LWP::*" (but not including the module "LWP" itself); or
+you might limit your search to "LW*" to see only modules whose (full)
+names begin with "LW"; or you might search for "*Find*" to search for
+all modules with "Find" somewhere in their full name. (You can also use
+"?" in a glob expression; so "DB?" will match "DBI" and "DBD".)
+
+
+=item $search->callback( I<\&some_routine> );
+
+This attribute means that every time this search sees a matching
+Pod file, it should call this callback routine. The routine is called
+with two parameters: the current file's filespec, and its pod name.
+(For example: C<("/etc/perljunk/File/Crunk.pm", "File::Crunk")> would
+be in C<@_>.)
+
+The callback routine's return value is not used for anything.
+
+This attribute's default value is false, meaning that no callback
+is called.
+
+=item $search->laborious( I<true-or-false> );
+
+Unless you set this attribute to a true value, Pod::Search will
+apply Perl-specific heuristics to find the correct module PODs quickly.
+This attribute's default value is false. You won't normally need
+to set this to true.
+
+Specifically: Turning on this option will disable the heuristics for
+seeing only files with Perl-like extensions, omitting subdirectories
+that are numeric but do I<not> match the current Perl interpreter's
+version ID, suppressing F<site_perl> as a module hierarchy name, etc.
+
+
+=item $search->shadows( I<true-or-false> );
+
+Unless you set this attribute to a true value, Pod::Simple::Search will
+consider only the first file of a given modulename as it looks thru the
+specified directories; that is, with this option off, if
+Pod::Simple::Search has seen a C<somepathdir/Foo/Bar.pm> already in this
+search, then it won't bother looking at a C<somelaterpathdir/Foo/Bar.pm>
+later on in that search, because that file is merely a "shadow". But if
+you turn on C<< $self->shadows(1) >>, then these "shadow" files are
+inspected too, and are noted in the pathname2podname return hash.
+
+This attribute's default value is false; and normally you won't
+need to turn it on.
+
+
+=item $search->limit_re( I<some-regxp> );
+
+Setting this attribute (to a value that's a regexp) means that you want
+to limit the results just to items whose podnames match the given
+regexp. Normally this option is not needed, and the more efficient
+C<limit_glob> attribute is used instead.
+
+
+=item $search->dir_prefix( I<some-string-value> );
+
+Setting this attribute to a string value means that the searches should
+begin in the specified subdirectory name (like "Pod" or "File::Find",
+also expressable as "File/Find"). For example, the search option
+C<< $search->limit_glob("File::Find::R*") >>
+is the same as the combination of the search options
+C<< $search->limit_re("^File::Find::R") -> dir_prefix("File::Find") >>.
+
+Normally you don't need to know about the C<dir_prefix> option, but I
+include it in case it might prove useful for someone somewhere.
+
+(Implementationally, searching with limit_glob ends up setting limit_re
+and usually dir_prefix.)
+
+
+=item $search->progress( I<some-progress-object> );
+
+If you set a value for this attribute, the value is expected
+to be an object (probably of a class that you define) that has a
+C<reach> method and a C<done> method. This is meant for reporting
+progress during the search, if you don't want to use a simple
+callback.
+
+Normally you don't need to know about the C<progress> option, but I
+include it in case it might prove useful for someone somewhere.
+
+While a search is in progress, the progress object's C<reach> and
+C<done> methods are called like this:
+
+ # Every time a file is being scanned for pod:
+ $progress->reach($count, "Scanning $file"); ++$count;
+
+ # And then at the end of the search:
+ $progress->done("Noted $count Pod files total");
+
+Internally, we often set this to an object of class
+Pod::Simple::Progress. That class is probably undocumented,
+but you may wish to look at its source.
+
+
+=item $name2path = $self->name2path;
+
+This attribute is not a search parameter, but is used to report the
+result of C<survey> method, as discussed in the next section.
+
+=item $path2name = $self->path2name;
+
+This attribute is not a search parameter, but is used to report the
+result of C<survey> method, as discussed in the next section.
+
+=back
+
+=head1 MAIN SEARCH METHODS
+
+Once you've actually set any options you want (if any), you can go
+ahead and use the following methods to search for Pod files
+in particular ways.
+
+
+=head2 C<< $search->survey( @directories ) >>
+
+The method C<survey> searches for POD documents in a given set of
+files and/or directories. This runs the search according to the various
+options set by the accessors above. (For example, if the C<inc> attribute
+is on, as it is by default, then the perl @INC directories are implicitly
+added to the list of directories (if any) that you specify.)
+
+The return value of C<survey> is two hashes:
+
+=over
+
+=item C<name2path>
+
+A hash that maps from each pod-name to the filespec (like
+"Stuff::Thing" => "/whatever/plib/Stuff/Thing.pm")
+
+=item C<path2name>
+
+A hash that maps from each Pod filespec to its pod-name (like
+"/whatever/plib/Stuff/Thing.pm" => "Stuff::Thing")
+
+=back
+
+Besides saving these hashes as the hashref attributes
+C<name2path> and C<path2name>, calling this function also returns
+these hashrefs. In list context, the return value of
+C<< $search->survey >> is the list C<(\%name2path, \%path2name)>.
+In scalar context, the return value is C<\%name2path>.
+Or you can just call this in void context.
+
+Regardless of calling context, calling C<survey> saves
+its results in its C<name2path> and C<path2name> attributes.
+
+E.g., when searching in F<$HOME/perl5lib>, the file
+F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>,
+whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be
+I<Myclass::Subclass>. The name information can be used for POD
+translators.
+
+Only text files containing at least one valid POD command are found.
+
+In verbose mode, a warning is printed if shadows are found (i.e., more
+than one POD file with the same POD name is found, e.g. F<CPAN.pm> in
+different directories). This usually indicates duplicate occurrences of
+modules in the I<@INC> search path, which is occasionally inadvertent
+(but is often simply a case of a user's path dir having a more recent
+version than the system's general path dirs in general.)
+
+The options to this argument is a list of either directories that are
+searched recursively, or files. (Usually you wouldn't specify files,
+but just dirs.) Or you can just specify an empty-list, as in
+$name2path; with the
+C<inc> option on, as it is by default, teh
+
+The POD names of files are the plain basenames with any Perl-like
+extension (.pm, .pl, .pod) stripped, and path separators replaced by
+C<::>'s.
+
+Calling Pod::Simple::Search->search(...) is short for
+Pod::Simple::Search->new->search(...). That is, a throwaway object
+with default attribute values is used.
+
+
+=head2 C<< $search->simplify_name( $str ) >>
+
+The method B<simplify_name> is equivalent to B<basename>, but also
+strips Perl-like extensions (.pm, .pl, .pod) and extensions like
+F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively.
+
+
+=head2 C<< $search->find( $pod ) >>
+
+=head2 C<< $search->find( $pod, @search_dirs ) >>
+
+Returns the location of a Pod file, given a Pod/module/script name
+(like "Foo::Bar" or "perlvar" or "perldoc"), and an idea of
+what files/directories to look in.
+It searches according to the various options set by the accessors above.
+(For example, if the C<inc> attribute is on, as it is by default, then
+the perl @INC directories are implicitly added to the list of
+directories (if any) that you specify.)
+
+This returns the full path of the first occurrence to the file.
+Package names (eg 'A::B') are automatically converted to directory
+names in the selected directory. Additionally, '.pm', '.pl' and '.pod'
+are automatically appended to the search as required.
+(So, for example, under Unix, "A::B" is converted to "somedir/A/B.pm",
+"somedir/A/B.pod", or "somedir/A/B.pl", as appropriate.)
+
+If no such Pod file is found, this method returns undef.
+
+If any of the given search directories contains a F<pod/> subdirectory,
+then it is searched. (That's how we manage to find F<perlfunc>,
+for example, which is usually in F<pod/perlfunc> in most Perl dists.)
+
+The C<verbose> and C<inc> attributes influence the behavior of this
+search; notably, C<inc>, if true, adds @INC I<and also
+$Config::Config{'scriptdir'}> to the list of directories to search.
+
+It is common to simply say C<< $filename = Pod::Simple::Search-> new
+->find("perlvar") >> so that just the @INC (well, and scriptdir)
+directories are searched. (This happens because the C<inc>
+attribute is true by default.)
+
+Calling Pod::Simple::Search->find(...) is short for
+Pod::Simple::Search->new->find(...). That is, a throwaway object
+with default attribute values is used.
+
+
+=head2 C<< $self->contains_pod( $file ) >>
+
+Returns true if the supplied filename (not POD module) contains some Pod
+documentation.
+
+
+=head1 AUTHOR
+
+Sean M. Burke E<lt>sburke@cpan.orgE<gt>
+borrowed code from
+Marek Rouchal's Pod::Find, which in turn
+heavily borrowed code from Nick Ing-Simmons' PodToHtml.
+
+Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided
+C<find> and C<contains_pod> to Pod::Find.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Perldoc>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/SimpleTree.pm b/ext/Pod-Simple/lib/Pod/Simple/SimpleTree.pm
new file mode 100644
index 0000000000..64dd155104
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/SimpleTree.pm
@@ -0,0 +1,155 @@
+
+
+require 5;
+package Pod::Simple::SimpleTree;
+use strict;
+use Carp ();
+use Pod::Simple ();
+use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS);
+$VERSION = '2.02';
+BEGIN {
+ @ISA = ('Pod::Simple');
+ *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG;
+}
+
+__PACKAGE__->_accessorize(
+ 'root', # root of the tree
+);
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub _handle_element_start { # self, tagname, attrhash
+ DEBUG > 2 and print "Handling $_[1] start-event\n";
+ my $x = [$_[1], $_[2]];
+ if($_[0]{'_currpos'}) {
+ push @{ $_[0]{'_currpos'}[0] }, $x; # insert in parent's child-list
+ unshift @{ $_[0]{'_currpos'} }, $x; # prefix to stack
+ } else {
+ DEBUG and print " And oo, it gets to be root!\n";
+ $_[0]{'_currpos'} = [ $_[0]{'root'} = $x ];
+ # first event! set to stack, and set as root.
+ }
+ DEBUG > 3 and print "Stack is now: ",
+ join(">", map $_->[0], @{$_[0]{'_currpos'}}), "\n";
+ return;
+}
+
+sub _handle_element_end { # self, tagname
+ DEBUG > 2 and print "Handling $_[1] end-event\n";
+ shift @{$_[0]{'_currpos'}};
+ DEBUG > 3 and print "Stack is now: ",
+ join(">", map $_->[0], @{$_[0]{'_currpos'}}), "\n";
+ return;
+}
+
+sub _handle_text { # self, text
+ DEBUG > 2 and print "Handling $_[1] text-event\n";
+ push @{ $_[0]{'_currpos'}[0] }, $_[1];
+ return;
+}
+
+
+# A bit of evil from the black box... please avert your eyes, kind souls.
+sub _traverse_treelet_bit {
+ DEBUG > 2 and print "Handling $_[1] paragraph event\n";
+ my $self = shift;
+ push @{ $self->{'_currpos'}[0] }, [@_];
+ return;
+}
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+1;
+__END__
+
+=head1 NAME
+
+Pod::Simple::SimpleTree -- parse Pod into a simple parse tree
+
+=head1 SYNOPSIS
+
+ % cat ptest.pod
+
+ =head1 PIE
+
+ I like B<pie>!
+
+ % perl -MPod::Simple::SimpleTree -MData::Dumper -e \
+ "print Dumper(Pod::Simple::SimpleTree->new->parse_file(shift)->root)" \
+ ptest.pod
+
+ $VAR1 = [
+ 'Document',
+ { 'start_line' => 1 },
+ [
+ 'head1',
+ { 'start_line' => 1 },
+ 'PIE'
+ ],
+ [
+ 'Para',
+ { 'start_line' => 3 },
+ 'I like ',
+ [
+ 'B',
+ {},
+ 'pie'
+ ],
+ '!'
+ ]
+ ];
+
+=head1 DESCRIPTION
+
+This class is of interest to people writing a Pod processor/formatter.
+
+This class takes Pod and parses it, returning a parse tree made just
+of arrayrefs, and hashrefs, and strings.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+This class is inspired by XML::Parser's "Tree" parsing-style, although
+it doesn't use exactly the same LoL format.
+
+=head1 METHODS
+
+At the end of the parse, call C<< $parser->root >> to get the
+tree's top node.
+
+=head1 Tree Contents
+
+Every element node in the parse tree is represented by an arrayref of
+the form: C<[ I<elementname>, \%attributes, I<...subnodes...> ]>.
+See the example tree dump in the Synopsis, above.
+
+Every text node in the tree is represented by a simple (non-ref)
+string scalar. So you can test C<ref($node)> to see whather you have
+an element node or just a text node.
+
+The top node in the tree is C<[ 'Document', \%attributes,
+I<...subnodes...> ]>
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple>
+
+L<perllol>
+
+L<The "Tree" subsubsection in XML::Parser|XML::Parser/"Tree">
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Subclassing.pod b/ext/Pod-Simple/lib/Pod/Simple/Subclassing.pod
new file mode 100644
index 0000000000..d4ee694344
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Subclassing.pod
@@ -0,0 +1,922 @@
+
+=head1 NAME
+
+Pod::Simple::Subclassing -- write a formatter as a Pod::Simple subclass
+
+=head1 SYNOPSIS
+
+ package Pod::SomeFormatter;
+ use Pod::Simple;
+ @ISA = qw(Pod::Simple);
+ $VERSION = '1.01';
+ use strict;
+
+ sub _handle_element_start {
+ my($parser, $element_name, $attr_hash_r) = @_;
+ ...
+ }
+
+ sub _handle_element_end {
+ my($parser, $element_name) = @_;
+ ...
+ }
+
+ sub _handle_text {
+ my($parser, $text) = @_;
+ ...
+ }
+ 1;
+
+=head1 DESCRIPTION
+
+This document is about using Pod::Simple to write a Pod processor,
+generally a Pod formatter. If you just want to know about using an
+existing Pod formatter, instead see its documentation and see also the
+docs in L<Pod::Simple>.
+
+The zeroeth step in writing a Pod formatter is to make sure that there
+isn't already a decent one in CPAN. See L<http://search.cpan.org/>, and
+run a search on the name of the format you want to render to. Also
+consider joining the Pod People list
+L<http://lists.perl.org/showlist.cgi?name=pod-people> and asking whether
+anyone has a formatter for that format -- maybe someone cobbled one
+together but just hasn't released it.
+
+The first step in writing a Pod processor is to read L<perlpodspec>,
+which contains notes information on writing a Pod parser (which has been
+largely taken care of by Pod::Simple), but also a lot of requirements
+and recommendations for writing a formatter.
+
+The second step is to actually learn the format you're planning to
+format to -- or at least as much as you need to know to represent Pod,
+which probably isn't much.
+
+The third step is to pick which of Pod::Simple's interfaces you want to
+use -- the basic interface via Pod::Simple or L<Pod::Simple::Methody> is
+event-based, sort of like L<HTML::Parser>'s interface, or sort of like
+L<XML::Parser>'s "Handlers" interface), but L<Pod::Simple::PullParser>
+provides a token-stream interface, sort of like L<HTML::TokeParser>'s
+interface; L<Pod::Simple::SimpleTree> provides a simple tree interface,
+rather like XML::Parser's "Tree" interface. Users familiar with
+XML-handling will find one of these styles relatively familiar; but if
+you would be even more at home with XML, there are classes that produce
+an XML representation of the Pod stream, notably
+L<Pod::Simple::XMLOutStream>; you can feed the output of such a class to
+whatever XML parsing system you are most at home with.
+
+The last step is to write your code based on how the events (or tokens,
+or tree-nodes, or the XML, or however you're parsing) will map to
+constructs in the output format. Also sure to consider how to escape
+text nodes containing arbitrary text, and also what to do with text
+nodes that represent preformatted text (from verbatim sections).
+
+
+
+=head1 Events
+
+TODO intro... mention that events are supplied for implicits, like for
+missing >'s
+
+
+In the following section, we use XML to represent the event structure
+associated with a particular construct. That is, TODO
+
+=over
+
+=item C<< $parser->_handle_element_start( I<element_name>, I<attr_hashref> ) >>
+
+=item C<< $parser->_handle_element_end( I<element_name> ) >>
+
+=item C<< $parser->_handle_text( I<text_string> ) >>
+
+=back
+
+TODO describe
+
+
+=over
+
+=item events with an element_name of Document
+
+Parsing a document produces this event structure:
+
+ <Document start_line="543">
+ ...all events...
+ </Document>
+
+The value of the I<start_line> attribute will be the line number of the first
+Pod directive in the document.
+
+If there is no Pod in the given document, then the
+event structure will be this:
+
+ <Document contentless="1" start_line="543">
+ </Document>
+
+In that case, the value of the I<start_line> attribute will not be meaningful;
+under current implementations, it will probably be the line number of the
+last line in the file.
+
+=item events with an element_name of Para
+
+Parsing a plain (non-verbatim, non-directive, non-data) paragraph in
+a Pod document produces this event structure:
+
+ <Para start_line="543">
+ ...all events in this paragraph...
+ </Para>
+
+The value of the I<start_line> attribute will be the line number of the start
+of the paragraph.
+
+For example, parsing this paragraph of Pod:
+
+ The value of the I<start_line> attribute will be the
+ line number of the start of the paragraph.
+
+produces this event structure:
+
+ <Para start_line="129">
+ The value of the
+ <I>
+ start_line
+ </I>
+ attribute will be the line number of the first Pod directive
+ in the document.
+ </Para>
+
+=item events with an element_name of B, C, F, or I.
+
+Parsing a BE<lt>...E<gt> formatting code (or of course any of its
+semantically identical syntactic variants
+S<BE<lt>E<lt> ... E<gt>E<gt>>,
+or S<BE<lt>E<lt>E<lt>E<lt> ... E<gt>E<gt>E<gt>E<gt>>, etc.)
+produces this event structure:
+
+ <B>
+ ...stuff...
+ </B>
+
+Currently, there are no attributes conveyed.
+
+Parsing C, F, or I codes produce the same structure, with only a
+different element name.
+
+If your parser object has been set to accept other formatting codes,
+then they will be presented like these B/C/F/I codes -- i.e., without
+any attributes.
+
+=item events with an element_name of S
+
+Normally, parsing an SE<lt>...E<gt> sequence produces this event
+structure, just as if it were a B/C/F/I code:
+
+ <S>
+ ...stuff...
+ </S>
+
+However, Pod::Simple (and presumably all derived parsers) offers the
+C<nbsp_for_S> option which, if enabled, will suppress all S events, and
+instead change all spaces in the content to non-breaking spaces. This is
+intended for formatters that output to a format that has no code that
+means the same as SE<lt>...E<gt>, but which has a code/character that
+means non-breaking space.
+
+=item events with an element_name of X
+
+Normally, parsing an XE<lt>...E<gt> sequence produces this event
+structure, just as if it were a B/C/F/I code:
+
+ <X>
+ ...stuff...
+ </X>
+
+However, Pod::Simple (and presumably all derived parsers) offers the
+C<nix_X_codes> option which, if enabled, will suppress all X events
+and ignore their content. For formatters/processors that don't use
+X events, this is presumably quite useful.
+
+
+=item events with an element_name of L
+
+Because the LE<lt>...E<gt> is the most complex construct in the
+language, it should not surprise you that the events it generates are
+the most complex in the language. Most of complexity is hidden away in
+the attribute values, so for those of you writing a Pod formatter that
+produces a non-hypertextual format, you can just ignore the attributes
+and treat an L event structure like a formatting element that
+(presumably) doesn't actually produce a change in formatting. That is,
+the content of the L event structure (as opposed to its
+attributes) is always what text should be displayed.
+
+There are, at first glance, three kinds of L links: URL, man, and pod.
+
+When a LE<lt>I<some_url>E<gt> code is parsed, it produces this event
+structure:
+
+ <L content-implicit="yes" to="that_url" type="url">
+ that_url
+ </L>
+
+The C<type="url"> attribute is always specified for this type of
+L code.
+
+For example, this Pod source:
+
+ L<http://www.perl.com/CPAN/authors/>
+
+produces this event structure:
+
+ <L content-implicit="yes" to="http://www.perl.com/CPAN/authors/" type="url">
+ http://www.perl.com/CPAN/authors/
+ </L>
+
+When a LE<lt>I<manpage(section)>E<gt> code is parsed (and these are
+fairly rare and not terribly useful), it produces this event structure:
+
+ <L content-implicit="yes" to="manpage(section)" type="man">
+ manpage(section)
+ </L>
+
+The C<type="man"> attribute is always specified for this type of
+L code.
+
+For example, this Pod source:
+
+ L<crontab(5)>
+
+produces this event structure:
+
+ <L content-implicit="yes" to="crontab(5)" type="man">
+ crontab(5)
+ </L>
+
+In the rare cases where a man page link has a specified, that text appears
+in a I<section> attribute. For example, this Pod source:
+
+ L<crontab(5)/"ENVIRONMENT">
+
+will produce this event structure:
+
+ <L content-implicit="yes" section="ENVIRONMENT" to="crontab(5)" type="man">
+ "ENVIRONMENT" in crontab(5)
+ </L>
+
+In the rare case where the Pod document has code like
+LE<lt>I<sometext>|I<manpage(section)>E<gt>, then the I<sometext> will appear
+as the content of the element, the I<manpage(section)> text will appear
+only as the value of the I<to> attribute, and there will be no
+C<content-implicit="yes"> attribute (whose presence means that the Pod parser
+had to infer what text should appear as the link text -- as opposed to
+cases where that attribute is absent, which means that the Pod parser did
+I<not> have to infer the link text, because that L code explicitly specified
+some link text.)
+
+For example, this Pod source:
+
+ L<hell itself!|crontab(5)>
+
+will produce this event structure:
+
+ <L to="crontab(5)" type="man">
+ hell itself!
+ </L>
+
+The last type of L structure is for links to/within Pod documents. It is
+the most complex because it can have a I<to> attribute, I<or> a
+I<section> attribute, or both. The C<type="pod"> attribute is always
+specified for this type of L code.
+
+In the most common case, the simple case of a LE<lt>podpageE<gt> code
+produces this event structure:
+
+ <L content-implicit="yes" to="Net::Ping" type="pod">
+ podpage
+ </L>
+
+For example, this Pod source:
+
+ L<Net::Ping>
+
+produces this event structure:
+
+ <L content-implicit="yes" to="Net::Ping" type="pod">
+ Net::Ping
+ </L>
+
+In cases where there is link-text explicitly specified, it
+is to be found in the content of the element (and not the
+attributes), just as with the LE<lt>I<sometext>|I<manpage(section)>E<gt>
+case discussed above. For example, this Pod source:
+
+ L<Perl Error Messages|perldiag>
+
+produces this event structure:
+
+ <L to="perldiag" type="pod">
+ Perl Error Messages
+ </L>
+
+In cases of links to a section in the current Pod document,
+there is a I<section> attribute instead of a I<to> attribute.
+For example, this Pod source:
+
+ L</"Member Data">
+
+produces this event structure:
+
+ <L content-implicit="yes" section="Member Data" type="pod">
+ "Member Data"
+ </L>
+
+As another example, this Pod source:
+
+ L<the various attributes|/"Member Data">
+
+produces this event structure:
+
+ <L section="Member Data" type="pod">
+ the various attributes
+ </L>
+
+In cases of links to a section in a different Pod document,
+there are both a I<section> attribute and a L<to> attribute.
+For example, this Pod source:
+
+ L<perlsyn/"Basic BLOCKs and Switch Statements">
+
+produces this event structure:
+
+ <L content-implicit="yes" section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod">
+ "Basic BLOCKs and Switch Statements" in perlsyn
+ </L>
+
+As another example, this Pod source:
+
+ L<SWITCH statements|perlsyn/"Basic BLOCKs and Switch Statements">
+
+produces this event structure:
+
+ <L section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod">
+ SWITCH statements
+ </L>
+
+Incidentally, note that we do not distinguish between these syntaxes:
+
+ L</"Member Data">
+ L<"Member Data">
+ L</Member Data>
+ L<Member Data> [deprecated syntax]
+
+That is, they all produce the same event structure, namely:
+
+ <L content-implicit="yes" section="Member Data" type="pod">
+ &#34;Member Data&#34;
+ </L>
+
+=item events with an element_name of E or Z
+
+While there are Pod codes EE<lt>...E<gt> and ZE<lt>E<gt>, these
+I<do not> produce any E or Z events -- that is, there are no such
+events as E or Z.
+
+=item events with an element_name of Verbatim
+
+When a Pod verbatim paragraph (AKA "codeblock") is parsed, it
+produces this event structure:
+
+ <Verbatim start_line="543" xml:space="preserve">
+ ...text...
+ </Verbatim>
+
+The value of the I<start_line> attribute will be the line number of the
+first line of this verbatim block. The I<xml:space> attribute is always
+present, and always has the value "preserve".
+
+The text content will have tabs already expanded.
+
+
+=item events with an element_name of head1 .. head4
+
+When a "=head1 ..." directive is parsed, it produces this event
+structure:
+
+ <head1>
+ ...stuff...
+ </head1>
+
+For example, a directive consisting of this:
+
+ =head1 Options to C<new> et al.
+
+will produce this event structure:
+
+ <head1 start_line="543">
+ Options to
+ <C>
+ new
+ </C>
+ et al.
+ </head1>
+
+"=head2" thru "=head4" directives are the same, except for the element
+names in the event structure.
+
+=item events with an element_name of over-bullet
+
+When an "=over ... Z<>=back" block is parsed where the items are
+a bulletted list, it will produce this event structure:
+
+ <over-bullet indent="4" start_line="543">
+ <item-bullet start_line="545">
+ ...Stuff...
+ </item-bullet>
+ ...more item-bullets...
+ </over-bullet>
+
+The value of the I<indent> attribute is whatever value is after the
+"=over" directive, as in "=over 8". If no such value is specified
+in the directive, then the I<indent> attribute has the value "4".
+
+For example, this Pod source:
+
+ =over
+
+ =item *
+
+ Stuff
+
+ =item *
+
+ Bar I<baz>!
+
+ =back
+
+produces this event structure:
+
+ <over-bullet indent="4" start_line="10">
+ <item-bullet start_line="12">
+ Stuff
+ </item-bullet>
+ <item-bullet start_line="14">
+ Bar <I>baz</I>!
+ </item-bullet>
+ </over-bullet>
+
+=item events with an element_name of over-number
+
+When an "=over ... Z<>=back" block is parsed where the items are
+a numbered list, it will produce this event structure:
+
+ <over-number indent="4" start_line="543">
+ <item-number number="1" start_line="545">
+ ...Stuff...
+ </item-number>
+ ...more item-number...
+ </over-bullet>
+
+This is like the "over-bullet" event structure; but note that the contents
+are "item-number" instead of "item-bullet", and note that they will have
+a "number" attribute, which some formatters/processors may ignore
+(since, for example, there's no need for it in HTML when producing
+an "<UL><LI>...</LI>...</UL>" structure), but which any processor may use.
+
+Note that the values for the I<number> attributes of "item-number"
+elements in a given "over-number" area I<will> start at 1 and go up by
+one each time. If the Pod source doesn't follow that order (even though
+it really should should!), whatever numbers it has will be ignored (with
+the correct values being put in the I<number> attributes), and an error
+message might be issued to the user.
+
+=item events with an element_name of over-text
+
+These events are are somewhat unlike the other over-*
+structures, as far as what their contents are. When
+an "=over ... Z<>=back" block is parsed where the items are
+a list of text "subheadings", it will produce this event structure:
+
+ <over-text indent="4" start_line="543">
+ <item-text>
+ ...stuff...
+ </item-text>
+ ...stuff (generally Para or Verbatim elements)...
+ <item-text>
+ ...more item-text and/or stuff...
+ </over-text>
+
+The I<indent> attribute is as with the other over-* events.
+
+For example, this Pod source:
+
+ =over
+
+ =item Foo
+
+ Stuff
+
+ =item Bar I<baz>!
+
+ Quux
+
+ =back
+
+produces this event structure:
+
+ <over-text indent="4" start_line="20">
+ <item-text start_line="22">
+ Foo
+ </item-text>
+ <Para start_line="24">
+ Stuff
+ </Para>
+ <item-text start_line="26">
+ Bar
+ <I>
+ baz
+ </I>
+ !
+ </item-text>
+ <Para start_line="28">
+ Quux
+ </Para>
+ </over-text>
+
+
+
+=item events with an element_name of over-block
+
+These events are are somewhat unlike the other over-*
+structures, as far as what their contents are. When
+an "=over ... Z<>=back" block is parsed where there are no items,
+it will produce this event structure:
+
+ <over-block indent="4" start_line="543">
+ ...stuff (generally Para or Verbatim elements)...
+ </over-block>
+
+The I<indent> attribute is as with the other over-* events.
+
+For example, this Pod source:
+
+ =over
+
+ For cutting off our trade with all parts of the world
+
+ For transporting us beyond seas to be tried for pretended offenses
+
+ He is at this time transporting large armies of foreign mercenaries to
+ complete the works of death, desolation and tyranny, already begun with
+ circumstances of cruelty and perfidy scarcely paralleled in the most
+ barbarous ages, and totally unworthy the head of a civilized nation.
+
+ =cut
+
+will produce this event structure:
+
+ <over-block indent="4" start_line="2">
+ <Para start_line="4">
+ For cutting off our trade with all parts of the world
+ </Para>
+ <Para start_line="6">
+ For transporting us beyond seas to be tried for pretended offenses
+ </Para>
+ <Para start_line="8">
+ He is at this time transporting large armies of [...more text...]
+ </Para>
+ </over-block>
+
+=item events with an element_name of item-bullet
+
+See L</"events with an element_name of over-bullet">, above.
+
+=item events with an element_name of item-number
+
+See L</"events with an element_name of over-number">, above.
+
+=item events with an element_name of item-text
+
+See L</"events with an element_name of over-text">, above.
+
+=item events with an element_name of for
+
+TODO...
+
+=item events with an element_name of Data
+
+TODO...
+
+=back
+
+
+
+=head1 More Pod::Simple Methods
+
+Pod::Simple provides a lot of methods that aren't generally interesting
+to the end user of an existing Pod formatter, but some of which you
+might find useful in writing a Pod formatter. They are listed below. The
+first several methods (the accept_* methods) are for declaring the
+capabilites of your parser, notably what C<=for I<targetname>> sections
+it's interested in, what extra NE<lt>...E<gt> codes it accepts beyond
+the ones described in the I<perlpod>.
+
+=over
+
+=item C<< $parser->accept_targets( I<SOMEVALUE> ) >>
+
+As the parser sees sections like:
+
+ =for html <img src="fig1.jpg">
+
+or
+
+ =begin html
+
+ <img src="fig1.jpg">
+
+ =end html
+
+...the parser will ignore these sections unless your subclass has
+specified that it wants to see sections targetted to "html" (or whatever
+the formatter name is).
+
+If you want to process all sections, even if they're not targetted for you,
+call this before you start parsing:
+
+ $parser->accept_targets('*');
+
+=item C<< $parser->accept_targets_as_text( I<SOMEVALUE> ) >>
+
+This is like accept_targets, except that it specifies also that the
+content of sections for this target should be treated as Pod text even
+if the target name in "=for I<targetname>" doesn't start with a ":".
+
+At time of writing, I don't think you'll need to use this.
+
+
+=item C<< $parser->accept_codes( I<Codename>, I<Codename>... ) >>
+
+This tells the parser that you accept additional formatting codes,
+beyond just the standard ones (I B C L F S X, plus the two weird ones
+you don't actually see in the parse tree, Z and E). For example, to also
+accept codes "N", "R", and "W":
+
+ $parser->accept_codes( qw( N R W ) );
+
+B<TODO: document how this interacts with =extend, and long element names>
+
+
+=item C<< $parser->accept_directive_as_data( I<directive_name> ) >>
+
+=item C<< $parser->accept_directive_as_verbatim( I<directive_name> ) >>
+
+=item C<< $parser->accept_directive_as_processed( I<directive_name> ) >>
+
+In the unlikely situation that you need to tell the parser that you will
+accept additional directives ("=foo" things), you need to first set the
+parset to treat its content as data (i.e., not really processed at
+all), or as verbatim (mostly just expanding tabs), or as processed text
+(parsing formatting codes like BE<lt>...E<gt>).
+
+For example, to accept a new directive "=method", you'd presumably
+use:
+
+ $parser->accept_directive_as_processed("method");
+
+so that you could have Pod lines like:
+
+ =method I<$whatever> thing B<um>
+
+Making up your own directives breaks compatibility with other Pod
+formatters, in a way that using "=for I<target> ..." lines doesn't;
+however, you may find this useful if you're making a Pod superset
+format where you don't need to worry about compatibility.
+
+
+=item C<< $parser->nbsp_for_S( I<BOOLEAN> ); >>
+
+Setting this attribute to a true value (and by default it is false) will
+turn "SE<lt>...E<gt>" sequences into sequences of words separated by
+C<\xA0> (non-breaking space) characters. For example, it will take this:
+
+ I like S<Dutch apple pie>, don't you?
+
+and treat it as if it were:
+
+ I like DutchE<nbsp>appleE<nbsp>pie, don't you?
+
+This is handy for output formats that don't have anything quite like an
+"SE<lt>...E<gt>" code, but which do have a code for non-breaking space.
+
+There is currently no method for going the other way; but I can
+probably provide one upon request.
+
+
+=item C<< $parser->version_report() >>
+
+This returns a string reporting the $VERSION value from your module (and
+its classname) as well as the $VERSION value of Pod::Simple. Note that
+L<perlpodspec> requires output formats (wherever possible) to note
+this detail in a comment in the output format. For example, for
+some kind of SGML output format:
+
+ print OUT "<!-- \n", $parser->version_report, "\n -->";
+
+
+=item C<< $parser->pod_para_count() >>
+
+This returns the count of Pod paragraphs seen so far.
+
+
+=item C<< $parser->line_count() >>
+
+This is the current line number being parsed. But you might find the
+"line_number" event attribute more accurate, when it is present.
+
+
+=item C<< $parser->nix_X_codes( I<SOMEVALUE> ) >>
+
+This attribute, when set to a true value (and it is false by default)
+ignores any "XE<lt>...E<gt>" sequences in the document being parsed.
+Many formats don't actually use the content of these codes, so have
+no reason to process them.
+
+
+=item C<< $parser->merge_text( I<SOMEVALUE> ) >>
+
+This attribute, when set to a true value (and it is false by default)
+makes sure that only one event (or token, or node) will be created
+for any single contiguous sequence of text. For example, consider
+this somewhat contrived example:
+
+ I just LOVE Z<>hotE<32>apple pie!
+
+When that is parsed and events are about to be called on it, it may
+actually seem to be four different text events, one right after another:
+one event for "I just LOVE ", one for "hot", one for " ", and one for
+"apple pie!". But if you have merge_text on, then you're guaranteed
+that it will be fired as one text event: "I just LOVE hot apple pie!".
+
+
+=item C<< $parser->code_handler( I<CODE_REF> ) >>
+
+This specifies code that should be called when a code line is seen
+(i.e., a line outside of the Pod). Normally this is undef, meaning
+that no code should be called. If you provide a routine, it should
+start out like this:
+
+ sub get_code_line { # or whatever you'll call it
+ my($line, $line_number, $parser) = @_;
+ ...
+ }
+
+Note, however, that sometimes the Pod events aren't processed in exactly
+the same order as the code lines are -- i.e., if you have a file with
+Pod, then code, then more Pod, sometimes the code will be processed (via
+whatever you have code_handler call) before the all of the preceding Pod
+has been processed.
+
+
+=item C<< $parser->cut_handler( I<CODE_REF> ) >>
+
+This is just like the code_handler attribute, except that it's for
+"=cut" lines, not code lines. The same caveats apply. "=cut" lines are
+unlikely to be interesting, but this is included for completeness.
+
+
+=item C<< $parser->whine( I<linenumber>, I<complaint string> ) >>
+
+This notes a problem in the Pod, which will be reported to in the "Pod
+Errors" section of the document and/or send to STDERR, depending on the
+values of the attributes C<no_whining>, C<no_errata_section>, and
+C<complain_stderr>.
+
+=item C<< $parser->scream( I<linenumber>, I<complaint string> ) >>
+
+This notes an error like C<whine> does, except that it is not
+suppressable with C<no_whining>. This should be used only for very
+serious errors.
+
+
+=item C<< $parser->source_dead(1) >>
+
+This aborts parsing of the current document, by switching on the flag
+that indicates that EOF has been seen. In particularly drastic cases,
+you might want to do this. It's rather nicer than just calling
+C<die>!
+
+=item C<< $parser->hide_line_numbers( I<SOMEVALUE> ) >>
+
+Some subclasses that indescriminately dump event attributes (well,
+except for ones beginning with "~") can use this object attribute for
+refraining to dump the "start_line" attribute.
+
+=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
+
+This attribute, if set to true, will suppress reports of non-fatal
+error messages. The default value is false, meaning that complaints
+I<are> reported. How they get reported depends on the values of
+the attributes C<no_errata_section> and C<complain_stderr>.
+
+=item C<< $parser->no_errata_section( I<SOMEVALUE> ) >>
+
+This attribute, if set to true, will suppress generation of an errata
+section. The default value is false -- i.e., an errata section will be
+generated.
+
+=item C<< $parser->complain_stderr( I<SOMEVALUE> ) >>
+
+This attribute, if set to true will send complaints to STDERR. The
+default value is false -- i.e., complaints do not go to STDERR.
+
+=item C<< $parser->bare_output( I<SOMEVALUE> ) >>
+
+Some formatter subclasses use this as a flag for whether output should
+have prologue and epilogue code omitted. For example, setting this to
+true for an HTML formatter class should omit the
+"<html><head><title>...</title><body>..." prologue and the
+"</body></html>" epilogue.
+
+If you want to set this to true, you should probably also set
+C<no_whining> or at least C<no_errata_section> to true.
+
+=item C<< $parser->preserve_whitespace( I<SOMEVALUE> ) >>
+
+If you set this attribute to a true value, the parser will try to
+preserve whitespace in the output. This means that such formatting
+conventions as two spaces after periods will be preserved by the parser.
+This is primarily useful for output formats that treat whitespace as
+significant (such as text or *roff, but not HTML).
+
+=back
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple> -- event-based Pod-parsing framework
+
+L<Pod::Simple::Methody> -- like Pod::Simple, but each sort of event
+calls its own method (like C<start_head3>)
+
+L<Pod::Simple::PullParser> -- a Pod-parsing framework like Pod::Simple,
+but with a token-stream interface
+
+L<Pod::Simple::SimpleTree> -- a Pod-parsing framework like Pod::Simple,
+but with a tree interface
+
+L<Pod::Simple::Checker> -- a simple Pod::Simple subclass that reads
+documents, and then makes a plaintext report of any errors found in the
+document
+
+L<Pod::Simple::DumpAsXML> -- for dumping Pod documents as tidily
+indented XML, showing each event on its own line
+
+L<Pod::Simple::XMLOutStream> -- dumps a Pod document as XML (without
+introducing extra whitespace as Pod::Simple::DumpAsXML does).
+
+L<Pod::Simple::DumpAsText> -- for dumping Pod documents as tidily
+indented text, showing each event on its own line
+
+L<Pod::Simple::LinkSection> -- class for objects representing the values
+of the TODO and TODO attributes of LE<lt>...E<gt> elements
+
+L<Pod::Escapes> -- the module the Pod::Simple uses for evaluating
+EE<lt>...E<gt> content
+
+L<Pod::Simple::Text> -- a simple plaintext formatter for Pod
+
+L<Pod::Simple::TextContent> -- like Pod::Simple::Text, but
+makes no effort for indent or wrap the text being formatted
+
+L<perlpod|perlpod>
+
+L<perlpodspec|perlpodspec>
+
+L<perldoc>
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+
+=for notes
+Hm, my old podchecker version (1.2) says:
+ *** WARNING: node 'http://search.cpan.org/' contains non-escaped | or / at line 38 in file Subclassing.pod
+ *** WARNING: node 'http://lists.perl.org/showlist.cgi?name=pod-people' contains non-escaped | or / at line 41 in file Subclassing.pod
+Yes, L<...> is hard.
+
+
+=cut
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Text.pm b/ext/Pod-Simple/lib/Pod/Simple/Text.pm
new file mode 100644
index 0000000000..df82c0784c
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Text.pm
@@ -0,0 +1,152 @@
+
+require 5;
+package Pod::Simple::Text;
+use strict;
+use Carp ();
+use Pod::Simple::Methody ();
+use Pod::Simple ();
+use vars qw( @ISA $VERSION $FREAKYMODE);
+$VERSION = '2.02';
+@ISA = ('Pod::Simple::Methody');
+BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
+ ? \&Pod::Simple::DEBUG
+ : sub() {0}
+ }
+
+use Text::Wrap 98.112902 ();
+$Text::Wrap::wrap = 'overflow';
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->accept_target_as_text(qw( text plaintext plain ));
+ $new->nix_X_codes(1);
+ $new->nbsp_for_S(1);
+ $new->{'Thispara'} = '';
+ $new->{'Indent'} = 0;
+ $new->{'Indentstring'} = ' ';
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub handle_text { $_[0]{'Thispara'} .= $_[1] }
+
+sub start_Para { $_[0]{'Thispara'} = '' }
+sub start_head1 { $_[0]{'Thispara'} = '' }
+sub start_head2 { $_[0]{'Thispara'} = '' }
+sub start_head3 { $_[0]{'Thispara'} = '' }
+sub start_head4 { $_[0]{'Thispara'} = '' }
+
+sub start_Verbatim { $_[0]{'Thispara'} = '' }
+sub start_item_bullet { $_[0]{'Thispara'} = $FREAKYMODE ? '' : '* ' }
+sub start_item_number { $_[0]{'Thispara'} = $FREAKYMODE ? '' : "$_[1]{'number'}. " }
+sub start_item_text { $_[0]{'Thispara'} = '' }
+
+sub start_over_bullet { ++$_[0]{'Indent'} }
+sub start_over_number { ++$_[0]{'Indent'} }
+sub start_over_text { ++$_[0]{'Indent'} }
+sub start_over_block { ++$_[0]{'Indent'} }
+
+sub end_over_bullet { --$_[0]{'Indent'} }
+sub end_over_number { --$_[0]{'Indent'} }
+sub end_over_text { --$_[0]{'Indent'} }
+sub end_over_block { --$_[0]{'Indent'} }
+
+
+# . . . . . Now the actual formatters:
+
+sub end_head1 { $_[0]->emit_par(-4) }
+sub end_head2 { $_[0]->emit_par(-3) }
+sub end_head3 { $_[0]->emit_par(-2) }
+sub end_head4 { $_[0]->emit_par(-1) }
+sub end_Para { $_[0]->emit_par( 0) }
+sub end_item_bullet { $_[0]->emit_par( 0) }
+sub end_item_number { $_[0]->emit_par( 0) }
+sub end_item_text { $_[0]->emit_par(-2) }
+
+sub emit_par {
+ my($self, $tweak_indent) = splice(@_,0,2);
+ my $indent = ' ' x ( 2 * $self->{'Indent'} + 4 + ($tweak_indent||0) );
+ # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
+
+ $self->{'Thispara'} =~ tr{\xAD}{}d if Pod::Simple::ASCII;
+ my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n");
+ $out =~ tr{\xA0}{ } if Pod::Simple::ASCII;
+ print {$self->{'output_fh'}} $out, "\n";
+ $self->{'Thispara'} = '';
+
+ return;
+}
+
+# . . . . . . . . . . And then off by its lonesome:
+
+sub end_Verbatim {
+ my $self = shift;
+ if(Pod::Simple::ASCII) {
+ $self->{'Thispara'} =~ tr{\xA0}{ };
+ $self->{'Thispara'} =~ tr{\xAD}{}d;
+ }
+
+ my $i = ' ' x ( 2 * $self->{'Indent'} + 4);
+ #my $i = ' ' x (4 + $self->{'Indent'});
+
+ $self->{'Thispara'} =~ s/^/$i/mg;
+
+ print { $self->{'output_fh'} } '',
+ $self->{'Thispara'},
+ "\n\n"
+ ;
+ $self->{'Thispara'} = '';
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::Text -- format Pod as plaintext
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::Text -e \
+ "exit Pod::Simple::Text->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+This class is a formatter that takes Pod and renders it as
+wrapped plaintext.
+
+Its wrapping is done by L<Text::Wrap>, so you can change
+C<$Text::Wrap::columns> as you like.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::TextContent>, L<Pod::Text>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/TextContent.pm b/ext/Pod-Simple/lib/Pod/Simple/TextContent.pm
new file mode 100644
index 0000000000..3675b005ef
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/TextContent.pm
@@ -0,0 +1,87 @@
+
+
+require 5;
+package Pod::Simple::TextContent;
+use strict;
+use Carp ();
+use Pod::Simple ();
+use vars qw( @ISA $VERSION );
+$VERSION = '2.02';
+@ISA = ('Pod::Simple');
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->nix_X_codes(1);
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub _handle_element_start {
+ print {$_[0]{'output_fh'}} "\n" unless $_[1] =~ m/^[A-Z]$/s;
+ return;
+}
+
+sub _handle_text {
+ if( chr(65) eq 'A' ) { # in ASCIIworld
+ $_[1] =~ tr/\xAD//d;
+ $_[1] =~ tr/\xA0/ /;
+ }
+ print {$_[0]{'output_fh'}} $_[1];
+ return;
+}
+
+sub _handle_element_end {
+ print {$_[0]{'output_fh'}} "\n" unless $_[1] =~ m/^[A-Z]$/s;
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::TextContent -- get the text content of Pod
+
+=head1 SYNOPSIS
+
+ TODO
+
+ perl -MPod::Simple::TextContent -e \
+ "exit Pod::Simple::TextContent->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+This class is that parses Pod and dumps just the text content. It is
+mainly meant for use by the Pod::Simple test suite, but you may find
+some other use for it.
+
+This is a subclass of L<Pod::Simple> and inherits all its methods.
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell>
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm b/ext/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm
new file mode 100644
index 0000000000..b031fe5869
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm
@@ -0,0 +1,103 @@
+
+use strict;
+package Pod::Simple::TiedOutFH;
+use Symbol ('gensym');
+use Carp ();
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub handle_on { # some horrible frightening things are encapsulated in here
+ my $class = shift;
+ $class = ref($class) || $class;
+
+ Carp::croak "Usage: ${class}->handle_on(\$somescalar)" unless @_;
+
+ my $x = (defined($_[0]) and ref($_[0]))
+ ? $_[0]
+ : ( \( $_[0] ) )[0]
+ ;
+ $$x = '' unless defined $$x;
+
+ #Pod::Simple::DEBUG and print "New $class handle on $x = \"$$x\"\n";
+
+ my $new = gensym();
+ tie *$new, $class, $x;
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub TIEHANDLE { # Ties to just a scalar ref
+ my($class, $scalar_ref) = @_;
+ $$scalar_ref = '' unless defined $$scalar_ref;
+ return bless \$scalar_ref, ref($class) || $class;
+}
+
+sub PRINT {
+ my $it = shift;
+ foreach my $x (@_) { $$$it .= $x }
+
+ #Pod::Simple::DEBUG > 10 and print " appended to $$it = \"$$$it\"\n";
+
+ return 1;
+}
+
+sub FETCH {
+ return ${$_[0]};
+}
+
+sub PRINTF {
+ my $it = shift;
+ my $format = shift;
+ $$$it .= sprintf $format, @_;
+ return 1;
+}
+
+sub FILENO { ${ $_[0] } + 100 } # just to produce SOME number
+
+sub CLOSE { 1 }
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+1;
+__END__
+
+Chole
+
+ * 1 large red onion
+ * 2 tomatillos
+ * 4 or 5 roma tomatoes (optionally with the pulp discarded)
+ * 1 tablespoons chopped ginger root (or more, to taste)
+ * 2 tablespoons canola oil (or vegetable oil)
+
+ * 1 tablespoon garam masala
+ * 1/2 teaspoon red chili powder, or to taste
+ * Salt, to taste (probably quite a bit)
+ * 2 (15-ounce) cans chick peas or garbanzo beans, drained and rinsed
+ * juice of one smallish lime
+ * a dash of balsamic vinegar (to taste)
+ * cooked rice, preferably long-grain white rice (whether plain,
+ basmati rice, jasmine rice, or even a mild pilaf)
+
+In a blender or food processor, puree the onions, tomatoes, tomatillos,
+and ginger root. You can even do it with a Braun hand "mixer", if you
+chop things finer to start with, and work at it.
+
+In a saucepan set over moderate heat, warm the oil until hot.
+
+Add the puree and the balsamic vinegar, and cook, stirring occasionally,
+for 20 to 40 minutes. (Cooking it longer will make it sweeter.)
+
+Add the Garam Masala, chili powder, and cook, stirring occasionally, for
+5 minutes.
+
+Add the salt and chick peas and cook, stirring, until heated through.
+
+Stir in the lime juice, and optionally one or two teaspoons of tahini.
+You can let it simmer longer, depending on how much softer you want the
+garbanzos to get.
+
+Serve over rice, like a curry.
+
+Yields 5 to 7 servings.
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/Transcode.pm b/ext/Pod-Simple/lib/Pod/Simple/Transcode.pm
new file mode 100644
index 0000000000..434f963388
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/Transcode.pm
@@ -0,0 +1,33 @@
+
+require 5;
+package Pod::Simple::Transcode;
+
+BEGIN {
+ if(defined &DEBUG) {;} # Okay
+ elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG; }
+ else { *DEBUG = sub () {0}; }
+}
+
+foreach my $class (
+ 'Pod::Simple::TranscodeSmart',
+ 'Pod::Simple::TranscodeDumb',
+ '',
+) {
+ $class or die "Couldn't load any encoding classes";
+ DEBUG and print "About to try loading $class...\n";
+ eval "require $class;";
+ if($@) {
+ DEBUG and print "Couldn't load $class: $@\n";
+ } else {
+ DEBUG and print "OK, loaded $class.\n";
+ @ISA = ($class);
+ last;
+ }
+}
+
+sub _blorp { return; } # just to avoid any "empty class" warning
+
+1;
+__END__
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm b/ext/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm
new file mode 100644
index 0000000000..d5eb7e5fb8
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm
@@ -0,0 +1,63 @@
+
+require 5;
+## This module is to be use()'d only by Pod::Simple::Transcode
+
+package Pod::Simple::TranscodeDumb;
+use strict;
+use vars qw($VERSION %Supported);
+$VERSION = '2.02';
+# This module basically pretends it knows how to transcode, except
+# only for null-transcodings! We use this when Encode isn't
+# available.
+
+%Supported = (
+ 'ascii' => 1,
+ 'ascii-ctrl' => 1,
+ 'iso-8859-1' => 1,
+ 'null' => 1,
+ 'latin1' => 1,
+ 'latin-1' => 1,
+ %Supported,
+);
+
+sub is_dumb {1}
+sub is_smart {0}
+
+sub all_encodings {
+ return sort keys %Supported;
+}
+
+sub encoding_is_available {
+ return exists $Supported{lc $_[1]};
+}
+
+sub encmodver {
+ return __PACKAGE__ . " v" .($VERSION || '?');
+}
+
+sub make_transcoder {
+ my($e) = $_[1];
+ die "WHAT ENCODING!?!?" unless $e;
+ my $x;
+ return sub {;
+ #foreach $x (@_) {
+ # if(Pod::Simple::ASCII and !Pod::Simple::UNICODE and $] > 5.005) {
+ # # We're in horrible gimp territory, so we need to knock out
+ # # all the highbit things
+ # $x =
+ # pack 'C*',
+ # map {; ($_ < 128) ? $_ : 0x7e }
+ # unpack "C*",
+ # $x
+ # ;
+ # }
+ #}
+ #
+ #return;
+ };
+}
+
+
+1;
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm b/ext/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm
new file mode 100644
index 0000000000..3fc26a4a26
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm
@@ -0,0 +1,42 @@
+
+require 5;
+use 5.008;
+## Anything before 5.8.0 is GIMPY!
+## This module is to be use()'d only by Pod::Simple::Transcode
+
+package Pod::Simple::TranscodeSmart;
+use strict;
+use Pod::Simple;
+require Encode;
+
+sub is_dumb {0}
+sub is_smart {1}
+
+sub all_encodings {
+ return Encode::->encodings(':all');
+}
+
+sub encoding_is_available {
+ return Encode::resolve_alias($_[1]);
+}
+
+sub encmodver {
+ return "Encode.pm v" .($Encode::VERSION || '?');
+}
+
+sub make_transcoder {
+ my($e) = $_[1];
+ die "WHAT ENCODING!?!?" unless $e;
+ my $x;
+ return sub {
+ foreach $x (@_) {
+ $x = Encode::decode($e, $x);
+ }
+ return;
+ };
+}
+
+
+1;
+
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/XHTML.pm b/ext/Pod-Simple/lib/Pod/Simple/XHTML.pm
new file mode 100644
index 0000000000..d130faaa96
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/XHTML.pm
@@ -0,0 +1,400 @@
+=pod
+
+=head1 NAME
+
+Pod::Simple::XHTML -- format Pod as validating XHTML
+
+=head1 SYNOPSIS
+
+ use Pod::Simple::XHTML;
+
+ my $parser = Pod::Simple::XHTML->new();
+
+ ...
+
+ $parser->parse_file('path/to/file.pod');
+
+=head1 DESCRIPTION
+
+This class is a formatter that takes Pod and renders it as XHTML
+validating HTML.
+
+This is a subclass of L<Pod::Simple::Methody> and inherits all its
+methods. The implementation is entirely different than
+L<Pod::Simple::HTML>, but it largely preserves the same interface.
+
+=cut
+
+package Pod::Simple::XHTML;
+use strict;
+use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
+$VERSION = '3.04';
+use Carp ();
+use Pod::Simple::Methody ();
+@ISA = ('Pod::Simple::Methody');
+
+BEGIN {
+ $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
+}
+
+my %entities = (
+ q{>} => 'gt',
+ q{<} => 'lt',
+ q{'} => '#39',
+ q{"} => 'quot',
+ q{&} => 'amp',
+);
+
+sub encode_entities {
+ return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES;
+ my $str = $_[0];
+ my $ents = join '', keys %entities;
+ $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge;
+ return $str;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head1 METHODS
+
+Pod::Simple::XHTML offers a number of methods that modify the format of
+the HTML output. Call these after creating the parser object, but before
+the call to C<parse_file>:
+
+ my $parser = Pod::PseudoPod::HTML->new();
+ $parser->set_optional_param("value");
+ $parser->parse_file($file);
+
+=head2 perldoc_url_prefix
+
+In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
+to put before the "Foo%3a%3aBar". The default value is
+"http://search.cpan.org/perldoc?".
+
+=head2 perldoc_url_postfix
+
+What to put after "Foo%3a%3aBar" in the URL. This option is not set by
+default.
+
+=head2 title_prefix, title_postfix
+
+What to put before and after the title in the head. The values should
+already be &-escaped.
+
+=head2 html_css
+
+ $parser->html_css('path/to/style.css');
+
+The URL or relative path of a CSS file to include. This option is not
+set by default.
+
+=head2 html_javascript
+
+The URL or relative path of a JavaScript file to pull in. This option is
+not set by default.
+
+=head2 html_doctype
+
+A document type tag for the file. This option is not set by default.
+
+=head2 html_header_tags
+
+Additional arbitrary HTML tags for the header of the document. The
+default value is just a content type header tag:
+
+ <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+
+Add additional meta tags here, or blocks of inline CSS or JavaScript
+(wrapped in the appropriate tags).
+
+=head2 default_title
+
+Set a default title for the page if no title can be determined from the
+content. The value of this string should already be &-escaped.
+
+=head2 force_title
+
+Force a title for the page (don't try to determine it from the content).
+The value of this string should already be &-escaped.
+
+=head2 html_header, html_footer
+
+Set the HTML output at the beginning and end of each file. The default
+header includes a title, a doctype tag (if C<html_doctype> is set), a
+content tag (customized by C<html_header_tags>), a tag for a CSS file
+(if C<html_css> is set), and a tag for a Javascript file (if
+C<html_javascript> is set). The default footer simply closes the C<html>
+and C<body> tags.
+
+The options listed above customize parts of the default header, but
+setting C<html_header> or C<html_footer> completely overrides the
+built-in header or footer. These may be useful if you want to use
+template tags instead of literal HTML headers and footers or are
+integrating converted POD pages in a larger website.
+
+If you want no headers or footers output in the HTML, set these options
+to the empty string.
+
+=head2 index
+
+TODO -- Not implemented.
+
+Whether to add a table-of-contents at the top of each page (called an
+index for the sake of tradition).
+
+
+=cut
+
+__PACKAGE__->_accessorize(
+ 'perldoc_url_prefix',
+ 'perldoc_url_postfix',
+ 'title_prefix', 'title_postfix',
+ 'html_css',
+ 'html_javascript',
+ 'html_doctype',
+ 'html_header_tags',
+ 'title', # Used internally for the title extracted from the content
+ 'default_title',
+ 'force_title',
+ 'html_header',
+ 'html_footer',
+ 'index',
+ 'batch_mode', # whether we're in batch mode
+ 'batch_mode_current_level',
+ # When in batch mode, how deep the current module is: 1 for "LWP",
+ # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
+);
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head1 SUBCLASSING
+
+If the standard options aren't enough, you may want to subclass
+Pod::Simple::XHMTL. These are the most likely candidates for methods
+you'll want to override when subclassing.
+
+=cut
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->accept_targets( 'html', 'HTML' );
+ $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
+ $new->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">');
+ $new->nix_X_codes(1);
+ $new->codes_in_verbatim(1);
+ $new->{'scratch'} = '';
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head2 handle_text
+
+This method handles the body of text within any element: it's the body
+of a paragraph, or everything between a "=begin" tag and the
+corresponding "=end" tag, or the text within an L entity, etc. You would
+want to override this if you are adding a custom element type that does
+more than just display formatted text. Perhaps adding a way to generate
+HTML tables from an extended version of POD.
+
+So, let's say you want add a custom element called 'foo'. In your
+subclass's C<new> method, after calling C<SUPER::new> you'd call:
+
+ $new->accept_targets_as_text( 'foo' );
+
+Then override the C<start_for> method in the subclass to check for when
+"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
+you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
+C<handle_text> method to check for the flag, and pass $text to your
+custom subroutine to construct the HTML output for 'foo' elements,
+something like:
+
+ sub handle_text {
+ my ($self, $text) = @_;
+ if ($self->{'in_foo'}) {
+ $self->{'scratch'} .= build_foo_html($text);
+ } else {
+ $self->{'scratch'} .= $text;
+ }
+ }
+
+=cut
+
+sub handle_text {
+ # escape special characters in HTML (<, >, &, etc)
+ $_[0]{'scratch'} .= $_[0]{'in_verbatim'} ? encode_entities( $_[1] ) : $_[1]
+}
+
+sub start_Para { $_[0]{'scratch'} = '<p>' }
+sub start_Verbatim { $_[0]{'scratch'} = '<pre><code>'; $_[0]{'in_verbatim'} = 1}
+
+sub start_head1 { $_[0]{'scratch'} = '<h1>' }
+sub start_head2 { $_[0]{'scratch'} = '<h2>' }
+sub start_head3 { $_[0]{'scratch'} = '<h3>' }
+sub start_head4 { $_[0]{'scratch'} = '<h4>' }
+
+sub start_item_bullet { $_[0]{'scratch'} = '<li>' }
+sub start_item_number { $_[0]{'scratch'} = "<li>$_[1]{'number'}. " }
+sub start_item_text { $_[0]{'scratch'} = '<li>' }
+
+sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
+sub start_over_text { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
+sub start_over_block { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
+sub start_over_number { $_[0]{'scratch'} = '<ol>'; $_[0]->emit }
+
+sub end_over_bullet { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
+sub end_over_text { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
+sub end_over_block { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
+sub end_over_number { $_[0]{'scratch'} .= '</ol>'; $_[0]->emit }
+
+# . . . . . Now the actual formatters:
+
+sub end_Para { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
+sub end_Verbatim {
+ $_[0]{'scratch'} .= '</code></pre>';
+ $_[0]{'in_verbatim'} = 0;
+ $_[0]->emit;
+}
+
+sub end_head1 { $_[0]{'scratch'} .= '</h1>'; $_[0]->emit }
+sub end_head2 { $_[0]{'scratch'} .= '</h2>'; $_[0]->emit }
+sub end_head3 { $_[0]{'scratch'} .= '</h3>'; $_[0]->emit }
+sub end_head4 { $_[0]{'scratch'} .= '</h4>'; $_[0]->emit }
+
+sub end_item_bullet { $_[0]{'scratch'} .= '</li>'; $_[0]->emit }
+sub end_item_number { $_[0]{'scratch'} .= '</li>'; $_[0]->emit }
+sub end_item_text { $_[0]->emit }
+
+# This handles =begin and =for blocks of all kinds.
+sub start_for {
+ my ($self, $flags) = @_;
+ $self->{'scratch'} .= '<div';
+ $self->{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'});
+ $self->{'scratch'} .= '>';
+ $self->emit;
+
+}
+sub end_for {
+ my ($self) = @_;
+ $self->{'scratch'} .= '</div>';
+ $self->emit;
+}
+
+sub start_Document {
+ my ($self) = @_;
+ if (defined $self->html_header) {
+ $self->{'scratch'} .= $self->html_header;
+ $self->emit unless $self->html_header eq "";
+ } else {
+ my ($doctype, $title, $metatags);
+ $doctype = $self->html_doctype || '';
+ $title = $self->force_title || $self->title || $self->default_title || '';
+ $metatags = $self->html_header_tags || '';
+ if ($self->html_css) {
+ $metatags .= "\n<link rel='stylesheet' href='" .
+ $self->html_css . "' type='text/css'>";
+ }
+ if ($self->html_javascript) {
+ $metatags .= "\n<script type='text/javascript' src='" .
+ $self->html_javascript . "'></script>";
+ }
+ $self->{'scratch'} .= <<"HTML";
+$doctype
+<html>
+<head>
+<title>$title</title>
+$metatags
+</head>
+<body>
+HTML
+ $self->emit;
+ }
+}
+
+sub end_Document {
+ my ($self) = @_;
+ if (defined $self->html_footer) {
+ $self->{'scratch'} .= $self->html_footer;
+ $self->emit unless $self->html_footer eq "";
+ } else {
+ $self->{'scratch'} .= "</body>\n</html>";
+ $self->emit;
+ }
+}
+
+# Handling code tags
+sub start_B { $_[0]{'scratch'} .= '<b>' }
+sub end_B { $_[0]{'scratch'} .= '</b>' }
+
+sub start_C { $_[0]{'scratch'} .= '<code>' }
+sub end_C { $_[0]{'scratch'} .= '</code>' }
+
+sub start_E { $_[0]{'scratch'} .= '&' }
+sub end_E { $_[0]{'scratch'} .= ';' }
+
+sub start_F { $_[0]{'scratch'} .= '<i>' }
+sub end_F { $_[0]{'scratch'} .= '</i>' }
+
+sub start_I { $_[0]{'scratch'} .= '<i>' }
+sub end_I { $_[0]{'scratch'} .= '</i>' }
+
+sub start_L {
+ my ($self, $flags) = @_;
+ my $url;
+ if ($flags->{'type'} eq 'url') {
+ $url = $flags->{'to'};
+ } elsif ($flags->{'type'} eq 'pod') {
+ $url .= $self->perldoc_url_prefix || '';
+ $url .= $flags->{'to'} || '';
+ $url .= '/' . $flags->{'section'} if ($flags->{'section'});
+ $url .= $self->perldoc_url_postfix || '';
+# require Data::Dumper;
+# print STDERR Data::Dumper->Dump([$flags]);
+ }
+
+ $self->{'scratch'} .= '<a href="'. $url . '">';
+}
+sub end_L { $_[0]{'scratch'} .= '</a>' }
+
+sub start_S { $_[0]{'scratch'} .= '<nobr>' }
+sub end_S { $_[0]{'scratch'} .= '</nobr>' }
+
+sub emit {
+ my($self) = @_;
+ my $out = $self->{'scratch'} . "\n";
+ print {$self->{'output_fh'}} $out, "\n";
+ $self->{'scratch'} = '';
+ return;
+}
+
+# Bypass built-in E<> handling to preserve entity encoding
+sub _treat_Es {}
+
+1;
+
+__END__
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::Methody>
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003-2005 Allison Randal.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. The full text of the license
+can be found in the LICENSE file included with this module.
+
+This library is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Allison Randal <allison@perl.org>
+
+=cut
+
diff --git a/ext/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm b/ext/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm
new file mode 100644
index 0000000000..1e7ec15d9a
--- /dev/null
+++ b/ext/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm
@@ -0,0 +1,157 @@
+
+require 5;
+package Pod::Simple::XMLOutStream;
+use strict;
+use Carp ();
+use Pod::Simple ();
+use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS);
+$VERSION = '2.02';
+BEGIN {
+ @ISA = ('Pod::Simple');
+ *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG;
+}
+
+$ATTR_PAD = "\n" unless defined $ATTR_PAD;
+ # Don't mess with this unless you know what you're doing.
+
+$SORT_ATTRS = 0 unless defined $SORT_ATTRS;
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ #$new->accept_codes('VerbatimFormatted');
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub _handle_element_start {
+ # ($self, $element_name, $attr_hash_r)
+ my $fh = $_[0]{'output_fh'};
+ my($key, $value);
+ DEBUG and print "++ $_[1]\n";
+ print $fh "<", $_[1];
+ if($SORT_ATTRS) {
+ foreach my $key (sort keys %{$_[2]}) {
+ unless($key =~ m/^~/s) {
+ next if $key eq 'start_line' and $_[0]{'hide_line_numbers'};
+ _xml_escape($value = $_[2]{$key});
+ print $fh $ATTR_PAD, $key, '="', $value, '"';
+ }
+ }
+ } else { # faster
+ while(($key,$value) = each %{$_[2]}) {
+ unless($key =~ m/^~/s) {
+ next if $key eq 'start_line' and $_[0]{'hide_line_numbers'};
+ _xml_escape($value);
+ print $fh $ATTR_PAD, $key, '="', $value, '"';
+ }
+ }
+ }
+ print $fh ">";
+ return;
+}
+
+sub _handle_text {
+ DEBUG and print "== \"$_[1]\"\n";
+ if(length $_[1]) {
+ my $text = $_[1];
+ _xml_escape($text);
+ print {$_[0]{'output_fh'}} $text;
+ }
+ return;
+}
+
+sub _handle_element_end {
+ DEBUG and print "-- $_[1]\n";
+ print {$_[0]{'output_fh'}} "</", $_[1], ">";
+ return;
+}
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+sub _xml_escape {
+ foreach my $x (@_) {
+ # Escape things very cautiously:
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done about that.
+ }
+ return;
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::XMLOutStream -- turn Pod into XML
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::XMLOutStream -e \
+ "exit Pod::Simple::XMLOutStream->filter(shift)->any_errata_seen" \
+ thingy.pod
+
+=head1 DESCRIPTION
+
+Pod::Simple::XMLOutStream is a subclass of L<Pod::Simple> that parses
+Pod and turns it into XML.
+
+Pod::Simple::XMLOutStream inherits methods from
+L<Pod::Simple>.
+
+
+=head1 SEE ALSO
+
+L<Pod::Simple::DumpAsXML> is rather like this class; see its
+documentation for a discussion of the differences.
+
+L<Pod::Simple>, L<Pod::Simple::DumpAsXML>, L<Pod::SAX>
+
+L<Pod::Simple::Subclassing>
+
+The older (and possibly obsolete) libraries L<Pod::PXML>, L<Pod::XML>
+
+
+=head1 ABOUT EXTENDING POD
+
+TODO: An example or two of =extend, then point to Pod::Simple::Subclassing
+
+
+=head1 ASK ME!
+
+If you actually want to use Pod as a format that you want to render to
+XML (particularly if to an XML instance with more elements than normal
+Pod has), please email me (C<sburke@cpan.org>) and I'll probably have
+some recommendations.
+
+For reasons of concision and energetic laziness, some methods and
+options in this module (and the dozen modules it depends on) are
+undocumented; but one of those undocumented bits might be just what
+you're looking for.
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002-4 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+