summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSawyer X <xsawyerx@cpan.org>2019-05-24 17:56:15 +0300
committerSawyer X <xsawyerx@cpan.org>2019-05-24 17:56:15 +0300
commit314f4963bff4d23e773eee5559e5fd1de2dc6cbc (patch)
tree324b3aaca920eeebde91ac1b25d90a4d311c4b7d
parent58f4626762668e1c1948832073998af84b2c85d0 (diff)
downloadperl-314f4963bff4d23e773eee5559e5fd1de2dc6cbc.tar.gz
Bump Pod::Simple from 3.35 to 3.36
-rw-r--r--MANIFEST6
-rwxr-xr-xPorting/Maintainers.pl2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple.pm33
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm699
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Checker.pm6
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Debug.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/HTML.pm4
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm30
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm362
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm4
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Methody.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Progress.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/RTF.pm179
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Search.pm4
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod14
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Text.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm2
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm10
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm2
-rw-r--r--cpan/Pod-Simple/t/00about.t2
-rw-r--r--cpan/Pod-Simple/t/JustPod01.t219
-rw-r--r--cpan/Pod-Simple/t/JustPod02.t445
-rw-r--r--cpan/Pod-Simple/t/JustPod_corpus.t155
-rw-r--r--cpan/Pod-Simple/t/corpus/polish_utf8.txt19
-rw-r--r--cpan/Pod-Simple/t/corpus/polish_utf8.xml37
-rw-r--r--cpan/Pod-Simple/t/encod04.t79
-rw-r--r--cpan/Pod-Simple/t/fcodes_s.t36
-rw-r--r--cpan/Pod-Simple/t/github_issue_79.t73
-rw-r--r--cpan/Pod-Simple/t/html01.t12
-rw-r--r--cpan/Pod-Simple/t/perlcyg.pod2
-rw-r--r--cpan/Pod-Simple/t/rtf_utf8.t220
-rw-r--r--cpan/Pod-Simple/t/search50.t1
-rw-r--r--cpan/Pod-Simple/t/whine.t22
-rw-r--r--cpan/Pod-Simple/t/x_nixer.t2
-rw-r--r--cpan/Pod-Simple/t/xhtml01.t12
47 files changed, 2317 insertions, 408 deletions
diff --git a/MANIFEST b/MANIFEST
index 5ef609b4c0..5e993acbbe 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1613,6 +1613,7 @@ cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm turn Pod into XML
cpan/Pod-Simple/lib/Pod/Simple/HTML.pm convert Pod to HTML
cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm convert several Pod files to several HTML files
cpan/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm Pod::Simple::HTMLLegacy
+cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm represent "section" attributes of L codes
cpan/Pod-Simple/lib/Pod/Simple/Methody.pm turn Pod::Simple events into method calls
cpan/Pod-Simple/lib/Pod/Simple/Progress.pm Pod::Simple::Progress
@@ -1731,6 +1732,7 @@ cpan/Pod-Simple/t/fcodes_l.t Pod::Simple test file
cpan/Pod-Simple/t/fcodes_s.t Pod::Simple test file
cpan/Pod-Simple/t/for.t Pod::Simple test file
cpan/Pod-Simple/t/fornot.t Pod::Simple test file
+cpan/Pod-Simple/t/github_issue_79.t
cpan/Pod-Simple/t/heads.t Pod::Simple test file
cpan/Pod-Simple/t/html01.t Pod::Simple test file
cpan/Pod-Simple/t/html02.t Pod::Simple test file
@@ -1743,6 +1745,9 @@ cpan/Pod-Simple/t/junk1.pod Pod::Simple test file
cpan/Pod-Simple/t/junk1o.txt Pod::Simple test file
cpan/Pod-Simple/t/junk2.pod Pod::Simple test file
cpan/Pod-Simple/t/junk2o.txt Pod::Simple test file
+cpan/Pod-Simple/t/JustPod01.t
+cpan/Pod-Simple/t/JustPod02.t
+cpan/Pod-Simple/t/JustPod_corpus.t
cpan/Pod-Simple/t/linkclas.t Pod::Simple test file
cpan/Pod-Simple/t/output.t Pod::Simple test file
cpan/Pod-Simple/t/perlcyg.pod Pod::Simple test file
@@ -1755,6 +1760,7 @@ cpan/Pod-Simple/t/puller.t Pod::Simple test file
cpan/Pod-Simple/t/pulltitl.t Pod::Simple test file
cpan/Pod-Simple/t/reinit.t Pod::Simple test file
cpan/Pod-Simple/t/render.t Pod::Simple test file
+cpan/Pod-Simple/t/rtf_utf8.t
cpan/Pod-Simple/t/search05.t Pod::Simple test file
cpan/Pod-Simple/t/search10.t Pod::Simple test file
cpan/Pod-Simple/t/search12.t Pod::Simple test file
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index dfc7b50441..1a7031d668 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -926,7 +926,7 @@ use File::Glob qw(:case);
},
'Pod::Simple' => {
- 'DISTRIBUTION' => 'KHW/Pod-Simple-3.35.tar.gz',
+ 'DISTRIBUTION' => 'KHW/Pod-Simple-3.36.tar.gz',
'FILES' => q[cpan/Pod-Simple],
},
diff --git a/cpan/Pod-Simple/lib/Pod/Simple.pm b/cpan/Pod-Simple/lib/Pod/Simple.pm
index 20924153b6..a9db8c2a68 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple.pm
@@ -18,7 +18,7 @@ use vars qw(
);
@ISA = ('Pod::Simple::BlackBox');
-$VERSION = '3.35';
+$VERSION = '3.36';
@Known_formatting_codes = qw(I B C L E F S X Z);
%Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
@@ -74,6 +74,9 @@ else { # EBCDIC on early Perl. We know what the values are for the code
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
__PACKAGE__->_accessorize(
+ '_output_is_for_JustPod', # For use only by Pod::Simple::JustPod,
+ # If non-zero, don't expand Z<> E<> S<> L<>,
+ # and count how many brackets in format codes
'nbsp_for_S', # Whether to map S<...>'s to \xA0 characters
'source_filename', # Filename of the source, for use in warnings
'source_dead', # Whether to consider this parser's source dead
@@ -168,6 +171,7 @@ sub encoding {
BEGIN {
*pretty = \&Pod::Simple::BlackBox::pretty;
*stringify_lol = \&Pod::Simple::BlackBox::stringify_lol;
+ *my_qr = \&Pod::Simple::BlackBox::my_qr;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -339,10 +343,9 @@ sub unaccept_targets {
# XXX Probably it is an error that the digit '9' is excluded from these re's.
# Broken for early Perls on EBCDIC
-my $xml_name_re = eval "qr/[^-.0-8:A-Z_a-z[:^ascii:]]/";
-if (! defined $xml_name_re) {
- $xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/;
-}
+my $xml_name_re = my_qr('[^-.0-8:A-Z_a-z[:^ascii:]]', '9');
+$xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/
+ unless $xml_name_re;
sub accept_code { shift->accept_codes(@_) } # alias
@@ -652,12 +655,13 @@ sub _make_treelet {
$treelet = $self->_treelet_from_formatting_codes(@_);
}
- if( $self->_remap_sequences($treelet) ) {
+ if( ! $self->{'_output_is_for_JustPod'} # Retain these as-is for pod output
+ && $self->_remap_sequences($treelet) )
+ {
$self->_treat_Zs($treelet); # Might as well nix these first
$self->_treat_Ls($treelet); # L has to precede E and S
$self->_treat_Es($treelet);
$self->_treat_Ss($treelet); # S has to come after E
-
$self->_wrap_up($treelet); # Nix X's and merge texties
} else {
@@ -1080,9 +1084,14 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
# By here, $treelet->[$i] is definitely an L node
my $ell = $treelet->[$i];
- DEBUG > 1 and print STDERR "Ogling L node $ell\n";
+ DEBUG > 1 and print STDERR "Ogling L node " . pretty($ell) . "\n";
- # bitch if it's empty
+ # bitch if it's empty or is just '/'
+ if (@{$ell} == 3 and $ell->[2] =~ m!\A\s*/\s*\z!) {
+ $self->whine( $start_line, "L<> contains only '/'" );
+ $treelet->[$i] = 'L</>'; # just make it a text node
+ next; # and move on
+ }
if( @{$ell} == 2
or (@{$ell} == 3 and $ell->[2] eq '')
) {
@@ -1289,6 +1298,7 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
$section_name = [splice @ell_content];
$section_name->[ 0] =~ s/^\"//s;
$section_name->[-1] =~ s/\"$//s;
+ $ell->[1]{'~tolerated'} = 1;
}
# Turn L<Foo Bar> into L</Foo Bar>.
@@ -1296,8 +1306,8 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
and grep !ref($_) && m/ /s, @ell_content
) {
$section_name = [splice @ell_content];
+ $ell->[1]{'~deprecated'} = 1;
# That's support for the now-deprecated syntax.
- # (Maybe generate a warning eventually?)
# Note that it deliberately won't work on L<...|Foo Bar>
}
@@ -1347,7 +1357,7 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences
# And update children to be the link-text:
@$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : '');
- DEBUG > 2 and print STDERR "End of L-parsing for this node $treelet->[$i]\n";
+ DEBUG > 2 and print STDERR "End of L-parsing for this node " . pretty($treelet->[$i]) . "\n";
unshift @stack, $treelet->[$i]; # might as well recurse
}
@@ -1507,6 +1517,7 @@ sub _accessorize { # A simple-minded method-maker
$Carp::CarpLevel = 1, Carp::croak(
"Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
) unless (@_ == 1 or @_ == 2) and ref $_[0];
+
(@_ == 1) ? $_[0]->{$attrname}
: ($_[0]->{$attrname} = $_[1]);
};
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
index 9fe3f702ef..7f30052b0d 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
@@ -22,8 +22,36 @@ use integer; # vroom!
use strict;
use Carp ();
use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
#use constant DEBUG => 7;
+
+sub my_qr ($$) {
+
+ # $1 is a pattern to compile and return. Older perls compile any
+ # syntactically valid property, even if it isn't legal. To cope with
+ # this, return an empty string unless the compiled pattern also
+ # successfully matches $2, which the caller furnishes.
+
+ my ($input_re, $should_match) = @_;
+ # XXX could have a third parameter $shouldnt_match for extra safety
+
+ my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : "";
+
+ my $re = eval "no warnings; $use_utf8 qr/$input_re/";
+ #print STDERR __LINE__, ": $input_re: $@\n" if $@;
+ return "" if $@;
+
+ my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/";
+ #print STDERR __LINE__, ": $input_re: $@\n" if $@;
+ return "" if $@;
+
+ #print STDERR __LINE__, ": SUCCESS: $re\n" if $matches;
+ return $re if $matches;
+
+ #print STDERR __LINE__, ": $re: didn't match\n";
+ return "";
+}
+
BEGIN {
require Pod::Simple;
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
@@ -32,8 +60,37 @@ BEGIN {
# Matches a character iff the character will have a different meaning
# if we choose CP1252 vs UTF-8 if there is no =encoding line.
# This is broken for early Perls on non-ASCII platforms.
-my $non_ascii_re = eval "qr/[[:^ascii:]]/";
-$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re;
+my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6");
+$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re;
+
+# Use patterns understandable by Perl 5.6, if possible
+my $cs_re = my_qr('\p{IsCs}', "\x{D800}");
+my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely
+ # to get assigned
+my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]',
+ "\x{250}");
+$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re;
+
+my $script_run_re = eval 'no warnings "experimental::script_run";
+ qr/(*script_run: ^ .* $ )/x';
+my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}");
+unless ($latin_re) {
+ # This was machine generated to be the ranges of the union of the above
+ # three properties, with things that were undefined by Unicode 4.1 filling
+ # gaps. That is the version in use when Perl advanced enough to
+ # successfully compile and execute the above pattern.
+ $latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}");
+}
+
+my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A");
+
+# Latin script code points not in the first release of Unicode
+my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}");
+
+# If this perl doesn't have the Deprecated property, there's only one code
+# point in it that we need be concerned with.
+my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}");
+$deprecated_re = qr/\x{149}/ unless $deprecated_re;
my $utf8_bom;
if (($] ge 5.007_003)) {
@@ -57,10 +114,10 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
my $cut_handler = $self->{'cut_handler'};
my $wl_handler = $self->{'whiteline_handler'};
$self->{'line_count'} ||= 0;
-
+
my $scratch;
- DEBUG > 4 and
+ DEBUG > 4 and
print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
DEBUG > 5 and
@@ -71,9 +128,17 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
# paragraph buffer. Because we need to defer processing of =over
# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
# to process this.
-
+
$self->{'pod_para_count'} ||= 0;
+ # An attempt to match the pod portions of a line. This is not fool proof,
+ # but is good enough to serve as part of the heuristic for guessing the pod
+ # encoding if not specified.
+ my $format_codes = join "", '[', grep { / ^ [A-Za-z] $/x }
+ keys %{$self->{accept_codes}};
+ $format_codes .= ']';
+ my $pod_chars_re = qr/ ^ = [A-Za-z]+ | $format_codes < /x;
+
my $line;
foreach my $source_line (@_) {
if( $self->{'source_dead'} ) {
@@ -97,7 +162,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
($line = $source_line) =~ tr/\n\r//d;
# If we don't have two vars, we'll end up with that there
# tr/// modding the (potentially read-only) original source line!
-
+
} else {
DEBUG > 2 and print STDERR "First line: [$source_line]\n";
@@ -106,7 +171,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
$self->_handle_encoding_line( "=encoding utf8" );
delete $self->{'_processed_encoding'};
$line =~ tr/\n\r//d;
-
+
} elsif( $line =~ s/^\xFE\xFF//s ) {
DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
$self->scream(
@@ -130,7 +195,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
next;
# TODO: implement somehow?
-
+
} else {
DEBUG > 2 and print STDERR "First line is BOM-less.\n";
($line = $source_line) =~ tr/\n\r//d;
@@ -144,8 +209,8 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
my $encoding;
- # No =encoding line, and we are at the first line in the input that
- # contains a non-ascii byte, that is one whose meaning varies depending
+ # No =encoding line, and we are at the first pod line in the input that
+ # contains a non-ascii byte, that is, one whose meaning varies depending
# on whether the file is encoded in UTF-8 or CP1252, which are the two
# possibilities permitted by the pod spec. (ASCII is assumed if the
# file only contains ASCII bytes.) In order to process this line, we
@@ -162,22 +227,28 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
# without conflict. CP 1252 uses most of them for graphic characters.
#
# Note that all ASCII-range bytes represent their corresponding code
- # points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code
- # points require multiple (non-ASCII) bytes to represent. (A separate
- # paragraph for EBCDIC is below.) The multi-byte representation is
- # quite structured. If we find an isolated byte that requires multiple
- # bytes to represent in UTF-8, we know that the encoding is not UTF-8.
- # If we find a sequence of bytes that violates the UTF-8 structure, we
- # also can presume the encoding isn't UTF-8, and hence must be 1252.
+ # points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other
+ # code points require multiple (non-ASCII) bytes to represent. (A
+ # separate paragraph for EBCDIC is below.) The multi-byte
+ # representation is quite structured. If we find an isolated byte that
+ # would require multiple bytes to represent in UTF-8, we know that the
+ # encoding is not UTF-8. If we find a sequence of bytes that violates
+ # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and
+ # hence must be 1252.
#
# But there are ambiguous cases where we could guess wrong. If so, the
# user will end up having to supply an =encoding line. We use all
# readily available information to improve our chances of guessing
# right. The odds of something not being UTF-8, but still passing a
# UTF-8 validity test go down very rapidly with increasing length of the
- # sequence. Therefore we look at all the maximal length non-ascii
- # sequences on the line. If any of the sequences can't be UTF-8, we
- # quit there and choose CP1252. If all could be UTF-8, we guess UTF-8.
+ # sequence. Therefore we look at all non-ascii sequences on the line.
+ # If any of the sequences can't be UTF-8, we quit there and choose
+ # CP1252. If all could be UTF-8, we see if any of the code points
+ # represented are unlikely to be in pod. If so, we guess CP1252. If
+ # not, we check if the line is all in the same script; if not guess
+ # CP1252; otherwise UTF-8. For perls that don't have convenient script
+ # run testing, see if there is both Latin and non-Latin. If so, CP1252,
+ # otherwise UTF-8.
#
# On EBCDIC platforms, the situation is somewhat different. In
# UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
@@ -188,51 +259,188 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
# very unlikely to be in pod text. So if we encounter one of them, it
# means that it is quite likely CP1252 and not UTF-8. The net result is
# the same code below is used for both platforms.
- while ($line =~ m/($non_ascii_re+)/g) {
- my $non_ascii_seq = $1;
-
- if (length $non_ascii_seq == 1) {
- $encoding = 'CP1252';
- goto guessed;
- } elsif ($] ge 5.007_003) {
-
- # On Perls that have this function, we can see if the sequence is
- # valid UTF-8 or not.
- my $is_utf8;
- {
- no warnings 'utf8';
- $is_utf8 = utf8::decode($non_ascii_seq);
+ #
+ # XXX probably if the line has E<foo> that evaluates to illegal CP1252,
+ # then it is UTF-8. But we haven't processed E<> yet.
+
+ goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls
+
+ my $copy;
+
+ no warnings 'utf8';
+
+ if ($] ge 5.007_003) {
+ $copy = $line;
+
+ # On perls that have this function, we can use it to easily see if the
+ # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag
+ # needed below for script run detection
+ goto set_1252 if ! utf8::decode($copy);
+ }
+ elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows
+ # code page doing here anyway?
+ goto set_utf8;
+ }
+ else { # ASCII, no decode(): do it ourselves using the fundamental
+ # characteristics of UTF-8
+ use if $] le 5.006002, 'utf8';
+
+ my $char_ord;
+ my $needed; # How many continuation bytes to gobble up
+
+ # Initialize the translated line with a dummy character that will be
+ # deleted after everything else is done. This dummy makes sure that
+ # $copy will be in UTF-8. Doing it now avoids the bugs in early perls
+ # with upgrading in the middle
+ $copy = chr(0x100);
+
+ # Parse through the line
+ for (my $i = 0; $i < length $line; $i++) {
+ my $byte = substr($line, $i, 1);
+
+ # ASCII bytes are trivially dealt with
+ if ($byte !~ $non_ascii_re) {
+ $copy .= $byte;
+ next;
+ }
+
+ my $b_ord = ord $byte;
+
+ # Now figure out what this code point would be if the input is
+ # actually in UTF-8. If, in the process, we discover that it isn't
+ # well-formed UTF-8, we guess CP1252.
+ #
+ # Start the process. If it is UTF-8, we are at the first, start
+ # byte, of a multi-byte sequence. We look at this byte to figure
+ # out how many continuation bytes are needed, and to initialize the
+ # code point accumulator with the data from this byte.
+ #
+ # Normally the minimum continuation byte is 0x80, but in certain
+ # instances the minimum is a higher number. So the code below
+ # overrides this for those instances.
+ my $min_cont = 0x80;
+
+ if ($b_ord < 0xC2) { # A start byte < C2 is malformed
+ goto set_1252;
+ }
+ elsif ($b_ord <= 0xDF) {
+ $needed = 1;
+ $char_ord = $b_ord & 0x1F;
+ }
+ elsif ($b_ord <= 0xEF) {
+ $min_cont = 0xA0 if $b_ord == 0xE0;
+ $needed = 2;
+ $char_ord = $b_ord & (0x1F >> 1);
}
- if (! $is_utf8) {
- $encoding = 'CP1252';
- goto guessed;
+ elsif ($b_ord <= 0xF4) {
+ $min_cont = 0x90 if $b_ord == 0xF0;
+ $needed = 3;
+ $char_ord = $b_ord & (0x1F >> 2);
}
- } elsif (ord("A") == 65) { # An early Perl, ASCII platform
-
- # Without utf8::decode, it's a lot harder to do a rigorous check
- # (though some early releases had a different function that
- # accomplished the same thing). Since these are ancient Perls, not
- # likely to be in use today, we take the easy way out, and look at
- # just the first two bytes of the sequence to see if they are the
- # start of a UTF-8 character. In ASCII UTF-8, continuation bytes
- # must be between 0x80 and 0xBF. Start bytes can range from 0xC2
- # through 0xFF, but anything above 0xF4 is not Unicode, and hence
- # extremely unlikely to be in a pod.
- if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) {
- $encoding = 'CP1252';
- goto guessed;
+ else { # F4 is the highest start byte for legal Unicode; higher is
+ # unlikely to be in pod.
+ goto set_1252;
}
- # We don't bother doing anything special for EBCDIC on early Perls.
- # If there is a solitary variant, CP1252 will be chosen; otherwise
- # UTF-8.
- }
- } # End of loop through all variant sequences on the line
+ # ? not enough continuation bytes available
+ goto set_1252 if $i + $needed >= length $line;
+
+ # Accumulate the ordinal of the character from the remaining
+ # (continuation) bytes.
+ while ($needed-- > 0) {
+ my $cont = substr($line, ++$i, 1);
+ $b_ord = ord $cont;
+ goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF;
+
+ # In all cases, any next continuation bytes all have the same
+ # minimum legal value
+ $min_cont = 0x80;
+
+ # Accumulate this byte's contribution to the code point
+ $char_ord <<= 6;
+ $char_ord |= ($b_ord & 0x3F);
+ }
+
+ # Here, the sequence that formed this code point was valid UTF-8,
+ # so add the completed character to the output
+ $copy .= chr $char_ord;
+ } # End of loop through line
+
+ # Delete the dummy first character
+ $copy = substr($copy, 1);
+ }
+
+ # Here, $copy is legal UTF-8.
+
+ # If it can't be legal CP1252, no need to look further. (These bytes
+ # aren't valid in CP1252.) This test could have been placed higher in
+ # the code, but it seemed wrong to set the encoding to UTF-8 without
+ # making sure that the very first instance is well-formed. But what if
+ # it isn't legal CP1252 either? We have to choose one or the other, and
+ # It seems safer to favor the single-byte encoding over the multi-byte.
+ goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/;
+
+ # The C1 controls are not likely to appear in pod
+ goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/;
+
+ # Nor are surrogates nor unassigned, nor deprecated.
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re;
+ goto set_1252 if $cs_re && $copy =~ $cs_re;
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re;
+ goto set_1252 if $cn_re && $copy =~ $cn_re;
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re;
+ goto set_1252 if $copy =~ $deprecated_re;
+
+ # Nor are rare code points. But this is hard to determine. khw
+ # believes that IPA characters and the modifier letters are unlikely to
+ # be in pod (and certainly very unlikely to be the in the first line in
+ # the pod containing non-ASCII)
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re;
+ goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re;
+
+ # The first Unicode version included essentially every Latin character
+ # in modern usage. So, a Latin character not in the first release will
+ # unlikely be in pod.
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re;
+ goto set_1252 if $later_latin_re && $copy =~ $later_latin_re;
+
+ # On perls that handle script runs, if the UTF-8 interpretation yields
+ # a single script, we guess UTF-8, otherwise just having a mixture of
+ # scripts is suspicious, so guess CP1252. We first strip off, as best
+ # we can, the ASCII characters that look like they are pod directives,
+ # as these would always show as mixed with non-Latin text.
+ $copy =~ s/$pod_chars_re//g;
+
+ if ($script_run_re) {
+ goto set_utf8 if $copy =~ $script_run_re;
+ DEBUG > 8 and print STDERR __LINE__, ": not script run\n";
+ goto set_1252;
+ }
- # All sequences in the line could be UTF-8. Guess that.
+ # Even without script runs, but on recent enough perls and Unicodes, we
+ # can check if there is a mixture of both Latin and non-Latin. Again,
+ # having a mixture of scripts is suspicious, so assume CP1252
+
+ # If it's all non-Latin, there is no CP1252, as that is Latin
+ # characters and punct, etc.
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re;
+ goto set_utf8 if $copy !~ $latin_re;
+
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re;
+ goto set_utf8 if $copy =~ $every_char_is_latin_re;
+
+ DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n";
+
+ set_1252:
+ DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n";
+ $encoding = 'CP1252';
+ goto done_set;
+
+ set_utf8:
+ DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n";
$encoding = 'UTF-8';
- guessed:
+ done_set:
$self->_handle_encoding_line( "=encoding $encoding" );
delete $self->{'_processed_encoding'};
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
@@ -254,13 +462,13 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
$self->{'line_count'},
"=cut found outside a pod block. Skipping to next block."
);
-
+
## Before there were errata sections in the world, it was
## least-pessimal to abort processing the file. But now we can
## just barrel on thru (but still not start a pod block).
#splice @_;
#push @_, undef;
-
+
next;
} else {
$self->{'in_pod'} = $self->{'start_of_pod_block'}
@@ -273,7 +481,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
if $code_handler;
# Note: this may cause code to be processed out of order relative
# to pods, but in order relative to cuts.
-
+
# Note also that we haven't yet applied the transcoding to $line
# by time we call $code_handler!
@@ -284,11 +492,11 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
$self->{'line_count'} = $1 - 1;
}
-
+
next;
}
}
-
+
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Else we're in pod mode:
@@ -308,12 +516,13 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
# ++$self->{'pod_para_count'};
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
+ DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n";
$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
if $cut_handler;
# TODO: add to docs: Note: this may cause cuts to be processed out
# of order relative to pods, but in order relative to code.
-
+
} elsif($line =~ m/^(\s*)$/s) { # it's a blank line
if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
$wl_handler->(map $_, $line, $self->{'line_count'}, $self)
@@ -324,29 +533,30 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
push @{$paras->[-1]}, $line;
} # otherwise it's not interesting
-
+
if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
}
-
+
$self->{'last_was_blank'} = 1;
-
+
} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
-
- if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
+
+ if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) {
# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
- my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
+ my $new = [$1, {'start_line' => $self->{'line_count'}}, $3];
+ $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " ";
# Note that in "=head1 foo", the WS is lost.
# Example: ['=head1', {'start_line' => 123}, ' foo']
-
+
++$self->{'pod_para_count'};
-
+
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
-
+
push @$paras, $new; # the new incipient paragraph
DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
-
+
} elsif($line =~ m/^\s/s) {
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
@@ -379,7 +589,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
}
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
}
-
+
} # ends the big while loop
DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
@@ -390,7 +600,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
sub _handle_encoding_line {
my($self, $line) = @_;
-
+
return if $self->parse_characters;
# The point of this routine is to set $self->{'_transcoder'} as indicated.
@@ -492,7 +702,7 @@ sub _handle_encoding_line {
sub _handle_encoding_second_level {
# By time this is called, the encoding (if well formed) will already
- # have been acted one.
+ # have been acted on.
my($self, $para) = @_;
my @x = @$para;
my $content = join ' ', splice @x, 2;
@@ -500,7 +710,7 @@ sub _handle_encoding_second_level {
$content =~ s/\s+$//s;
DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";
-
+
if (defined($self->{'_processed_encoding'})) {
#if($content ne $self->{'_processed_encoding'}) {
# Could it happen?
@@ -518,14 +728,14 @@ sub _handle_encoding_second_level {
} else {
DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
}
-
+
} else {
# Otherwise it's a syntax error
$self->whine( $para->[1]{'start_line'},
"Invalid =encoding syntax: $content"
);
}
-
+
return;
}
@@ -542,7 +752,7 @@ sub _gen_errata {
return() unless $self->{'errata'} and keys %{$self->{'errata'}};
my @out;
-
+
foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
push @out,
['=item', {'start_line' => $m}, "Around line $line:"],
@@ -555,7 +765,7 @@ sub _gen_errata {
)
;
}
-
+
# TODO: report of unknown entities? unrenderable characters?
unshift @out,
@@ -569,7 +779,7 @@ sub _gen_errata {
['=over', {'start_line' => $m, 'errata' => 1}, ''],
;
- push @out,
+ push @out,
['=back', {'start_line' => $m, 'errata' => 1}, ''],
;
@@ -610,7 +820,7 @@ sub _ponder_paragraph_buffer {
# Document,
# Data, Para, Verbatim
# B, C, longdirname (TODO -- wha?), etc. for all directives
- #
+ #
my $self = $_[0];
my $paras;
@@ -624,11 +834,11 @@ sub _ponder_paragraph_buffer {
# We have something in our buffer. So apparently the document has started.
unless($self->{'doc_has_started'}) {
$self->{'doc_has_started'} = 1;
-
+
my $starting_contentless;
$starting_contentless =
(
- !@$curr_open
+ !@$curr_open
and @$paras and ! grep $_->[0] ne '~end', @$paras
# i.e., if the paras is all ~ends
)
@@ -637,7 +847,7 @@ sub _ponder_paragraph_buffer {
$starting_contentless ? 'contentless' : 'contentful',
" document\n"
;
-
+
$self->_handle_element_start(
($scratch = 'Document'),
{
@@ -649,15 +859,28 @@ sub _ponder_paragraph_buffer {
my($para, $para_type);
while(@$paras) {
- last if @$paras == 1 and
- ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
- or $paras->[0][0] eq '=item' )
- ;
+ last if @$paras == 1
+ and ( $paras->[0][0] eq '=over'
+ or $paras->[0][0] eq '=item'
+ or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'}));
# Those're the three kinds of paragraphs that require lookahead.
# Actually, an "=item Foo" inside an <over type=text> region
# and any =item inside an <over type=block> region (rare)
# don't require any lookahead, but all others (bullets
# and numbers) do.
+ # The verbatim is different from the other two, because those might be
+ # like:
+ #
+ # =item
+ # ...
+ # =cut
+ # ...
+ # =item
+ #
+ # The =cut here finishes the paragraph but doesn't terminate the =over
+ # they should be in. (khw apologizes that he didn't comment at the time
+ # why the 'in_pod' works, and no longer remembers why, and doesn't think
+ # it is currently worth the effort to re-figure it out.)
# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many? like what? =head1 etc?
@@ -667,7 +890,7 @@ sub _ponder_paragraph_buffer {
DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
$self->_dump_curr_open(), ")\n";
-
+
if($para_type eq '=for') {
next if $self->_ponder_for($para,$curr_open,$paras);
@@ -704,7 +927,7 @@ sub _ponder_paragraph_buffer {
} else {
# All non-magical codes!!!
-
+
# Here we start using $para_type for our own twisted purposes, to
# mean how it should get treated, not as what the element name
# should be.
@@ -744,10 +967,10 @@ sub _ponder_paragraph_buffer {
;
next;
}
-
-
+
+
my $over_type = $over->[1]{'~type'};
-
+
if(!$over_type) {
# Shouldn't happen1
die "Typeless over in stack, starting at line "
@@ -772,7 +995,7 @@ sub _ponder_paragraph_buffer {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
if($item_type eq 'text') {
# Nothing special needs doing for 'text'
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
@@ -788,16 +1011,16 @@ sub _ponder_paragraph_buffer {
} else {
die "Unhandled item type $item_type"; # should never happen
}
-
+
# =item-text thingies don't need any assimilation, it seems.
} elsif($over_type eq 'number') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
-
+
if($item_type eq 'bullet') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
@@ -822,7 +1045,7 @@ sub _ponder_paragraph_buffer {
} elsif($expected_value == $para->[1]{'number'}) {
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
-
+
} else {
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
" instead of the expected value of $expected_value\n";
@@ -833,7 +1056,7 @@ sub _ponder_paragraph_buffer {
);
$para->[1]{'number'} = $expected_value; # correcting!!
}
-
+
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
@@ -850,13 +1073,13 @@ sub _ponder_paragraph_buffer {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
if($item_type eq 'bullet') {
# as expected!
if( $para->[1]{'~_freaky_para_hack'} ) {
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
- push @$para, delete $para->[1]{'~_freaky_para_hack'};
+ push @$para, $para->[1]{'~_freaky_para_hack'};
}
} elsif($item_type eq 'number') {
@@ -944,15 +1167,15 @@ sub _ponder_paragraph_buffer {
my @fors = grep $_->[0] eq '=for', @$curr_open;
DEBUG > 1 and print STDERR "Containing fors: ",
join(',', map $_->[1]{'target'}, @fors), "\n";
-
+
if(! @fors) {
DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";
-
+
#} elsif(grep $_->[1]{'~resolve'}, @fors) {
#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
} elsif( $fors[-1][1]{'~resolve'} ) {
# Look to the immediately containing for
-
+
if($para_type eq 'Data') {
DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
$para->[0] = 'Para';
@@ -971,7 +1194,7 @@ sub _ponder_paragraph_buffer {
if($para_type eq 'Plain') {
$self->_ponder_Plain($para);
} elsif($para_type eq 'Verbatim') {
- $self->_ponder_Verbatim($para);
+ $self->_ponder_Verbatim($para);
} elsif($para_type eq 'Data') {
$self->_ponder_Data($para);
} else {
@@ -989,7 +1212,7 @@ sub _ponder_paragraph_buffer {
$self->_traverse_treelet_bit(@$para);
}
}
-
+
return;
}
@@ -1024,9 +1247,9 @@ sub _ponder_for {
}
DEBUG > 1 and
print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";
-
+
$para->[0] = 'Data';
-
+
unshift @$paras,
['=begin',
{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
@@ -1038,7 +1261,7 @@ sub _ponder_for {
$target,
],
;
-
+
return 1;
}
@@ -1055,20 +1278,20 @@ sub _ponder_begin {
DEBUG and print STDERR "Ignoring targetless =begin\n";
return 1;
}
-
+
my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
$para->[1]{'title'} = $title if ($title);
$para->[1]{'target'} = $target; # without any ':'
$content = $target; # strip off the title
-
+
$content =~ s/^:!/!:/s;
my $neg; # whether this is a negation-match
$neg = 1 if $content =~ s/^!//s;
my $to_resolve; # whether to process formatting codes
$to_resolve = 1 if $content =~ s/^://s;
-
+
my $dont_ignore; # whether this target matches us
-
+
foreach my $target_name (
split(',', $content, -1),
$neg ? () : '*'
@@ -1076,7 +1299,7 @@ sub _ponder_begin {
DEBUG > 2 and
print STDERR " Considering whether =begin $content matches $target_name\n";
next unless $self->{'accept_targets'}{$target_name};
-
+
DEBUG > 2 and
print STDERR " It DOES match the acceptable target $target_name!\n";
$to_resolve = 1
@@ -1139,7 +1362,7 @@ sub _ponder_end {
DEBUG and print STDERR "Ignoring targetless =end\n";
return 1;
}
-
+
unless($content =~ m/^\S+$/) { # i.e., unless it's one word
$self->whine(
$para->[1]{'start_line'},
@@ -1149,7 +1372,7 @@ sub _ponder_end {
DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
return 1;
}
-
+
unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
$self->whine(
$para->[1]{'start_line'},
@@ -1159,11 +1382,11 @@ sub _ponder_end {
DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
return 1;
}
-
+
unless($content eq $curr_open->[-1][1]{'target'}) {
$self->whine(
$para->[1]{'start_line'},
- "=end $content doesn't match =begin "
+ "=end $content doesn't match =begin "
. $curr_open->[-1][1]{'target'}
. ". (Stack: "
. $self->_dump_curr_open() . ')'
@@ -1180,7 +1403,7 @@ sub _ponder_end {
} else {
$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
# what's that for?
-
+
$self->{'content_seen'} ||= 1;
$self->_handle_element_end( my $scratch = 'for', $para->[1]);
}
@@ -1188,14 +1411,14 @@ sub _ponder_end {
pop @$curr_open;
return 1;
-}
+}
sub _ponder_doc_end {
my ($self,$para,$curr_open,$paras) = @_;
if(@$curr_open) { # Deal with things left open
DEBUG and print STDERR "Stack is nonempty at end-document: (",
$self->_dump_curr_open(), ")\n";
-
+
DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";
unshift @$paras, $self->_closers_for_all_curr_open;
# Make sure there is exactly one ~end in the parastack, at the end:
@@ -1205,11 +1428,11 @@ sub _ponder_doc_end {
# generate errata, and then another to be at the end
# when that loop back around to process the errata.
return 1;
-
+
} else {
DEBUG and print STDERR "Okay, stack is empty now.\n";
}
-
+
# Try generating errata section, if applicable
unless($self->{'~tried_gen_errata'}) {
$self->{'~tried_gen_errata'} = 1;
@@ -1220,7 +1443,7 @@ sub _ponder_doc_end {
return 1; # I.e., loop around again to process these fake-o paragraphs
}
}
-
+
splice @$paras; # Well, that's that for this paragraph buffer.
DEBUG and print STDERR "Throwing end-document event.\n";
@@ -1278,8 +1501,9 @@ sub _ponder_over {
$para->[1]{'~type'} = $list_type;
push @$curr_open, $para;
# yes, we reuse the paragraph as a stack item
-
+
my $content = join ' ', splice @$para, 2;
+ $para->[1]{'~orig_content'} = $content;
my $overness;
if($content =~ m/^\s*$/s) {
$para->[1]{'indent'} = 4;
@@ -1301,13 +1525,13 @@ sub _ponder_over {
$para->[1]{'indent'} = 4;
}
DEBUG > 1 and print STDERR "=over found of type $list_type\n";
-
+
$self->{'content_seen'} ||= 1;
$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
return;
}
-
+
sub _ponder_back {
my ($self,$para,$curr_open,$paras) = @_;
# TODO: fire off </item-number> or </item-bullet> or </item-text> ??
@@ -1354,10 +1578,10 @@ sub _ponder_item {
;
return 1;
}
-
-
+
+
my $over_type = $over->[1]{'~type'};
-
+
if(!$over_type) {
# Shouldn't happen1
die "Typeless over in stack, starting at line "
@@ -1382,7 +1606,7 @@ sub _ponder_item {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
if($item_type eq 'text') {
# Nothing special needs doing for 'text'
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
@@ -1398,16 +1622,16 @@ sub _ponder_item {
} else {
die "Unhandled item type $item_type"; # should never happen
}
-
+
# =item-text thingies don't need any assimilation, it seems.
} elsif($over_type eq 'number') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
-
+
if($item_type eq 'bullet') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
@@ -1432,7 +1656,7 @@ sub _ponder_item {
} elsif($expected_value == $para->[1]{'number'}) {
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
-
+
} else {
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
" instead of the expected value of $expected_value\n";
@@ -1443,7 +1667,7 @@ sub _ponder_item {
);
$para->[1]{'number'} = $expected_value; # correcting!!
}
-
+
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
@@ -1460,13 +1684,13 @@ sub _ponder_item {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
-
+
if($item_type eq 'bullet') {
# as expected!
if( $para->[1]{'~_freaky_para_hack'} ) {
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
- push @$para, delete $para->[1]{'~_freaky_para_hack'};
+ push @$para, $para->[1]{'~_freaky_para_hack'};
}
} elsif($item_type eq 'number') {
@@ -1533,30 +1757,36 @@ sub _ponder_Verbatim {
$para->[1]{'xml:space'} = 'preserve';
- my $indent = $self->strip_verbatim_indent;
- if ($indent && ref $indent eq 'CODE') {
- my @shifted = (shift @{$para}, shift @{$para});
- $indent = $indent->($para);
- unshift @{$para}, @shifted;
- }
+ unless ($self->{'_output_is_for_JustPod'}) {
+ my $indent = $self->strip_verbatim_indent;
+ if ($indent && ref $indent eq 'CODE') {
+ my @shifted = (shift @{$para}, shift @{$para});
+ $indent = $indent->($para);
+ unshift @{$para}, @shifted;
+ }
+
+ for(my $i = 2; $i < @$para; $i++) {
+ foreach my $line ($para->[$i]) { # just for aliasing
+ # Strip indentation.
+ $line =~ s/^\Q$indent// if $indent;
- for(my $i = 2; $i < @$para; $i++) {
- foreach my $line ($para->[$i]) { # just for aliasing
- # Strip indentation.
- $line =~ s/^\Q$indent// if $indent
- && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
- while( $line =~
- # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
- # tabs are at every EIGHTH column. For portability, it has to be
- # one setting everywhere, and 8th wins.
- s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
- ) {}
+ # This is commented out because of github issue #85, and the
+ # current maintainers don't know why it was there in the first
+ # place.
+ #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
+ while( $line =~
+ # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
+ # tabs are at every EIGHTH column. For portability, it has to be
+ # one setting everywhere, and 8th wins.
+ s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
+ ) {}
- # TODO: whinge about (or otherwise treat) unindented or overlong lines
+ # TODO: whinge about (or otherwise treat) unindented or overlong lines
+ }
}
}
-
+
# Now the VerbatimFormatted hoodoo...
if( $self->{'accept_codes'} and
$self->{'accept_codes'}{'VerbatimFormatted'}
@@ -1596,7 +1826,7 @@ sub _traverse_treelet_bit { # for use only by the routine above
my $scratch;
$self->_handle_element_start(($scratch=$name), shift @_);
-
+
while (@_) {
my $x = shift;
if (ref($x)) {
@@ -1606,7 +1836,7 @@ sub _traverse_treelet_bit { # for use only by the routine above
$self->_handle_text($x);
}
}
-
+
$self->_handle_element_end($scratch=$name);
return;
}
@@ -1651,7 +1881,7 @@ sub _closers_for_all_curr_open {
sub _verbatim_format {
my($it, $p) = @_;
-
+
my $formatting;
for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
@@ -1659,7 +1889,7 @@ sub _verbatim_format {
$p->[$i] .= "\n";
# Unlike with simple Verbatim blocks, we don't end up just doing
# a join("\n", ...) on the contents, so we have to append a
- # newline to ever line, and then nix the last one later.
+ # newline to every line, and then nix the last one later.
}
if( DEBUG > 4 ) {
@@ -1672,7 +1902,7 @@ sub _verbatim_format {
for(my $i = $#$p; $i > 2; $i--) {
# work backwards over the lines, except the first (#2)
-
+
#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
# look at a formatty line preceding a nonformatty one
@@ -1680,7 +1910,7 @@ sub _verbatim_format {
if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
DEBUG > 5 and print STDERR " It's a formatty line. ",
"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
-
+
if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n";
next;
@@ -1696,11 +1926,11 @@ sub _verbatim_format {
# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
# Example:
# What do you want? i like pie. [or whatever]
- # #:^^^^^^^^^^^^^^^^^ /////////////
-
+ # #:^^^^^^^^^^^^^^^^^ /////////////
+
DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
-
+
$formatting = ' ' . $1;
$formatting =~ s/\s+$//s; # nix trailing whitespace
unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
@@ -1716,7 +1946,7 @@ sub _verbatim_format {
}
# Make $formatting and the previous line be exactly the same length,
# with $formatting having a " " as the last character.
-
+
DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n";
@@ -1741,10 +1971,10 @@ sub _verbatim_format {
#print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
}
}
- my @nixed =
+ my @nixed =
splice @$p, $i-1, 2, @new_line; # replace myself and the next line
DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";
-
+
DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
scalar(@new_line), "):",
map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
@@ -1791,29 +2021,46 @@ sub _treelet_from_formatting_codes {
# [ 'B', {}, "pie" ],
# "!"
# ]
-
+ # This illustrates the general format of a treelet. It is an array:
+ # [0] is a scalar indicating its type. In the example above, the
+ # types are '~Top' and 'B'
+ # [1] is a hash of various flags about it, possibly empty
+ # [2] - [N] are an ordered list of the subcomponents of the treelet.
+ # Scalars are literal text, refs are sub-treelets, to
+ # arbitrary levels. Stringifying a treelet will recursively
+ # stringify the sub-treelets, concatentating everything
+ # together to form the exact text of the treelet.
+
my($self, $para, $start_line, $preserve_space) = @_;
-
+
my $treelet = ['~Top', {'start_line' => $start_line},];
-
+
unless ($preserve_space || $self->{'preserve_whitespace'}) {
$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
$para =~ s/ $//;
$para =~ s/^ //;
}
-
+
# Only apparent problem the above code is that N<< >> turns into
# N<< >>. But then, word wrapping does that too! So don't do that!
-
+
+
+ # As a Start-code is encountered, the number of opening bracket '<'
+ # characters minus 1 is pushed onto @stack (so 0 means a single bracket,
+ # etc). When closing brackets are found in the text, at least this number
+ # (plus the 1) will be required to mean the Start-code is terminated. When
+ # those are found, @stack is popped.
my @stack;
+
my @lineage = ($treelet);
my $raw = ''; # raw content of L<> fcode before splitting/processing
# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
- # into just 1 ' '. Is this the regex's doing or 'raw's?
+ # into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's
+ # the 'collapse and trim all whitespace first' lines just above.
my $inL = 0;
DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
-
+
# Here begins our frightening tokenizer RE. The following regex matches
# text in four main parts:
#
@@ -1846,7 +2093,11 @@ sub _treelet_from_formatting_codes {
|
# Match multiple-bracket end codes. $3 gets the whitespace that
# should be discarded before an end bracket but kept in other cases
- # and $4 gets the end brackets themselves.
+ # and $4 gets the end brackets themselves. ($3 can be empty if the
+ # construct is empty, like C<< >>, and all the white-space has been
+ # gobbled up already, considered to be space after the opening
+ # bracket. In this case we use look-behind to verify that there are
+ # at least 2 spaces in a row before the ">".)
(\s+|(?<=\s\s))(>{2,})
|
(\s?>) # $5: simple end-codes
@@ -1872,23 +2123,48 @@ sub _treelet_from_formatting_codes {
) {
DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
if(defined $1) {
+ my $bracket_count; # How many '<<<' in a row this has. Needed for
+ # Pod::Simple::JustPod
if(defined $2) {
DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
- push @stack, length($2) + 1;
- # length of the necessary complex end-code string
+ $bracket_count = length($2) + 1;
+ push @stack, $bracket_count; # length of the necessary complex
+ # end-code string
} else {
DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
push @stack, 0; # signal that we're looking for simple
+ $bracket_count = 1;
}
- push @lineage, [ substr($1,0,1), {}, ]; # new node object
- push @{ $lineage[-2] }, $lineage[-1];
- if ('L' eq substr($1,0,1)) {
- $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
- $inL = 1;
+ my $code = substr($1,0,1);
+ if ('L' eq $code) {
+ if ($inL) {
+ $raw .= $1;
+ $self->scream( $start_line,
+ 'Nested L<> are illegal. Pretending inner one is '
+ . 'X<...> so can continue looking for other errors.');
+ $code = "X";
+ }
+ else {
+ $raw = ""; # reset raw content accumulator
+ $inL = @stack;
+ }
} else {
$raw .= $1 if $inL;
}
-
+ push @lineage, [ $code, {}, ]; # new node object
+
+ # Tell Pod::Simple::JustPod how many brackets there were, but to save
+ # space, not in the most usual case of there was just 1. It can be
+ # inferred by the absence of this element. Similarly, if there is more
+ # than one bracket, extract the white space between the final bracket
+ # and the real beginning of the interior. Save that if it isn't just a
+ # single space
+ if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) {
+ $lineage[-1][1]{'~bracket_count'} = $bracket_count;
+ my $lspacer = substr($1, 1 + $bracket_count);
+ $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " ";
+ }
+ push @{ $lineage[-2] }, $lineage[-1];
} elsif(defined $4) {
DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
# This is where it gets messy...
@@ -1917,20 +2193,35 @@ sub _treelet_from_formatting_codes {
}
#print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
+ if ($3 ne " " && $self->{'_output_is_for_JustPod'}) {
+ if ($3 ne "") {
+ $lineage[-1][1]{'~rspacer'} = $3;
+ }
+ elsif ($lineage[-1][1]{'~lspacer'} eq " ") {
+
+ # Here we had something like C<< >> which was a false positive
+ delete $lineage[-1][1]{'~lspacer'};
+ }
+ else {
+ $lineage[-1][1]{'~rspacer'}
+ = substr($lineage[-1][1]{'~lspacer'}, -1, 1);
+ chop $lineage[-1][1]{'~lspacer'};
+ }
+ }
+
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
# Keep the element from being childless
-
- pop @stack;
- pop @lineage;
- unless (@stack) { # not in an L if there are no open fcodes
+ if ($inL == @stack) {
+ $lineage[-1][1]{'raw'} = $raw;
$inL = 0;
- if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
- $lineage[-1][-1][1]{'raw'} = $raw
- }
}
+
+ pop @stack;
+ pop @lineage;
+
$raw .= $3.$4 if $inL;
-
+
} elsif(defined $5) {
DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";
@@ -1944,6 +2235,11 @@ sub _treelet_from_formatting_codes {
push @{ $lineage[-1] }, ''; # keep it from being really childless
}
+ if ($inL == @stack) {
+ $lineage[-1][1]{'raw'} = $raw;
+ $inL = 0;
+ }
+
pop @stack;
pop @lineage;
} else {
@@ -1951,12 +2247,6 @@ sub _treelet_from_formatting_codes {
push @{ $lineage[-1] }, $5;
}
- unless (@stack) { # not in an L if there are no open fcodes
- $inL = 0;
- if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
- $lineage[-1][-1][1]{'raw'} = $raw
- }
- }
$raw .= $5 if $inL;
} elsif(defined $6) {
@@ -1965,6 +2255,7 @@ sub _treelet_from_formatting_codes {
$raw .= $6 if $inL;
# XXX does not capture multiplace whitespaces -- 'raw' ends up with
# at most 1 leading/trailing whitespace, why not all of it?
+ # Answer, because we deliberately trimmed it above
} else {
# should never ever ever ever happen
@@ -2095,7 +2386,7 @@ sub pretty { # adopted from Class::Classless
# letters, but I don't know if it has always worked without bugs. It
# seemed safest just to list the characters.
# s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
- s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
+ s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
<$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
#<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
qq{"$_"};
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm b/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
index 83415f8e25..b30dd66296 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
@@ -9,7 +9,7 @@ use Carp ();
use Pod::Simple::Methody ();
use Pod::Simple ();
use vars qw( @ISA $VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
@ISA = ('Pod::Simple::Methody');
BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
? \&Pod::Simple::DEBUG
@@ -88,8 +88,10 @@ sub end_item_text { $_[0]->emit_par(-2) }
sub emit_par {
return unless $_[0]{'Errata_seen'};
my($self, $tweak_indent) = splice(@_,0,2);
- my $indent = ' ' x ( 2 * $self->{'Indent'} + ($tweak_indent||0) );
+ my $length = 2 * $self->{'Indent'} + ($tweak_indent||0);
+ my $indent = ' ' x ($length > 0 ? $length : 0);
# Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
+ # 'Negative repeat count does nothing' since 5.22
$self->{'Thispara'} =~ s/$Pod::Simple::shy//g;
my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n");
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm b/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
index 428cc72359..aa714db47a 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
@@ -2,7 +2,7 @@ require 5;
package Pod::Simple::Debug;
use strict;
use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
sub import {
my($value,$variable);
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
index 71bef5070b..2de11f19fb 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
@@ -1,7 +1,7 @@
require 5;
package Pod::Simple::DumpAsText;
-$VERSION = '3.35';
+$VERSION = '3.36';
use Pod::Simple ();
BEGIN {@ISA = ('Pod::Simple')}
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
index 9d84878cb7..b68597fb68 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
@@ -1,7 +1,7 @@
require 5;
package Pod::Simple::DumpAsXML;
-$VERSION = '3.35';
+$VERSION = '3.36';
use Pod::Simple ();
BEGIN {@ISA = ('Pod::Simple')}
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm b/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
index 9cdbed217e..977e92ff32 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
@@ -9,7 +9,7 @@ use vars qw(
$Doctype_decl $Content_decl
);
@ISA = ('Pod::Simple::PullParser');
-$VERSION = '3.35';
+$VERSION = '3.36';
BEGIN {
if(defined &DEBUG) { } # no-op
elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
@@ -29,7 +29,7 @@ $LamePad = '' unless defined $LamePad;
$Linearization_Limit = 120 unless defined $Linearization_Limit;
# headings/items longer than that won't get an <a name="...">
-$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?'
+$Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
unless defined $Perldoc_URL_Prefix;
$Perldoc_URL_Postfix = ''
unless defined $Perldoc_URL_Postfix;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm b/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
index 661266d0de..58cd1ee9a8 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
@@ -5,7 +5,7 @@ use strict;
use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION
$CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA
);
-$VERSION = '3.35';
+$VERSION = '3.36';
@ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML!
# TODO: nocontents stylesheets. Strike some of the color variations?
@@ -720,22 +720,21 @@ sub _gen_css_wad {
}
# Now a few indexless variations:
- foreach my $variation (
- 'blkbluw', # black_with_blue_on_white
- 'whtpurk', # white_with_purple_on_black
- 'whtgrng', # white_with_green_on_grey
- 'grygrnw', # grey_with_green_on_white
- ) {
- my $outname = $variation;
+ for (my ($outfile, $variation) = each %{{
+ blkbluw => 'black_with_blue_on_white',
+ whtpurk => 'white_with_purple_on_black',
+ whtgrng => 'white_with_green_on_grey',
+ grygrnw => 'grey_with_green_on_white',
+ }}) {
my $this_css = join "\n",
- "/* This file is autogenerated. Do not edit. $outname */\n",
+ "/* This file is autogenerated. Do not edit. $outfile */\n",
"\@import url(\"./_$variation.css\");",
".indexgroup { display: none; }",
"\n",
;
- my $name = $outname;
+ my $name = $outfile;
$name =~ tr/-_/ /;
- $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+ $self->add_css( "_$outfile.css", 0, $name, 0, 0, \$this_css);
}
return;
@@ -1110,12 +1109,15 @@ Example:
=item $batchconv = Pod::Simple::HTMLBatch->new;
-This TODO
-
+This creates a new batch converter. The method doesn't take parameters.
+To change the converter's attributes, use the L<"/ACCESSOR METHODS">
+below.
=item $batchconv->batch_convert( I<indirs>, I<outdir> );
-this TODO
+This searches the directories given in I<indirs> and writes
+HTML files for each of these to a corresponding directory
+in I<outdir>. The directory I<outdir> must exist.
=item $batchconv->batch_convert( undef , ...);
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm b/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
new file mode 100644
index 0000000000..c7ad3d6977
--- /dev/null
+++ b/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
@@ -0,0 +1,362 @@
+use 5;
+package Pod::Simple::JustPod;
+# ABSTRACT: Pod::Simple formatter that extracts POD from a file containing
+# other things as well
+use strict;
+use warnings;
+
+use Pod::Simple::Methody ();
+our @ISA = ('Pod::Simple::Methody');
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+
+ $new->accept_targets('*');
+ $new->keep_encoding_directive(1);
+ $new->preserve_whitespace(1);
+ $new->complain_stderr(1);
+ $new->_output_is_for_JustPod(1);
+
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub check_that_all_is_closed {
+
+ # Actually checks that the things we depend on being balanced in fact are,
+ # so that we can continue in spit of pod errors
+
+ my $self = shift;
+ while ($self->{inL}) {
+ $self->end_L(@_);
+ }
+ while ($self->{fcode_end} && @{$self->{fcode_end}}) {
+ $self->_end_fcode(@_);
+ }
+}
+
+sub handle_text {
+
+ # Add text to the output buffer. This is skipped if within a L<>, as we use
+ # the 'raw' attribute of that tag instead.
+
+ $_[0]{buffer} .= $_[1] unless $_[0]{inL} ;
+}
+
+sub spacer {
+
+ # Prints the white space following things like =head1. This is normally a
+ # blank, unless BlackBox has told us otherwise.
+
+ my ($self, $arg) = @_;
+ return unless $arg;
+
+ my $spacer = ($arg->{'~orig_spacer'})
+ ? $arg->{'~orig_spacer'}
+ : " ";
+ $self->handle_text($spacer);
+}
+
+sub _generic_start {
+
+ # Called from tags like =head1, etc.
+
+ my ($self, $text, $arg) = @_;
+ $self->check_that_all_is_closed();
+ $self->handle_text($text);
+ $self->spacer($arg);
+}
+
+sub start_Document { shift->_generic_start("=pod\n\n"); }
+sub start_head1 { shift->_generic_start('=head1', @_); }
+sub start_head2 { shift->_generic_start('=head2', @_); }
+sub start_head3 { shift->_generic_start('=head3', @_); }
+sub start_head4 { shift->_generic_start('=head4', @_); }
+sub start_encoding { shift->_generic_start('=encoding', @_); }
+# sub start_Para
+# sub start_Verbatim
+
+sub start_item_bullet { # Handle =item *
+ my ($self, $arg) = @_;
+ $self->check_that_all_is_closed();
+ $self->handle_text('=item');
+
+ # It can be that they said simply '=item', and it is inferred that it is to
+ # be a bullet.
+ if (! $arg->{'~orig_content'}) {
+ $self->handle_text("\n\n");
+ }
+ else {
+ $self->spacer($arg);
+ if ($arg->{'~_freaky_para_hack'}) {
+
+ # See Message Id <87y3gtcwa2.fsf@hope.eyrie.org>
+ my $item_text = $arg->{'~orig_content'};
+ my $trailing = quotemeta $arg->{'~_freaky_para_hack'};
+ $item_text =~ s/$trailing$//;
+ $self->handle_text($item_text);
+ }
+ else {
+ $self->handle_text("*\n\n");
+ }
+ }
+}
+
+sub start_item_number { # Handle '=item 2'
+ my ($self, $arg) = @_;
+ $self->check_that_all_is_closed();
+ $self->handle_text("=item");
+ $self->spacer($arg);
+ $self->handle_text("$arg->{'~orig_content'}\n\n");
+}
+
+sub start_item_text { # Handle '=item foo bar baz'
+ my ($self, $arg) = @_;
+ $self->check_that_all_is_closed();
+ $self->handle_text('=item');
+ $self->spacer($arg);
+}
+
+sub _end_item {
+ my $self = shift;
+ $self->check_that_all_is_closed();
+ $self->emit;
+}
+
+*end_item_bullet = *_end_item;
+*end_item_number = *_end_item;
+*end_item_text = *_end_item;
+
+sub _start_over { # Handle =over
+ my ($self, $arg) = @_;
+ $self->check_that_all_is_closed();
+ $self->handle_text("=over");
+
+ # The =over amount is optional
+ if ($arg->{'~orig_content'}) {
+ $self->spacer($arg);
+ $self->handle_text("$arg->{'~orig_content'}");
+ }
+ $self->handle_text("\n\n");
+}
+
+*start_over_bullet = *_start_over;
+*start_over_number = *_start_over;
+*start_over_text = *_start_over;
+*start_over_block = *_start_over;
+
+sub _end_over {
+ my $self = shift;
+ $self->check_that_all_is_closed();
+ $self->handle_text('=back');
+ $self->emit;
+}
+
+*end_over_bullet = *_end_over;
+*end_over_number = *_end_over;
+*end_over_text = *_end_over;
+*end_over_block = *_end_over;
+
+sub end_Document {
+ my $self = shift;
+ $self->emit; # Make sure buffer gets flushed
+ print {$self->{'output_fh'} } "=cut\n"
+}
+
+sub _end_generic {
+ my $self = shift;
+ $self->check_that_all_is_closed();
+ $self->emit;
+}
+
+*end_head1 = *_end_generic;
+*end_head2 = *_end_generic;
+*end_head3 = *_end_generic;
+*end_head4 = *_end_generic;
+*end_encoding = *_end_generic;
+*end_Para = *_end_generic;
+*end_Verbatim = *_end_generic;
+
+sub _start_fcode {
+ my ($type, $self, $flags) = @_;
+
+ # How many brackets is set by BlackBox unless the count is 1
+ my $bracket_count = (exists $flags->{'~bracket_count'})
+ ? $flags->{'~bracket_count'}
+ : 1;
+ $self->handle_text($type . ( "<" x $bracket_count));
+
+ my $rspacer = "";
+ if ($bracket_count > 1) {
+ my $lspacer = (exists $flags->{'~lspacer'})
+ ? $flags->{'~lspacer'}
+ : " ";
+ $self->handle_text($lspacer);
+
+ $rspacer = (exists $flags->{'~rspacer'})
+ ? $flags->{'~rspacer'}
+ : " ";
+ }
+
+ # BlackBox doesn't output things for for the ending code callbacks, so save
+ # what we need.
+ push @{$self->{'fcode_end'}}, [ $bracket_count, $rspacer ];
+}
+
+sub start_B { _start_fcode('B', @_); }
+sub start_C { _start_fcode('C', @_); }
+sub start_E { _start_fcode('E', @_); }
+sub start_F { _start_fcode('F', @_); }
+sub start_I { _start_fcode('I', @_); }
+sub start_S { _start_fcode('S', @_); }
+sub start_X { _start_fcode('X', @_); }
+sub start_Z { _start_fcode('Z', @_); }
+
+sub _end_fcode {
+ my $self = shift;
+ my $fcode_end = pop @{$self->{'fcode_end'}};
+ my $bracket_count = 1;
+ my $rspacer = "";
+
+ if (! defined $fcode_end) { # If BlackBox is working, this shouldn't
+ # happen, but verify
+ $self->whine($self->{line_count}, "Extra '>'");
+ }
+ else {
+ $bracket_count = $fcode_end->[0];
+ $rspacer = $fcode_end->[1];
+ }
+
+ $self->handle_text($rspacer) if $bracket_count > 1;
+ $self->handle_text(">" x $bracket_count);
+}
+
+*end_B = *_end_fcode;
+*end_C = *_end_fcode;
+*end_E = *_end_fcode;
+*end_F = *_end_fcode;
+*end_I = *_end_fcode;
+*end_S = *_end_fcode;
+*end_X = *_end_fcode;
+*end_Z = *_end_fcode;
+
+sub start_L {
+ _start_fcode('L', @_);
+ $_[0]->handle_text($_[1]->{raw});
+ $_[0]->{inL}++
+}
+
+sub end_L {
+ my $self = shift;
+ $self->{inL}--;
+ if ($self->{inL} < 0) { # If BlackBox is working, this shouldn't
+ # happen, but verify
+ $self->whine($self->{line_count}, "Extra '>' ending L<>");
+ $self->{inL} = 0;
+ }
+
+ $self->_end_fcode(@_);
+}
+
+sub emit {
+ my $self = shift;
+
+ if ($self->{buffer} ne "") {
+ print { $self->{'output_fh'} } "",$self->{buffer} ,"\n\n";
+
+ $self->{buffer} = "";
+ }
+
+ return;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Pod::Simple::JustPod -- just the Pod, the whole Pod, and nothing but the Pod
+
+=head1 SYNOPSIS
+
+ my $infile = "mixed_code_and_pod.pm";
+ my $outfile = "just_the_pod.pod";
+ open my $fh, ">$outfile" or die "Can't write to $outfile: $!";
+
+ my $parser = Pod::Simple::JustPod->new();
+ $parser->output_fh($fh);
+ $parser->parse_file($infile);
+ close $fh or die "Can't close $outfile: $!";
+
+=head1 DESCRIPTION
+
+This class returns a copy of its input, translated into Perl's internal
+encoding (UTF-8), and with all the non-Pod lines removed.
+
+This is a subclass of L<Pod::Simple::Methody> and inherits all its methods.
+And since, that in turn is a subclass of L<Pod::Simple>, you can use any of
+its methods. This means you can output to a string instead of a file, or
+you can parse from an array.
+
+This class strives to return the Pod lines of the input completely unchanged,
+except for any necessary translation into Perl's internal encoding, and it makes
+no effort to return trailing spaces on lines; these likely will be stripped.
+If the input pod is well-formed with no warnings nor errors generated, the
+extracted pod should generate the same documentation when formatted by a Pod
+formatter as the original file does.
+
+By default, warnings are output to STDERR
+
+=head1 SEE ALSO
+
+L<Pod::Simple>, L<Pod::Simple::Methody>
+
+=head1 SUPPORT
+
+Questions or discussion about POD and Pod::Simple should be sent to the
+L<mailto:pod-people@perl.org> mail list. Send an empty email to
+L<mailto:pod-people-subscribe@perl.org> to subscribe.
+
+This module is managed in an open GitHub repository,
+L<https://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
+to clone L<git://github.com/theory/pod-simple.git> and send patches!
+
+Patches against Pod::Simple are welcome. Please send bug reports to
+L<mailto:<bug-pod-simple@rt.cpan.org>.
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2002 Sean M. Burke.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
+But don't bother him, he's retired.
+
+Pod::Simple is maintained by:
+
+=over
+
+=item * Allison Randal C<allison@perl.org>
+
+=item * Hans Dieter Pearcey C<hdp@cpan.org>
+
+=item * David E. Wheeler C<dwheeler@cpan.org>
+
+=back
+
+Pod::Simple::JustPod was developed by John SJ Anderson
+C<genehack@genehack.org>, with contributions from Karl Williamson
+C<khw@cpan.org>.
+
+=cut
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm b/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm
index 04612f202e..4b8e34fdd1 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm
@@ -2,13 +2,11 @@
require 5;
package Pod::Simple::LinkSection;
# Based somewhat dimly on Array::Autojoin
-use vars qw($VERSION );
-$VERSION = '3.35';
use strict;
use Pod::Simple::BlackBox;
use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
use overload( # So it'll stringify nice
'""' => \&Pod::Simple::BlackBox::stringify_lol,
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm b/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm
index 67b8706741..993f6e4a67 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm
@@ -4,7 +4,7 @@ package Pod::Simple::Methody;
use strict;
use Pod::Simple ();
use vars qw(@ISA $VERSION);
-$VERSION = '3.35';
+$VERSION = '3.36';
@ISA = ('Pod::Simple');
# Yes, we could use named variables, but I want this to be impose
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm b/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm
index 0c18a5b37d..fa983240d2 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm
@@ -1,7 +1,7 @@
require 5;
package Pod::Simple::Progress;
-$VERSION = '3.35';
+$VERSION = '3.36';
use strict;
# Objects of this class are used for noting progress of an
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm b/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm
index 7c326ec6ae..672c6fc47e 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm
@@ -1,6 +1,6 @@
require 5;
package Pod::Simple::PullParser;
-$VERSION = '3.35';
+$VERSION = '3.36';
use Pod::Simple ();
BEGIN {@ISA = ('Pod::Simple')}
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm b/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm
index d3066a8e87..b3196e49cb 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm
@@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken ();
use strict;
use vars qw(@ISA $VERSION);
@ISA = ('Pod::Simple::PullParserToken');
-$VERSION = '3.35';
+$VERSION = '3.36';
sub new { # Class->new(tagname);
my $class = shift;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm b/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm
index d938e0adb2..01670470b1 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm
@@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken ();
use strict;
use vars qw(@ISA $VERSION);
@ISA = ('Pod::Simple::PullParserToken');
-$VERSION = '3.35';
+$VERSION = '3.36';
sub new { # Class->new(tagname, optional_attrhash);
my $class = shift;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm b/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm
index a11ce0fd92..5cdd3baa0d 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm
@@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken ();
use strict;
use vars qw(@ISA $VERSION);
@ISA = ('Pod::Simple::PullParserToken');
-$VERSION = '3.35';
+$VERSION = '3.36';
sub new { # Class->new(text);
my $class = shift;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm b/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm
index c6618168e6..75044d6fab 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm
@@ -3,7 +3,7 @@ require 5;
package Pod::Simple::PullParserToken;
# Base class for tokens gotten from Pod::Simple::PullParser's $parser->get_token
@ISA = ();
-$VERSION = '3.35';
+$VERSION = '3.36';
use strict;
sub new { # Class->new('type', stuff...); ## Overridden in derived classes anyway
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm b/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm
index 153c3d3e28..9c4a8e3835 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm
@@ -8,24 +8,67 @@ package Pod::Simple::RTF;
use strict;
use vars qw($VERSION @ISA %Escape $WRAP %Tagmap);
-$VERSION = '3.35';
+$VERSION = '3.36';
use Pod::Simple::PullParser ();
BEGIN {@ISA = ('Pod::Simple::PullParser')}
use Carp ();
BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
+sub to_uni ($) { # Convert native code point to Unicode
+ my $x = shift;
+
+ # Broken for early EBCDICs
+ $x = chr utf8::native_to_unicode(ord $x) if $] ge 5.007_003
+ && ord("A") != 65;
+ return $x;
+}
+
+# We escape out 'F' so that we can send RTF files thru the mail without the
+# slightest worry that paragraphs beginning with "From" will get munged.
+# We also escape '\', '{', '}', and '_'
+my $map_to_self = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEGHIJKLMNOPQRSTUVWXYZ[]^`abcdefghijklmnopqrstuvwxyz|~';
+
$WRAP = 1 unless defined $WRAP;
+%Escape = (
+
+ # Start with every character mapping to its hex equivalent
+ map( (chr($_) => sprintf("\\'%02x", $_)), 0 .. 0xFF),
+
+ # Override most ASCII printables with themselves (or on non-ASCII platforms,
+ # their ASCII values. This is because the output is UTF-16, which is always
+ # based on Unicode code points)
+ map( ( substr($map_to_self, $_, 1)
+ => to_uni(substr($map_to_self, $_, 1))), 0 .. length($map_to_self) - 1),
+
+ # And some refinements:
+ "\r" => "\n",
+ "\cj" => "\n",
+ "\n" => "\n\\line ",
+
+ "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay)
+ "\f" => "\n\\page\n", # Formfeed
+ "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen
+ $Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space
+ $Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen
-# These are broken for early Perls on EBCDIC; they could be fixed to work
-# better there, but not worth it. These are part of a larger [...] class, so
-# are just the strings to substitute into it, as opposed to compiled patterns.
-my $cntrl = '[:cntrl:]';
-$cntrl = '\x00-\x1F\x7F' unless eval "qr/[$cntrl]/";
+ # CRAZY HACKS:
+ "\n" => "\\line\n",
+ "\r" => "\n",
+ "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1
+ "\cc" => "}",
+);
-my $not_ascii = '[:^ascii:]';
-$not_ascii = '\x80-\xFF' unless eval "qr/[$not_ascii]/";
+# Generate a string of all the characters in %Escape that don't map to
+# themselves. First, one without the hyphen, then one with.
+my $escaped_sans_hyphen = "";
+$escaped_sans_hyphen .= $_ for grep { $_ ne $Escape{$_} && $_ ne '-' }
+ sort keys %Escape;
+my $escaped = "-$escaped_sans_hyphen";
+# Then convert to patterns
+$escaped_sans_hyphen = qr/[\Q$escaped_sans_hyphen \E]/;
+$escaped= qr/[\Q$escaped\E]/;
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -158,6 +201,13 @@ sub run {
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Match something like an identifier. Prefer XID if available, then plain ID,
+# then just ASCII
+my $id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{XIDS}][\'\p{XIDC}]+', "ab");
+$id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{IDS}][\'\p{IDC}]+', "ab")
+ unless $id_re;
+$id_re = qr/['_a-zA-Z]['a-zA-Z0-9_]+/ unless $id_re;
+
sub do_middle { # the main work
my $self = $_[0];
my $fh = $self->{'output_fh'};
@@ -172,7 +222,7 @@ sub do_middle { # the main work
if( ($type = $token->type) eq 'text' ) {
if( $self->{'rtfverbatim'} ) {
DEBUG > 1 and print STDERR " $type " , $token->text, " in verbatim!\n";
- rtf_esc_codely($scratch = $token->text);
+ rtf_esc(0, $scratch = $token->text); # 0 => Don't escape hyphen
print $fh $scratch;
next;
}
@@ -195,13 +245,13 @@ sub do_middle { # the main work
|
# or starting alpha, but containing anything strange:
(?:
- [a-zA-Z'${not_ascii}]+[\$\@\:_<>\(\\\*]\S+
+ ${id_re}[\$\@\:_<>\(\\\*]\S+
)
)
/\cb$1\cc/xsg
;
- rtf_esc($scratch);
+ rtf_esc(1, $scratch); # 1 => escape hyphen
$scratch =~
s/(
[^\r\n]{65} # Snare 65 characters from a line
@@ -311,7 +361,7 @@ sub do_middle { # the main work
print $fh $token->attr('number'), ". \n";
} elsif ($tagname eq 'item-bullet') {
print $fh "\\'", ord("_"), "\n";
- #for funky testing: print $fh '', rtf_esc("\x{4E4B}\x{9053}");
+ #for funky testing: print $fh '', rtf_esc(1, "\x{4E4B}\x{9053}");
}
} elsif( $type eq 'end' ) {
@@ -465,7 +515,7 @@ sub doc_start {
# catches the most common case, at least
DEBUG and print STDERR "Title0: <$title>\n";
- $title = rtf_esc($title);
+ $title = rtf_esc(1, $title); # 1 => escape hyphen
DEBUG and print STDERR "Title1: <$title>\n";
$title = '\lang1024\noproof ' . $title
if $is_obviously_module_name;
@@ -489,90 +539,69 @@ END
#-------------------------------------------------------------------------
use integer;
-sub rtf_esc {
- my $x; # scratch
- if(!defined wantarray) { # void context: alter in-place!
- for(@_) {
- s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
- s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
- }
- return;
- } elsif(wantarray) { # return an array
- return map {; ($x = $_) =~
- s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
- $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
- $x;
- } @_;
- } else { # return a single scalar
- ($x = ((@_ == 1) ? $_[0] : join '', @_)
- ) =~ s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
- # Escape \, {, }, -, control chars, and 7f-ff.
- $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+
+my $question_mark_code_points =
+ Pod::Simple::BlackBox::my_qr('([^\x00-\x{D7FF}\x{E000}-\x{10FFFF}])',
+ "\x{110000}");
+my $plane0 =
+ Pod::Simple::BlackBox::my_qr('([\x{100}-\x{FFFF}])', "\x{100}");
+my $other_unicode =
+ Pod::Simple::BlackBox::my_qr('([\x{10000}-\x{10FFFF}])', "\x{10000}");
+
+sub esc_uni($) {
+ use if $] le 5.006002, 'utf8';
+
+ my $x = shift;
+
+ # The output is expected to be UTF-16. Surrogates and above-Unicode get
+ # mapped to '?'
+ $x =~ s/$question_mark_code_points/?/g if $question_mark_code_points;
+
+ # Non-surrogate Plane 0 characters get mapped to their code points. But
+ # the standard calls for a 16bit SIGNED value.
+ $x =~ s/$plane0/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg
+ if $plane0;
+
+ # Use surrogate pairs for the rest
+ $x =~ s/$other_unicode/'\\uc1\\u' . ((ord($1) >> 10) + 0xD7C0 - 65536) . '\\u' . (((ord$1) & 0x03FF) + 0xDC00 - 65536) . '?'/eg if $other_unicode;
+
return $x;
- }
}
-sub rtf_esc_codely {
- # Doesn't change "-" to hard-hyphen, nor apply computerese style-smarts.
- # We don't want to change the "-" to hard-hyphen, because we want to
+sub rtf_esc ($$) {
+ # The parameter is true if we should escape hyphens
+ my $escape_re = ((shift) ? $escaped : $escaped_sans_hyphen);
+
+ # When false, it doesn't change "-" to hard-hyphen.
+ # We don't want to change the "-" to hard-hyphen, because we want to
# be able to paste this into a file and run it without there being
# dire screaming about the mysterious hard-hyphen character (which
# looks just like a normal dash character).
-
+ # XXX The comments used to claim that when false it didn't apply computerese
+ # style-smarts, but khw didn't see this actually
+
my $x; # scratch
if(!defined wantarray) { # void context: alter in-place!
for(@_) {
- s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
- s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ s/($escape_re)/$Escape{$1}/g; # ESCAPER
+ $_ = esc_uni($_);
}
return;
} elsif(wantarray) { # return an array
return map {; ($x = $_) =~
- s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
- $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ s/($escape_re)/$Escape{$1}/g; # ESCAPER
+ $x = esc_uni($x);
$x;
} @_;
} else { # return a single scalar
($x = ((@_ == 1) ? $_[0] : join '', @_)
- ) =~ s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER
+ ) =~ s/($escape_re)/$Escape{$1}/g; # ESCAPER
# Escape \, {, }, -, control chars, and 7f-ff.
- $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+ $x = esc_uni($x);
return $x;
}
}
-%Escape = (
- (($] lt 5.007_003) # Broken for non-ASCII on early Perls
- ? (map( (chr($_),chr($_)), # things not apparently needing escaping
- 0x20 .. 0x7E ),
- map( (chr($_),sprintf("\\'%02x", $_)), # apparently escapeworthy things
- 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46))
- : (map( (chr(utf8::unicode_to_native($_)),chr(utf8::unicode_to_native($_))),
- 0x20 .. 0x7E ),
- map( (chr($_),sprintf("\\'%02x", utf8::unicode_to_native($_))),
- 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46))),
-
- # We get to escape out 'F' so that we can send RTF files thru the mail
- # without the slightest worry that paragraphs beginning with "From"
- # will get munged.
-
- # And some refinements:
- "\r" => "\n",
- "\cj" => "\n",
- "\n" => "\n\\line ",
-
- "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay)
- "\f" => "\n\\page\n", # Formfeed
- "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen
- $Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space
- $Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen
-
- # CRAZY HACKS:
- "\n" => "\\line\n",
- "\r" => "\n",
- "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1
- "\cc" => "}",
-);
1;
__END__
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Search.pm b/cpan/Pod-Simple/lib/Pod/Simple/Search.pm
index df499cacf2..37c8e24c6b 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Search.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Search.pm
@@ -3,7 +3,7 @@ package Pod::Simple::Search;
use strict;
use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY);
-$VERSION = '3.35'; ## Current version of this package
+$VERSION = '3.36'; ## Current version of this package
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level
use Carp ();
@@ -546,7 +546,7 @@ sub _limit_glob_to_limit_re {
sub _actual_filenames {
my $dir = shift;
my $fn = lc shift;
- opendir my $dh, $dir or return;
+ opendir my ($dh), $dir or return;
return map { File::Spec->catdir($dir, $_) }
grep { lc $_ eq $fn } readdir $dh;
}
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm b/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm
index bff5af84c4..a6cdc8693c 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm
@@ -5,7 +5,7 @@ use strict;
use Carp ();
use Pod::Simple ();
use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS);
-$VERSION = '3.35';
+$VERSION = '3.36';
BEGIN {
@ISA = ('Pod::Simple');
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod b/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod
index 88f85e86de..f9cb09a52e 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod
@@ -98,9 +98,14 @@ nodes that represent preformatted text (from verbatim sections).
TODO intro... mention that events are supplied for implicits, like for
missing >'s
-
In the following section, we use XML to represent the event structure
-associated with a particular construct. That is, TODO
+associated with a particular construct. That is, an opening tag
+represents the element start, the attributes of that opening tag are
+the attributes given to the callback, and the closing tag represents
+the end element.
+
+Three callback methods must be supplied by a class extending
+L<Pod::Simple> to receive the corresponding event:
=over
@@ -112,8 +117,9 @@ associated with a particular construct. That is, TODO
=back
-TODO describe
-
+Here's the comprehensive list of values you can expect as
+I<element_name> in your implementation of C<_handle_element_start>
+and C<_handle_element_end>::
=over
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Text.pm b/cpan/Pod-Simple/lib/Pod/Simple/Text.pm
index 66e15f48cc..35166d7309 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Text.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Text.pm
@@ -6,7 +6,7 @@ use Carp ();
use Pod::Simple::Methody ();
use Pod::Simple ();
use vars qw( @ISA $VERSION $FREAKYMODE);
-$VERSION = '3.35';
+$VERSION = '3.36';
@ISA = ('Pod::Simple::Methody');
BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
? \&Pod::Simple::DEBUG
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm b/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm
index 980612b313..5db95ccc17 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm
@@ -6,7 +6,7 @@ use strict;
use Carp ();
use Pod::Simple ();
use vars qw( @ISA $VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
@ISA = ('Pod::Simple');
sub new {
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm b/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm
index a7364dfa58..277a321b44 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm
@@ -4,7 +4,7 @@ package Pod::Simple::TiedOutFH;
use Symbol ('gensym');
use Carp ();
use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm b/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm
index a4bb29ffdb..6b4a43fdbd 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm
@@ -3,7 +3,7 @@ require 5;
package Pod::Simple::Transcode;
use strict;
use vars qw($VERSION @ISA);
-$VERSION = '3.35';
+$VERSION = '3.36';
BEGIN {
if(defined &DEBUG) {;} # Okay
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm b/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm
index c206905657..dfded058d2 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm
@@ -5,7 +5,7 @@ require 5;
package Pod::Simple::TranscodeDumb;
use strict;
use vars qw($VERSION %Supported);
-$VERSION = '3.35';
+$VERSION = '3.36';
# This module basically pretends it knows how to transcode, except
# only for null-transcodings! We use this when Encode isn't
# available.
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm b/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm
index e4d4f7eb60..c0ae1c6ac0 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm
@@ -9,7 +9,7 @@ use strict;
use Pod::Simple;
require Encode;
use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
sub is_dumb {0}
sub is_smart {1}
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm b/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
index 8c2cf1a01b..10f9d52cae 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm
@@ -45,7 +45,7 @@ declare the output character set as UTF-8 before parsing, like so:
package Pod::Simple::XHTML;
use strict;
use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
-$VERSION = '3.35';
+$VERSION = '3.36';
use Pod::Simple::Methody ();
@ISA = ('Pod::Simple::Methody');
@@ -92,7 +92,7 @@ the call to C<parse_file>:
In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
to put before the "Foo%3a%3aBar". The default value is
-"http://search.cpan.org/perldoc?".
+"https://metacpan.org/pod/".
=head2 perldoc_url_postfix
@@ -247,7 +247,7 @@ sub new {
my $self = shift;
my $new = $self->SUPER::new(@_);
$new->{'output_fh'} ||= *STDOUT{IO};
- $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
+ $new->perldoc_url_prefix('https://metacpan.org/pod/');
$new->man_url_prefix('http://man.he.net/man');
$new->html_charset('ISO-8859-1');
$new->nix_X_codes(1);
@@ -685,8 +685,8 @@ sub emit {
Resolves a POD link target (typically a module or POD file name) and section
name to a URL. The resulting link will be returned for the above examples as:
- http://search.cpan.org/perldoc?Net::Ping#INSTALL
- http://search.cpan.org/perldoc?perlpodspec
+ https://metacpan.org/pod/Net::Ping#INSTALL
+ https://metacpan.org/pod/perlpodspec
#SYNOPSIS
Note that when there is only a section argument the URL will simply be a link
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm b/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm
index 62fe39549d..856b308bcb 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm
@@ -5,7 +5,7 @@ use strict;
use Carp ();
use Pod::Simple ();
use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS);
-$VERSION = '3.35';
+$VERSION = '3.36';
BEGIN {
@ISA = ('Pod::Simple');
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG;
diff --git a/cpan/Pod-Simple/t/00about.t b/cpan/Pod-Simple/t/00about.t
index e5e7038e38..70fcffe12e 100644
--- a/cpan/Pod-Simple/t/00about.t
+++ b/cpan/Pod-Simple/t/00about.t
@@ -23,7 +23,7 @@ Pod::Simple
Pod::Simple::BlackBox Pod::Simple::Checker Pod::Simple::DumpAsText
Pod::Simple::DumpAsXML Pod::Simple::HTML Pod::Simple::HTMLBatch
Pod::Simple::HTMLLegacy Pod::Simple::LinkSection Pod::Simple::Methody
-Pod::Simple::Progress Pod::Simple::PullParser
+Pod::Simple::JustPod Pod::Simple::Progress Pod::Simple::PullParser
Pod::Simple::PullParserEndToken Pod::Simple::PullParserStartToken
Pod::Simple::PullParserTextToken Pod::Simple::PullParserToken
Pod::Simple::RTF Pod::Simple::Search Pod::Simple::SimpleTree
diff --git a/cpan/Pod-Simple/t/JustPod01.t b/cpan/Pod-Simple/t/JustPod01.t
new file mode 100644
index 0000000000..c74b3370cb
--- /dev/null
+++ b/cpan/Pod-Simple/t/JustPod01.t
@@ -0,0 +1,219 @@
+#! user/bin/perl -w
+
+# t/JustPod01.t - check basics of Pod::Simple::JustPod
+
+BEGIN {
+ chdir 't' if -d 't';
+}
+
+use strict;
+use lib '../lib';
+use Test::More tests => 2;
+
+use warnings;
+use utf8;
+
+use_ok('Pod::Simple::JustPod') or exit;
+
+my $parser = Pod::Simple::JustPod->new();
+
+my $input;
+while ( <DATA> ) { $input .= $_ }
+
+my $output;
+$parser->output_string( \$output );
+$parser->parse_string_document( $input );
+
+# Strip off text before =pod in the input
+$input =~ s/^.*(=pod.*)$/$1/mgs;
+
+my $msg = "got expected output";
+if ($output eq $input) {
+ pass($msg);
+}
+elsif ($ENV{PERL_TEST_DIFF}) {
+ fail($msg);
+ require File::Temp;
+ my $orig_file = File::Temp->new();
+ local $/ = "\n";
+ chomp $input;
+ print $orig_file $input, "\n";
+ close $orig_file || die "Can't close orig_file: $!";
+
+ chomp $output;
+ my $parsed_file = File::Temp->new();
+ print $parsed_file $output, "\n";
+ close $parsed_file || die "Can't close parsed_file";
+
+ my $diff = File::Temp->new();
+ system("$ENV{PERL_TEST_DIFF} $orig_file $parsed_file > $diff");
+
+ open my $fh, "<", $diff || die "Can't open $diff";
+ my @diffs = <$fh>;
+ diag(@diffs);
+}
+else {
+ eval { require Text::Diff; };
+ if ($@) {
+ is($output, $input, $msg);
+ diag("Set environment variable PERL_TEST_DIFF=diff_tool or install"
+ . " Text::Diff to see just the differences.");
+ }
+ else {
+ fail($msg);
+ diag Text::Diff::diff(\$input, \$output, { STYLE => 'Unified' });
+ }
+}
+
+
+__DATA__
+package utf8::all;
+use strict;
+use warnings;
+use 5.010; # state
+# ABSTRACT: turn on Unicode - all of it
+our $VERSION = '0.010'; # VERSION
+
+
+use Import::Into;
+use parent qw(Encode charnames utf8 open warnings feature);
+
+sub import {
+ my $target = caller;
+ 'utf8'->import::into($target);
+ 'open'->import::into($target, qw{:encoding(UTF-8) :std});
+ 'charnames'->import::into($target, qw{:full :short});
+ 'warnings'->import::into($target, qw{FATAL utf8});
+ 'feature'->import::into($target, qw{unicode_strings}) if $^V >= v5.11.0;
+ 'feature'->import::into($target, qw{unicode_eval fc}) if $^V >= v5.16.0;
+
+ {
+ no strict qw(refs); ## no critic (TestingAndDebugging::ProhibitNoStrict)
+ *{$target . '::readdir'} = \&_utf8_readdir;
+ }
+
+ # utf8 in @ARGV
+ state $have_encoded_argv = 0;
+ _encode_argv() unless $have_encoded_argv++;
+
+ $^H{'utf8::all'} = 1;
+
+ return;
+}
+
+sub _encode_argv {
+ $_ = Encode::decode('UTF-8', $_) for @ARGV;
+ return;
+}
+
+sub _utf8_readdir(*) { ## no critic (Subroutines::ProhibitSubroutinePrototypes)
+ my $handle = shift;
+ if (wantarray) {
+ my @all_files = CORE::readdir($handle);
+ $_ = Encode::decode('UTF-8', $_) for @all_files;
+ return @all_files;
+ }
+ else {
+ my $next_file = CORE::readdir($handle);
+ $next_file = Encode::decode('UTF-8', $next_file);
+ return $next_file;
+ }
+}
+
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+utf8::all - turn on Unicode - all of it
+
+=head1 VERSION
+
+version 0.010
+
+=head1 SYNOPSIS
+
+ use utf8::all; # Turn on UTF-8. All of it.
+
+ open my $in, '<', 'contains-utf8'; # UTF-8 already turned on here
+ print length 'føø bār'; # 7 UTF-8 characters
+ my $utf8_arg = shift @ARGV; # @ARGV is UTF-8 too!
+
+=head1 DESCRIPTION
+
+L<utf8> allows you to write your Perl encoded in UTF-8. That means UTF-8
+strings, variable names, and regular expressions. C<utf8::all> goes further, and
+makes C<@ARGV> encoded in UTF-8, and filehandles are opened with UTF-8 encoding
+turned on by default (including STDIN, STDOUT, STDERR), and charnames are
+imported so C<\N{...}> sequences can be used to compile Unicode characters based
+on names. If you I<don't> want UTF-8 for a particular filehandle, you'll have to
+set C<binmode $filehandle>.
+
+The pragma is lexically-scoped, so you can do the following if you had some
+reason to:
+
+ {
+ use utf8::all;
+ open my $out, '>', 'outfile';
+ my $utf8_str = 'føø bār';
+ print length $utf8_str, "\n"; # 7
+ print $out $utf8_str; # out as utf8
+ }
+ open my $in, '<', 'outfile'; # in as raw
+ my $text = do { local $/; <$in>};
+ print length $text, "\n"; # 10, not 7!
+
+=head1 INTERACTION WITH AUTODIE
+
+If you use L<autodie>, which is a great idea, you need to use at least version
+B<2.12>, released on L<June 26, 2012|https://metacpan.org/source/PJF/autodie-2.12/Changes#L3>.
+Otherwise, autodie obliterates the IO layers set by the L<open> pragma. See
+L<RT #54777|https://rt.cpan.org/Ticket/Display.html?id=54777> and
+L<GH #7|https://github.com/doherty/utf8-all/issues/7>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<http://metacpan.org/release/utf8-all/>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/utf8::all/>.
+
+=head1 SOURCE
+
+The development version is on github at L<http://github.com/doherty/utf8-all>
+and may be cloned from L<git://github.com/doherty/utf8-all.git>
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<https://github.com/doherty/utf8-all/issues>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Michael Schwern <mschwern@cpan.org>
+
+=item *
+
+Mike Doherty <doherty@cpan.org>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2009 by Michael Schwern <mschwern@cpan.org>.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/cpan/Pod-Simple/t/JustPod02.t b/cpan/Pod-Simple/t/JustPod02.t
new file mode 100644
index 0000000000..8205aecaa0
--- /dev/null
+++ b/cpan/Pod-Simple/t/JustPod02.t
@@ -0,0 +1,445 @@
+use strict;
+use warnings;
+
+use Test::More;
+
+BEGIN { plan tests => 1 }
+
+use Pod::Simple::JustPod;
+
+my @orig = <DATA>;
+my $parsed;
+
+my $parser = Pod::Simple::JustPod->new();
+$parser->output_string(\$parsed);
+$parser->parse_lines(@orig, undef);
+
+my $orig = join "", @orig;
+
+my $msg = "Verify parsed pod sufficiently matches original";
+if ($parsed eq $orig) {
+ pass($msg);
+}
+elsif ($ENV{PERL_TEST_DIFF}) {
+ fail($msg);
+ require File::Temp;
+ my $orig_file = File::Temp->new();
+ local $/ = "\n";
+ chomp $orig;
+ print $orig_file $orig, "\n";
+ close $orig_file || die "Can't close orig_file: $!";
+
+ chomp $parsed;
+ my $parsed_file = File::Temp->new();
+ print $parsed_file $parsed, "\n";
+ close $parsed_file || die "Can't close parsed_file";
+
+ my $diff = File::Temp->new();
+ system("$ENV{PERL_TEST_DIFF} $orig_file $parsed_file > $diff");
+
+ open my $fh, "<", $diff || die "Can't open $diff";
+ my @diffs = <$fh>;
+ diag(@diffs);
+}
+else {
+ eval { require Text::Diff; };
+ if ($@) {
+ is($parsed, $orig, $msg);
+ diag("Set environment variable PERL_TEST_DIFF=diff_tool or install"
+ . " Text::Diff to see just the differences.");
+ }
+ else {
+ fail($msg);
+ diag Text::Diff::diff(\$orig, \$parsed, { STYLE => 'Unified' });
+ }
+}
+
+# The data is adapted from a test file from pod2lators. Extra spaces are
+# added in places to make sure they get retained, and some extra tests
+__DATA__
+=pod
+
+=encoding ASCII
+
+=head1 NAME
+
+basic.pod - Test of various basic POD features in translators.
+
+=head1 HEADINGS
+
+Try a few different levels of headings, with embedded formatting codes and
+other interesting bits.
+
+=head1 This C<is> a "level 1" heading
+
+=head2 ``Level'' "2 I<heading>
+
+=head3 Level 3 B<heading I<with C<weird F<stuff "" (double quote)>>>>
+
+=head4 Level "4 C<heading>
+
+Now try again with B<intermixed> F<text>.
+
+=head1 This C<is> a "level 1" heading
+
+Text.
+
+=head2 ``Level'' 2 I<heading>
+
+Text.
+
+=head3 Level 3 B<heading I<with C<weird F<stuff>>>>
+
+Text.
+
+=head4 Level "4 C<heading>
+
+Text.
+
+=head1 LINKS
+
+These are all taken from the Pod::Parser tests.
+
+Try out I<LOTS> of different ways of specifying references:
+
+Reference the L<manpage/section>
+
+Reference the L<"manpage"/section>
+
+Reference the L<manpage/"section">
+
+Now try it using the new "|" stuff ...
+
+Reference the L<thistext|manpage/section>|
+
+Reference the L<thistext | manpage / section>|
+
+Reference the L<thistext| manpage/ section>|
+
+Reference the L<thistext |manpage /section>|
+
+Reference the L<thistext|manpage/"section">|
+
+Reference the L<thistext|
+manpage/
+section>|
+
+And then throw in a few new ones of my own.
+
+L<foo>
+
+L<foo|bar>
+
+L<foo/bar>
+
+L<foo/"baz boo">
+
+L</bar>
+
+L</"baz boo">
+
+L</baz boo>
+
+L<foo bar/baz boo>
+
+L<"boo var baz">
+
+L<bar baz>
+
+L</boo>, L</bar>, and L</baz>
+
+L<fooZ<>bar>
+
+L<Testing I<italics>|foo/bar>
+
+L<foo/I<Italic> text>
+
+L<fooE<verbar>barZ<>/Section C<with> I<B<other> markup>>
+
+=head1 OVER AND ITEMS
+
+Taken from Pod::Parser tests, this is a test to ensure that multiline
+=item paragraphs get indented appropriately.
+
+=over 4
+
+=item This
+is
+a
+test.
+
+=back
+
+There should be whitespace now before this line.
+
+Taken from Pod::Parser tests, this is a test to ensure the nested =item
+paragraphs get indented appropriately.
+
+=over 2
+
+=item 1
+
+First section.
+
+=over 2
+
+=item a
+
+this is item a
+
+=item b
+
+this is item b
+
+=back
+
+=item 2
+
+Second section.
+
+=over 2
+
+=item a
+
+this is item a
+
+=item b
+
+this is item b
+
+=item c
+
+=item d
+
+This is item c & d.
+
+=back
+
+=back
+
+Now some additional weirdness of our own. Make sure that multiple tags
+for one paragraph are properly compacted.
+
+=over 4
+
+=item "foo"
+
+=item B<bar>
+
+=item C<baz>
+
+There shouldn't be any spaces between any of these item tags; this idiom
+is used in perlfunc.
+
+=item Some longer item text
+
+Just to make sure that we test paragraphs where the item text doesn't fit
+in the margin of the paragraph (and make sure that this paragraph fills a
+few lines).
+
+Let's also make it multiple paragraphs to be sure that works.
+
+=back
+
+Test use of =over without =item as a block "quote" or block paragraph.
+
+=over 4
+
+This should be indented four spaces but otherwise formatted the same as
+any other regular text paragraph. Make sure it's long enough to see the
+results of the formatting.....
+
+=back
+
+Now try the same thing nested, and make sure that the indentation is reset
+back properly.
+
+=over 4
+
+=over 4
+
+This paragraph should be doubly indented.
+
+=back
+
+This paragraph should only be singly indented.
+
+=over 4
+
+=item
+
+This is an item in the middle of a block-quote, which should be allowed.
+
+=item
+
+We're also testing tagless item commands.
+
+=back
+
+Should be back to the single level of indentation.
+
+=back
+
+Should be back to regular indentation.
+
+Now also check the transformation of * into real bullets for man pages.
+
+=over
+
+=item *
+
+An item. We're also testing using =over without a number, and making sure
+that item text wraps properly.
+
+=item *
+
+Another item.
+
+=back
+
+and now test the numbering of item blocks.
+
+=over 4
+
+=item 1.
+
+First item.
+
+=item 2.
+
+Second item.
+
+=back
+
+=head1 FORMATTING CODES
+
+Another test taken from Pod::Parser.
+
+This is a test to see if I can do not only C<$self> and C<method()>, but
+also C<< $self->method() >> and C<< $self->{FIELDNAME} >> and
+C<< $Foo <=> $Bar >> without resorting to escape sequences. If
+I want to refer to the right-shift operator I can do something
+like C<<< $x >> 3 >>> or even C<<<< $y >> 5 >>>>.
+
+Now for the grand finale of C<< $self->method()->{FIELDNAME} = {FOO=>BAR} >>.
+And I also want to make sure that newlines work like this
+C<<<
+$self->{FOOBAR} >> 3 and [$b => $a]->[$a <=> $b]
+>>>
+
+Of course I should still be able to do all this I<with> escape sequences
+too: C<$self-E<gt>method()> and C<$self-E<gt>{FIELDNAME}> and
+C<{FOO=E<gt>BAR}>.
+
+Dont forget C<$self-E<gt>method()-E<gt>{FIELDNAME} = {FOO=E<gt>BAR}>.
+
+And make sure that C<0> works too!
+
+Now, if I use << or >> as my delimiters, then I have to use whitespace.
+So things like C<<$self->method()>> and C<<$self->{FIELDNAME}>> wont end
+up doing what you might expect since the first > will still terminate
+the first < seen.
+
+Lets make sure these work for empty ones too, like C<<< >>>,
+C<<<<
+>>>>, and C<< >> >> (just to be obnoxious)
+
+The statement: C<This is dog kind's I<finest> hour!> is a parody of a
+quotation from Winston Churchill.
+
+The following tests are added to those:
+
+Make sure that a few othZ<>er odd I<Z<>things> still work. This should be
+a vertical bar: E<verbar>. Here's a test of a few more special escapes
+that have to be supported:
+
+=over 3
+
+=item E<amp>
+
+An ampersand.
+
+=item E<apos>
+
+An apostrophe.
+
+=item E<lt>
+
+A less-than sign.
+
+=item E<gt>
+
+A greater-than sign.
+
+=item E<quot>
+
+A double quotation mark.
+
+=item E<sol>
+
+A forward slash.
+
+=back
+
+Try to get this bit of text over towards the edge so S<|that all of this
+text inside SE<lt>E<gt> won't|> be wrapped. Also test the
+|sameE<nbsp>thingE<nbsp>withE<nbsp>non-breakingS< spaces>.|
+
+There is a soft hyE<shy>phen in hyphen at hy-phen.
+
+This is a test of an X<index entry>index entry.
+
+=head1 VERBATIM
+
+Throw in a few verbatim paragraphs.
+
+ use Term::ANSIColor;
+ print color 'bold blue';
+ print "This text is bold blue.\n";
+ print color 'reset';
+ print "This text is normal.\n";
+ print colored ("Yellow on magenta.\n", 'yellow on_magenta');
+ print "This text is normal.\n";
+ print colored ['yellow on_magenta'], "Yellow on magenta.\n";
+
+ use Term::ANSIColor qw(uncolor);
+ print uncolor '01;31', "\n";
+
+But this isn't verbatim (make sure it wraps properly), and the next
+paragraph is again:
+
+ use Term::ANSIColor qw(:constants);
+ print BOLD, BLUE, "This text is in bold blue.\n", RESET;
+
+ use Term::ANSIColor qw(:constants); $Term::ANSIColor::AUTORESET = 1; print BOLD BLUE "This text is in bold blue.\n"; print "This text is normal.\n";
+
+(Ugh, that's obnoxiously long.) Try different spacing:
+
+ Starting with a tab.
+Not
+starting
+with
+a
+tab. But this should still be verbatim.
+ As should this.
+
+This isn't.
+
+ This is. And this: is an internal tab. It should be:
+ |--| <= lined up with that.
+
+(Tricky, but tabs should be expanded before the translator starts in on
+the text since otherwise text with mixed tabs and spaces will get messed
+up.)
+
+ And now we test verbatim paragraphs right before a heading. Older
+ versions of Pod::Man generated two spaces between paragraphs like this
+ and the heading. (In order to properly test this, one may have to
+ visually inspect the nroff output when run on the generated *roff
+ text, unfortunately.)
+
+=head1 CONCLUSION
+
+That's all, folks!
+
+=cut
diff --git a/cpan/Pod-Simple/t/JustPod_corpus.t b/cpan/Pod-Simple/t/JustPod_corpus.t
new file mode 100644
index 0000000000..31acaaf7b8
--- /dev/null
+++ b/cpan/Pod-Simple/t/JustPod_corpus.t
@@ -0,0 +1,155 @@
+# Testing Pod::Simple::JustPod against *.pod in /t
+use strict;
+
+BEGIN {
+ if($ENV{PERL_CORE}) {
+ chdir 't';
+ @INC = '../lib';
+ }
+
+ use Config;
+ if ($Config::Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+}
+
+use File::Find;
+use File::Spec;
+use Test::More;
+
+use Pod::Simple::JustPod;
+
+my @test_files;
+
+BEGIN {
+ sub source_path {
+ my $file = shift;
+ if ($ENV{PERL_CORE}) {
+ require File::Spec;
+ my $updir = File::Spec->updir;
+ my $dir = File::Spec->catdir($updir, 'lib', 'Pod', 'Simple', 't');
+ return File::Spec->catdir($dir, $file);
+ }
+ else {
+ return $file;
+ }
+ }
+
+ my @test_dirs = (
+ File::Spec->catdir( source_path('t') ) ,
+ File::Spec->catdir( File::Spec->updir, 't') ,
+ );
+
+ my $test_dir;
+ foreach( @test_dirs ) {
+ $test_dir = $_ and last if -e;
+ }
+
+ die "Can't find the test dir" unless $test_dir;
+ print "# TESTDIR: $test_dir\n";
+
+ sub wanted {
+ push @test_files, $File::Find::name
+ if $File::Find::name =~ /\.pod$/;
+ }
+ find(\&wanted , $test_dir );
+
+ plan tests => scalar @test_files;
+}
+
+foreach my $file (@test_files) {
+ my $parser = Pod::Simple::JustPod->new();
+ $parser->complain_stderr(0);
+
+ my $input;
+ open( IN , '<' , $file ) or die "$file: $!";
+ $input .= $_ while (<IN>);
+ close( IN );
+
+ my $output;
+ $parser->output_string( \$output );
+ $parser->parse_string_document( $input );
+
+ if ($parser->any_errata_seen()) {
+ pass("Skip '$file' because of pod errors");
+ my $errata = $parser->errata_seen();
+ foreach my $line_number (sort { $a <=> $b } keys %$errata) {
+ foreach my $err_msg (sort @{$errata->{$line_number}}) {
+ diag "$file: $line_number: $err_msg";
+ }
+ }
+ next;
+ }
+
+ my $encoding = $parser->encoding();
+ if (defined $encoding) {
+ eval { require Encode; };
+ $input = Encode::decode($parser->encoding(), $input);
+ }
+
+ my @input = split "\n", $input;
+ my $stripped_input = "";
+ while (defined ($_ = shift @input)) {
+ if (/ ^ = [a-z]+ /x) {
+ my $line = "$_\n";
+
+ if ($stripped_input eq "" || $_ !~ /^=pod/) {
+ $stripped_input .= $line;
+ }
+ while (defined ($_ = shift @input)) {
+ $stripped_input .= "$_\n";
+ last if / ^ =cut /x;
+ }
+ }
+ }
+ $stripped_input =~ s/ ^ =cut \n (.) /$1/mgx;
+
+ $input = $stripped_input if $stripped_input ne "";
+ if ($input !~ / ^ =pod /x) {
+ $input =~ s/ ^ \s+ //x;
+ $input = "=pod\n\n$input";
+ }
+ if ($input !~ / =cut $ /x) {
+ $input =~ s/ \s+ $ //x;
+ $input .= "\n\n=cut\n";
+ }
+
+ my $msg = "got expected output for $file";
+ if ($output eq $input) {
+ pass($msg);
+ }
+ elsif ($ENV{PERL_TEST_DIFF}) {
+ fail($msg);
+ require File::Temp;
+ my $orig_file = File::Temp->new();
+ local $/ = "\n";
+ chomp $input;
+ print $orig_file $input, "\n";
+ close $orig_file || die "Can't close orig_file: $!";
+
+ chomp $output;
+ my $parsed_file = File::Temp->new();
+ print $parsed_file $output, "\n";
+ close $parsed_file || die "Can't close parsed_file";
+
+ my $diff = File::Temp->new();
+ system("$ENV{PERL_TEST_DIFF} $orig_file $parsed_file > $diff");
+
+ open my $fh, "<", $diff || die "Can't open $diff";
+ my @diffs = <$fh>;
+ diag(@diffs);
+ }
+ else {
+ eval { require Text::Diff; };
+ if ($@) {
+ is($output, $input, $msg);
+ diag("Set environment variable PERL_TEST_DIFF=diff_tool or install"
+ . " Text::Diff to see just the differences.");
+ }
+ else {
+ fail($msg);
+ diag Text::Diff::diff(\$input, \$output, { STYLE => 'Unified' });
+ }
+ }
+}
diff --git a/cpan/Pod-Simple/t/corpus/polish_utf8.txt b/cpan/Pod-Simple/t/corpus/polish_utf8.txt
index 32c763ee7a..95b1224842 100644
--- a/cpan/Pod-Simple/t/corpus/polish_utf8.txt
+++ b/cpan/Pod-Simple/t/corpus/polish_utf8.txt
@@ -8,7 +8,16 @@ WŚRÓD NOCNEJ CISZY -- explicitly utf8 test document in Polish
=head1 DESCRIPTION
This is a test Pod document in UTF8. Its content is the lyrics to
-the Polish Christmas carol "Wśród nocnej ciszy".
+the Polish Christmas carol "Wśród nocnej ciszy", except it includes
+a few lines to test RTF specially.
+
+ff is a character in the upper half of Plane 0, so should be negative in RTF
+𝔸 is a character in Plane 1, so should be expressed as a surrogate pair in RTF
+
+All the ASCII printables
+ !"#$%&\'()*+,-./0123456789:;<=>?@
+ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`
+abcdefghijklmnopqrstuvwxyz{|}~
Wśród nocnej ciszy głos się rozchodzi: /
Wstańcie, pasterze, Bóg się nam rodzi! /
@@ -38,6 +47,14 @@ Chleba i wina.
And now as verbatim text:
+ ff upper half, Plane 0
+ 𝔸 Plane 1
+
+ All the ASCII printables
+ !"#$%&\'()*+,-./0123456789:;<=>?@
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`
+ abcdefghijklmnopqrstuvwxyz{|}~
+
Wśród nocnej ciszy głos się rozchodzi:
Wstańcie, pasterze, Bóg się nam rodzi!
Czym prędzej się wybierajcie,
diff --git a/cpan/Pod-Simple/t/corpus/polish_utf8.xml b/cpan/Pod-Simple/t/corpus/polish_utf8.xml
index 2778571c95..2eccfe76d4 100644
--- a/cpan/Pod-Simple/t/corpus/polish_utf8.xml
+++ b/cpan/Pod-Simple/t/corpus/polish_utf8.xml
@@ -13,35 +13,54 @@
</head1>
<Para start_line="10">
This is a test Pod document in UTF8. Its content is the lyrics to the
- Polish Christmas carol &#34;W&#347;r&#243;d nocnej ciszy&#34;.
+ Polish Christmas carol &#34;W&#347;r&#243;d nocnej ciszy&#34;, except
+ it includes a few lines to test RTF specially.
</Para>
- <Para start_line="13">
+ <Para start_line="14">
+ &#64256; is a character in the upper half of Plane 0, so should be negative
+ in RTF &#120120; is a character in Plane 1, so should be expressed as a
+ surrogate pair in RTF
+ </Para>
+ <Para start_line="17">
+ All the ASCII printables
+ !&#34;#$%&#38;\&#39;()*+,-./0123456789:;&#60;=&#62;?@
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijklmnopqrstuvwxyz{|}~
+ </Para>
+ <Para start_line="22">
W&#347;r&#243;d nocnej ciszy g&#322;os si&#281; rozchodzi: / Wsta&#324;cie,
pasterze, B&#243;g si&#281; nam rodzi! / Czym pr&#281;dzej si&#281;
wybierajcie, / Do Betlejem pospieszajcie / Przywita&#263; Pana.
</Para>
- <Para start_line="19">
+ <Para start_line="28">
Poszli, znale&#378;li Dzieci&#261;tko w &#380;&#322;obie / Z wszystkimi
znaki danymi sobie. / Jako Bogu cze&#347;&#263; Mu dali, / A
witaj&#261;c zawo&#322;ali / Z wielkiej rado&#347;ci:
</Para>
- <Para start_line="25">
+ <Para start_line="34">
Ach, witaj Zbawco z dawno &#380;&#261;dany, / Wiele tysi&#281;cy lat
wygl&#261;dany / Na Ciebie kr&#243;le, prorocy / Czekali, a Ty&#347;
tej nocy / Nam si&#281; objawi&#322;.
</Para>
- <Para start_line="31">
+ <Para start_line="40">
I my czekamy na Ciebie, Pana, / A skoro przyjdziesz na g&#322;os
kap&#322;ana, / Padniemy na twarz przed Tob&#261;, / Wierz&#261;c,
&#380;e&#347; jest pod os&#322;on&#261; / Chleba i wina.
</Para>
- <head2 start_line="37">
+ <head2 start_line="46">
As Verbatim
</head2>
- <Para start_line="39">
+ <Para start_line="48">
And now as verbatim text:
</Para>
- <VerbatimFormatted start_line="41" xml:space="preserve">
+ <VerbatimFormatted start_line="50" xml:space="preserve">
+ &#64256; upper half, Plane 0
+ &#120120; Plane 1
+
+ All the ASCII printables
+ !&#34;#$%&#38;\&#39;()*+,-./0123456789:;&#60;=&#62;?@
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`
+ abcdefghijklmnopqrstuvwxyz{|}~
+
W&#347;r&#243;d nocnej ciszy g&#322;os si&#281; rozchodzi:
Wsta&#324;cie, pasterze, B&#243;g si&#281; nam rodzi!
Czym pr&#281;dzej si&#281; wybierajcie,
@@ -66,7 +85,7 @@
Wierz&#261;c, &#380;e&#347; jest pod os&#322;on&#261;
Chleba i wina.
</VerbatimFormatted>
- <Para start_line="65">
+ <Para start_line="82">
[end]
</Para>
</Document>
diff --git a/cpan/Pod-Simple/t/encod04.t b/cpan/Pod-Simple/t/encod04.t
index 88727cca52..8f41f98a6c 100644
--- a/cpan/Pod-Simple/t/encod04.t
+++ b/cpan/Pod-Simple/t/encod04.t
@@ -12,14 +12,14 @@ BEGIN {
use strict;
use Test;
BEGIN {
- if ($] lt 5.007_003) {
- plan tests => 5, todo => [4, 5]; # Need utf8::decode() to pass #5
- # and isn't available in this
- # release
- }
- else {
- plan tests => 5, todo => [4];
- }
+ plan tests => 6, todo => [];
+}
+
+# fail with the supplied diagnostic
+
+sub my_nok {
+ my ($diag) = @_;
+ ok (1, 0, $diag);
}
ok 1;
@@ -61,16 +61,13 @@ if( $guess ) {
if( grep m{Dash $dash}, @output_lines ) {
ok 1;
} else {
- ok 0;
- print STDERR "# failed to find expected control character in output\n"
+ my_nok "failed to find expected control character in output";
}
} else {
- ok 0;
- print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n";
+ my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'";
}
} else {
- ok 0;
- print STDERR "# parser failed to detect non-ASCII bytes in input\n";
+ my_nok "parser failed to detect non-ASCII bytes in input";
}
@@ -95,18 +92,18 @@ else {
if( $guess eq 'CP1252' ) {
ok 1;
} else {
- ok 0;
- print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n";
+ my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'";
}
} else {
- ok 0;
- print STDERR "# parser failed to detect non-ASCII bytes in input\n";
+ my_nok "parser failed to detect non-ASCII bytes in input";
}
}
-# Initial accented character followed by 'smart' apostrophe causes heuristic
-# to choose UTF8 (a somewhat contrived example)
+# Initial accented character (E acute) followed by 'smart' apostrophe is legal
+# CP1252, which should be preferred over UTF-8 because the latter
+# interpretation would be "JOS" . \N{LATIN SMALL LETTER TURNED ALPHA} . "S
+# PLACE", and that \N{} letter is an IPA one.
@output_lines = split m/[\r\n]+/, Pod::Simple::XMLOutStream->_out( qq{
@@ -127,12 +124,10 @@ else {
if( $guess eq 'CP1252' ) {
ok 1;
} else {
- ok 0;
- print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n";
+ my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'";
}
} else {
- ok 0;
- print STDERR "# parser failed to detect non-ASCII bytes in input\n";
+ my_nok "parser failed to detect non-ASCII bytes in input";
}
}
@@ -160,12 +155,40 @@ else {
if( $guess eq 'CP1252' ) {
ok 1;
} else {
- ok 0;
- print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n";
+ my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'";
+ }
+ } else {
+ my_nok "parser failed to detect non-ASCII bytes in input";
+ }
+}
+
+# The following is a real word example of something in CP1252 expressible in
+# UTF-8, but doesn't make sense in UTF-8, contributed by Bo Lindbergh.
+# Muvrarášša is a Sami word
+
+@output_lines = split m/[\r\n]+/, Pod::Simple::XMLOutStream->_out( qq{
+
+=head1 NAME
+
+Muvrar\xE1\x9A\x9Aa is a mountain in Norway
+
+=cut
+
+} );
+
+if (ord("A") != 65) { # ASCII-platform dependent test skipped on this platform
+ ok (1);
+}
+else {
+ ($guess) = "@output_lines" =~ m{Non-ASCII.*?Assuming ([\w-]+)};
+ if( $guess ) {
+ if( $guess eq 'CP1252' ) {
+ ok 1;
+ } else {
+ my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'";
}
} else {
- ok 0;
- print STDERR "# parser failed to detect non-ASCII bytes in input\n";
+ my_nok "parser failed to detect non-ASCII bytes in input";
}
}
diff --git a/cpan/Pod-Simple/t/fcodes_s.t b/cpan/Pod-Simple/t/fcodes_s.t
index 977756593d..fd48ec07bf 100644
--- a/cpan/Pod-Simple/t/fcodes_s.t
+++ b/cpan/Pod-Simple/t/fcodes_s.t
@@ -43,17 +43,17 @@ skip( $unless_ascii,
skip( $unless_ascii,
$x->_out( sub { $_[0]->nbsp_for_S(1) },
qq{=pod\n\nI like S<L</"bric-a-brac a gogo">>.\n}),
-'<Document><Para>I like <L content-implicit="yes" section="bric-a-brac a gogo" type="pod">&#34;bric-a-brac&#160;a&#160;gogo&#34;</L>.</Para></Document>'
+'<Document><Para>I like <L content-implicit="yes" raw="/&#34;bric-a-brac a gogo&#34;" section="bric-a-brac a gogo" type="pod">&#34;bric-a-brac&#160;a&#160;gogo&#34;</L>.</Para></Document>'
);
skip( $unless_ascii,
$x->_out( sub { $_[0]->nbsp_for_S(1) },
qq{=pod\n\nI like S<L<Stuff like that|/"bric-a-brac a gogo">>.\n}),
-'<Document><Para>I like <L section="bric-a-brac a gogo" type="pod">Stuff&#160;like&#160;that</L>.</Para></Document>'
+'<Document><Para>I like <L raw="Stuff like that|/&#34;bric-a-brac a gogo&#34;" section="bric-a-brac a gogo" type="pod">Stuff&#160;like&#160;that</L>.</Para></Document>'
);
skip( $unless_ascii,
$x->_out( sub { $_[0]->nbsp_for_S(1) },
qq{=pod\n\nI like S<L<Stuff I<like that>|/"bric-a-brac a gogo">>.\n}),
-'<Document><Para>I like <L section="bric-a-brac a gogo" type="pod">Stuff&#160;<I>like&#160;that</I></L>.</Para></Document>'
+'<Document><Para>I like <L raw="Stuff I&#60;like that&#62;|/&#34;bric-a-brac a gogo&#34;" section="bric-a-brac a gogo" type="pod">Stuff&#160;<I>like&#160;that</I></L>.</Para></Document>'
);
&ok( $x->_duo( sub { $_[0]->nbsp_for_S(1) },
@@ -219,7 +219,7 @@ ok(
# Test HTML output of links.
use Pod::Simple::HTML;
-my $PERLDOC = "http://search.cpan.org/perldoc";
+my $PERLDOC = "https://metacpan.org/pod";
my $MANURL = "http://man.he.net/man";
sub x ($) {
Pod::Simple::HTML->_out(
@@ -230,12 +230,12 @@ sub x ($) {
ok(
x(qq{L<Net::Ping>\n}),
- qq{\n<p><a href="$PERLDOC?Net%3A%3APing" class="podlinkpod"\n>Net::Ping</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/Net%3A%3APing" class="podlinkpod"\n>Net::Ping</a></p>\n}
);
ok(
x(qq{Be sure to read the L<Net::Ping> docs\n}),
- qq{\n<p>Be sure to read the <a href="$PERLDOC?Net%3A%3APing" class="podlinkpod"\n>Net::Ping</a> docs</p>\n}
+ qq{\n<p>Be sure to read the <a href="$PERLDOC/Net%3A%3APing" class="podlinkpod"\n>Net::Ping</a> docs</p>\n}
);
ok(
@@ -250,7 +250,7 @@ ok(
ok(
x(qq{L<Net::Ping/Ping-pong>\n}),
- qq{\n<p><a href="$PERLDOC?Net%3A%3APing#Ping-pong" class="podlinkpod"\n>&#34;Ping-pong&#34; in Net::Ping</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/Net%3A%3APing#Ping-pong" class="podlinkpod"\n>&#34;Ping-pong&#34; in Net::Ping</a></p>\n}
);
ok(
@@ -270,7 +270,7 @@ ok(
ok(
x(qq{L<Net::Ping/Ping-E<112>ong>\n}),
- qq{\n<p><a href="$PERLDOC?Net%3A%3APing#Ping-pong" class="podlinkpod"\n>&#34;Ping-pong&#34; in Net::Ping</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/Net%3A%3APing#Ping-pong" class="podlinkpod"\n>&#34;Ping-pong&#34; in Net::Ping</a></p>\n}
);
ok(
@@ -315,17 +315,17 @@ ok(
ok(
x(qq{L<Perl Error Messages|perldiag>\n}),
- qq{\n<p><a href="$PERLDOC?perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
);
ok(
x(qq{L<Perl\nError\nMessages|perldiag>\n}),
- qq{\n<p><a href="$PERLDOC?perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
);
ok(
x(qq{L<Perl\nError\t Messages|perldiag>\n}),
- qq{\n<p><a href="$PERLDOC?perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
+ qq{\n<p><a href="$PERLDOC/perldiag" class="podlinkpod"\n>Perl Error Messages</a></p>\n}
);
ok(
@@ -352,12 +352,12 @@ sub o ($) {
ok(
o(qq{L<Net::Ping>}),
- qq{<p><a href="$PERLDOC?Net::Ping">Net::Ping</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/Net::Ping">Net::Ping</a></p>\n\n}
);
ok(
o(qq{Be sure to read the L<Net::Ping> docs}),
- qq{<p>Be sure to read the <a href="$PERLDOC?Net::Ping">Net::Ping</a> docs</p>\n\n}
+ qq{<p>Be sure to read the <a href="$PERLDOC/Net::Ping">Net::Ping</a> docs</p>\n\n}
);
ok(
@@ -372,7 +372,7 @@ ok(
ok(
o(qq{L<Net::Ping/Ping-pong>}),
- qq{<p><a href="$PERLDOC?Net::Ping#Ping-pong">&quot;Ping-pong&quot; in Net::Ping</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/Net::Ping#Ping-pong">&quot;Ping-pong&quot; in Net::Ping</a></p>\n\n}
);
ok(
@@ -392,7 +392,7 @@ ok(
ok(
o(qq{L<Net::Ping/Ping-E<112>ong>}),
- qq{<p><a href="$PERLDOC?Net::Ping#Ping-pong">&quot;Ping-pong&quot; in Net::Ping</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/Net::Ping#Ping-pong">&quot;Ping-pong&quot; in Net::Ping</a></p>\n\n}
);
ok(
@@ -437,17 +437,17 @@ ok(
ok(
o(qq{L<Perl Error Messages|perldiag>}),
- qq{<p><a href="$PERLDOC?perldiag">Perl Error Messages</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/perldiag">Perl Error Messages</a></p>\n\n}
);
ok(
o(qq{L<Perl\nError\nMessages|perldiag>}),
- qq{<p><a href="$PERLDOC?perldiag">Perl Error Messages</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/perldiag">Perl Error Messages</a></p>\n\n}
);
ok(
o(qq{L<Perl\nError\t Messages|perldiag>}),
- qq{<p><a href="$PERLDOC?perldiag">Perl Error Messages</a></p>\n\n}
+ qq{<p><a href="$PERLDOC/perldiag">Perl Error Messages</a></p>\n\n}
);
ok(
diff --git a/cpan/Pod-Simple/t/github_issue_79.t b/cpan/Pod-Simple/t/github_issue_79.t
new file mode 100644
index 0000000000..a56b428c2a
--- /dev/null
+++ b/cpan/Pod-Simple/t/github_issue_79.t
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Test::More;
+
+BEGIN {
+ eval { require Test::Deep; };
+ plan skip_all => 'Fails with Can\'t locate object method "print" via package "IO::File" at t/github_issue_79.t line 33' if $] le 5.012005;
+ plan skip_all => 'Need Test::Deep to test' if $@;
+ Test::Deep->import('cmp_deeply');
+}
+
+{
+package DumpAsXML::Enh;
+
+use parent 'Pod::Simple::DumpAsXML';
+
+sub new {
+ my ( $class ) = @_;
+ my $self = $class->SUPER::new();
+ $self->code_handler( sub { pop( @_ )->_handle_line( 'code', @_ ); } );
+ $self->cut_handler( sub { pop( @_ )->_handle_line( 'cut', @_ ); } );
+ $self->pod_handler( sub { pop( @_ )->_handle_line( 'pod', @_ ); } );
+ $self->whiteline_handler( sub { pop( @_ )->_handle_line( 'white', @_ ); } );
+ return $self;
+};
+
+sub _handle_line {
+ my ( $self, $elem, $text, $line ) = @_;
+ my $fh = $self->{ output_fh };
+ $fh->print( ' ' x $self->{ indent }, "<$elem start_line=\"$line\"/>\n" );
+};
+
+}
+
+my $output = '';
+my $parser = DumpAsXML::Enh->new();
+$parser->output_string( \$output );
+
+my $input = [
+ '=head1 DESCRIPTION',
+ '',
+ ' Verbatim paragraph.',
+ '',
+ '=cut',
+];
+my $expected_output = [
+ '<Document start_line="1">',
+ ' <head1 start_line="1">',
+ ' DESCRIPTION',
+ ' </head1>',
+ ' <VerbatimFormatted start_line="3" xml:space="preserve">',
+ ' Verbatim paragraph.',
+ ' </VerbatimFormatted>',
+ ' <cut start_line="5"/>',
+ '</Document>',
+];
+
+$parser->parse_lines( @$input, undef );
+
+my $actual_output = [ split( "\n", $output ) ];
+cmp_deeply( $actual_output, $expected_output ) or do {
+ diag( 'actual output:' );
+ diag( "|$_" ) for @$actual_output;
+ diag( 'expected output:' );
+ diag( "|$_" ) for @$expected_output;
+};
+
+done_testing;
+exit( 0 );
+
diff --git a/cpan/Pod-Simple/t/html01.t b/cpan/Pod-Simple/t/html01.t
index b4caa39dc6..8d8e528320 100644
--- a/cpan/Pod-Simple/t/html01.t
+++ b/cpan/Pod-Simple/t/html01.t
@@ -9,7 +9,7 @@ BEGIN {
use strict;
use Test;
-BEGIN { plan tests => 13 };
+BEGIN { plan tests => 14 };
#use Pod::Simple::Debug (10);
@@ -137,6 +137,16 @@ ok(
"\n<dl>\n<dt><a name=\"howdy\"\n>Foo</a></dt>\n</dl>\n",
);
+{ # Test that strip_verbatim_indent() works. github issue #i5
+ my $output;
+
+ my $obj = Pod::Simple::HTML->new;
+ $obj->strip_verbatim_indent(" ");
+ $obj->output_string(\$output);
+ $obj->parse_string_document("=pod\n\n First line\n 2nd line\n");
+ ok($output, qr!<pre>First line\n2nd line</pre>!s);
+}
+
print "# And one for the road...\n";
ok 1;
diff --git a/cpan/Pod-Simple/t/perlcyg.pod b/cpan/Pod-Simple/t/perlcyg.pod
index 6264a15788..2da4b28aa6 100644
--- a/cpan/Pod-Simple/t/perlcyg.pod
+++ b/cpan/Pod-Simple/t/perlcyg.pod
@@ -56,7 +56,7 @@ runtime behavior (see L</"TEST">).
=over 4
-=item * C<PATH>
+=item * C<PATH>
Set the C<PATH> environment variable so that Configure finds the Cygwin
versions of programs. Any Windows directories should be removed or
diff --git a/cpan/Pod-Simple/t/rtf_utf8.t b/cpan/Pod-Simple/t/rtf_utf8.t
new file mode 100644
index 0000000000..0d2d8ecf73
--- /dev/null
+++ b/cpan/Pod-Simple/t/rtf_utf8.t
@@ -0,0 +1,220 @@
+#!/usr/bin/perl -w
+
+# t/rtf_utf8.t - Check that RTF works with UTF-8 input
+
+BEGIN {
+ chdir 't' if -d 't';
+}
+
+my $expected = join "", <DATA>;
+
+use strict;
+use lib '../lib';
+use Test::More;
+use File::Spec;
+
+if ($] < 5.008) {
+ plan skip_all => "Doesn't work before 5.8";
+}
+else {
+ plan tests => 5;
+}
+
+for my $format (qw(RTF)) {
+ my $class = "Pod::Simple::RTF";
+ use_ok $class or next;
+ ok my $parser = $class->new, "Construct RTF parser";
+
+ my $output = '';
+ ok $parser->output_string(\$output), "Set RTF output string";
+ ok $parser->parse_file(File::Spec->catfile(qw(corpus polish_utf8.txt))),
+ "Parse to RTF via parse_file()";
+ $output =~ s/\\info.*?author \[see doc\]\}/VARIANT TEXT DELETED/s;
+ $output =~ s/$/\n/;
+
+ my $msg = "got expected output";
+ if ($output eq $expected) {
+ pass($msg);
+ }
+ elsif ($ENV{PERL_TEST_DIFF}) {
+ fail($msg);
+ require File::Temp;
+ my $orig_file = File::Temp->new();
+ local $/ = "\n";
+ chomp $expected;
+ print $orig_file $expected, "\n";
+ close $orig_file || die "Can't close orig_file: $!";
+
+ chomp $output;
+ my $parsed_file = File::Temp->new();
+ print $parsed_file $output, "\n";
+ close $parsed_file || die "Can't close parsed_file";
+
+ my $diff = File::Temp->new();
+ system("$ENV{PERL_TEST_DIFF} $orig_file $parsed_file > $diff");
+
+ open my $fh, "<", $diff || die "Can't open $diff";
+ my @diffs = <$fh>;
+ diag(@diffs);
+ }
+ else {
+ eval { require Text::Diff; };
+ if ($@) {
+ is($output, $expected, $msg);
+ diag("Set environment variable PERL_TEST_DIFF=diff_tool or install"
+ . " Text::Diff to see just the differences.");
+ }
+ else {
+ fail($msg);
+ diag Text::Diff::diff(\$expected, \$output, { STYLE => 'Unified' });
+ }
+ }
+}
+
+__DATA__
+{\rtf1\ansi\deff0
+
+{\fonttbl
+{\f0\froman Times New Roman;}
+{\f1\fmodern Courier New;}
+{\f2\fswiss Arial;}
+}
+
+{\stylesheet
+{\snext0 Normal;}
+{\*\cs10 \additive Default Paragraph Font;}
+{\*\cs16 \additive \i \sbasedon10 pod-I;}
+{\*\cs17 \additive \i\lang1024\noproof \sbasedon10 pod-F;}
+{\*\cs18 \additive \b \sbasedon10 pod-B;}
+{\*\cs19 \additive \f1\lang1024\noproof\sbasedon10 pod-C;}
+{\s20\ql \li0\ri0\sa180\widctlpar\f1\fs18\lang1024\noproof\sbasedon0 \snext0 pod-codeblock;}
+{\*\cs21 \additive \lang1024\noproof \sbasedon10 pod-computerese;}
+{\*\cs22 \additive \i\lang1024\noproof\sbasedon10 pod-L-pod;}
+{\*\cs23 \additive \i\lang1024\noproof\sbasedon10 pod-L-url;}
+{\*\cs24 \additive \i\lang1024\noproof\sbasedon10 pod-L-man;}
+
+{\*\cs25 \additive \f1\lang1024\noproof\sbasedon0 pod-codelbock-plain;}
+{\*\cs26 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-ital;}
+{\*\cs27 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold;}
+{\*\cs28 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold-ital;}
+
+{\s31\ql \keepn\sb90\sa180\f2\fs32\ul\sbasedon0 \snext0 pod-head1;}
+{\s32\ql \keepn\sb90\sa180\f2\fs28\ul\sbasedon0 \snext0 pod-head2;}
+{\s33\ql \keepn\sb90\sa180\f2\fs25\ul\sbasedon0 \snext0 pod-head3;}
+{\s34\ql \keepn\sb90\sa180\f2\fs22\ul\sbasedon0 \snext0 pod-head4;}
+}
+
+{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
+{VARIANT TEXT DELETED{\company [see doc]}{\operator [see doc]}
+}
+
+\deflang1033\plain\lang1033\widowctrl
+{\header\pard\qr\plain\f2\fs17
+W\uc1\u346?R\'d3D NOCNEJ CISZY \_\_ explicitly utf8 test document in Polish,
+p.\chpgn\par}
+\fs25
+
+
+
+{\pard\li0\s31\keepn\sb90\sa180\f2\fs32\ul{
+NAME
+}\par}
+
+{\pard\li0\sa180
+W\uc1\u346?R\'d3D NOCNEJ CISZY \_\_ explicitly utf8 test document
+in Polish
+\par}
+
+{\pard\li0\s31\keepn\sb90\sa180\f2\fs32\ul{
+DESCRIPTION
+}\par}
+
+{\pard\li0\sa180
+This is a test Pod document in UT\'468. Its content is the lyrics
+to the Polish Christmas carol "W\uc1\u347?r\'f3d nocnej ciszy", except
+it includes a few lines to test RT\'46 specially.
+\par}
+
+{\pard\li0\sa180
+\uc1\u-1280? is a character in the upper half of Plane 0, so should
+be negative in RT\'46 \uc1\u-10187\u-8904? is a character in Plane
+1, so should be expressed as a surrogate pair in RT\'46
+\par}
+
+{\pard\li0\sa180
+All the ASCII printables !"#$%&\'5c'()*+,\_./0123456789:;<=>?@ ABCDE\'46GHIJKLMNOPQRSTUVWXYZ[{
+\cs21\lang1024\noproof \'5c]^\'5f`} abcdefghijklmnopqrstuvwxyz\'7b|\'7d~
+\par}
+
+{\pard\li0\sa180
+W\uc1\u347?r\'f3d nocnej ciszy g\uc1\u322?os si\uc1\u281? rozchodzi:
+/ Wsta\uc1\u324?cie, pasterze, B\'f3g si\uc1\u281? nam rodzi! / Czym
+pr\uc1\u281?dzej si\uc1\u281? wybierajcie, / Do Betlejem pospieszajcie
+/ Przywita\uc1\u263? Pana.
+\par}
+
+{\pard\li0\sa180
+Poszli, znale\uc1\u378?li Dzieci\uc1\u261?tko w \uc1\u380?\uc1\u322?obie
+/ Z wszystkimi znaki danymi sobie. / Jako Bogu cze\uc1\u347?\uc1\u263?
+Mu dali, / A witaj\uc1\u261?c zawo\uc1\u322?ali / Z wielkiej rado\uc1\u347?ci:
+\par}
+
+{\pard\li0\sa180
+Ach, witaj Zbawco z dawno \uc1\u380?\uc1\u261?dany, / Wiele tysi\uc1\u281?cy
+lat wygl\uc1\u261?dany / Na Ciebie kr\'f3le, prorocy / Czekali, a
+Ty\uc1\u347? tej nocy / Nam si\uc1\u281? objawi\uc1\u322?.
+\par}
+
+{\pard\li0\sa180
+I my czekamy na Ciebie, Pana, / A skoro przyjdziesz na g\uc1\u322?os
+kap\uc1\u322?ana, / Padniemy na twarz przed Tob\uc1\u261?, / Wierz\uc1\u261?c,
+\uc1\u380?e\uc1\u347? jest pod os\uc1\u322?on\uc1\u261? / Chleba i
+wina.
+\par}
+
+{\pard\li0\s32\keepn\sb90\sa180\f2\fs28\ul{
+As Verbatim
+}\par}
+
+{\pard\li0\sa180
+And now as verbatim text:
+\par}
+
+{\pard\li0\plain\s20\sa180\f1\fs18\lang1024\noproof
+ \uc1\u-1280? upper half, Plane 0\line
+ \uc1\u-10187\u-8904? Plane 1\line
+\line
+ All the ASCII printables\line
+ !"#$%&\'5c'()*+,-./0123456789:;<=>?@\line
+ ABCDE\'46GHIJKLMNOPQRSTUVWXYZ[\'5c]^\'5f`\line
+ abcdefghijklmnopqrstuvwxyz\'7b|\'7d~\line
+\line
+ W\uc1\u347?r\'f3d nocnej ciszy g\uc1\u322?os si\uc1\u281? rozchodzi:\line
+ Wsta\uc1\u324?cie, pasterze, B\'f3g si\uc1\u281? nam rodzi!\line
+ Czym pr\uc1\u281?dzej si\uc1\u281? wybierajcie,\line
+ Do Betlejem pospieszajcie\line
+ Przywita\uc1\u263? Pana.\line
+\line
+ Poszli, znale\uc1\u378?li Dzieci\uc1\u261?tko w \uc1\u380?\uc1\u322?obie\line
+ Z wszystkimi znaki danymi sobie.\line
+ Jako Bogu cze\uc1\u347?\uc1\u263? Mu dali,\line
+ A witaj\uc1\u261?c zawo\uc1\u322?ali\line
+ Z wielkiej rado\uc1\u347?ci:\line
+\line
+ Ach, witaj Zbawco z dawno \uc1\u380?\uc1\u261?dany,\line
+ Wiele tysi\uc1\u281?cy lat wygl\uc1\u261?dany\line
+ Na Ciebie kr\'f3le, prorocy\line
+ Czekali, a Ty\uc1\u347? tej nocy\line
+ Nam si\uc1\u281? objawi\uc1\u322?.\line
+\line
+ I my czekamy na Ciebie, Pana,\line
+ A skoro przyjdziesz na g\uc1\u322?os kap\uc1\u322?ana,\line
+ Padniemy na twarz przed Tob\uc1\u261?,\line
+ Wierz\uc1\u261?c, \uc1\u380?e\uc1\u347? jest pod os\uc1\u322?on\uc1\u261?\line
+ Chleba i wina.
+\par}
+
+{\pard\li0\sa180
+[end]
+\par}
+}
diff --git a/cpan/Pod-Simple/t/search50.t b/cpan/Pod-Simple/t/search50.t
index 126f24a7b1..0dc9d75a29 100644
--- a/cpan/Pod-Simple/t/search50.t
+++ b/cpan/Pod-Simple/t/search50.t
@@ -23,6 +23,7 @@ ok $x->inc; # make sure inc=1 is the default
use Pod::Simple;
*pretty = \&Pod::Simple::BlackBox::pretty;
+*pretty = \&Pod::Simple::BlackBox::pretty; # avoid 'once' warning
my $found = 0;
$x->callback(sub {
diff --git a/cpan/Pod-Simple/t/whine.t b/cpan/Pod-Simple/t/whine.t
index b33f0a91ef..4ac76e5bd3 100644
--- a/cpan/Pod-Simple/t/whine.t
+++ b/cpan/Pod-Simple/t/whine.t
@@ -1,6 +1,6 @@
use strict;
use warnings;
-use Test::More tests => 4;
+use Test::More tests => 6;
{
package Pod::Simple::ErrorFinder;
@@ -51,3 +51,23 @@ sub errors { Pod::Simple::ErrorFinder->errors_for_input(@_) }
"warning for / in text part of L<>",
);
}
+
+{
+ my $input = "=pod\n\nnested LE<lt>E<sol>E<gt>: L<Nested L<http://foobar>|http://baz>\n";
+ my $errors = errors("$input");
+ is_deeply(
+ $errors,
+ { 3 => [ "Nested L<> are illegal. Pretending inner one is X<...> so can continue looking for other errors." ] },
+ "warning for nested L<>",
+ );
+}
+
+{
+ my $input = "=pod\n\nLE<lt>E<sol>E<gt> containing only slash: L< / >\n";
+ my $errors = errors("$input");
+ is_deeply(
+ $errors,
+ { 3 => [ "L<> contains only '/'" ] },
+ "warning for L< / > containing only a slash",
+ );
+}
diff --git a/cpan/Pod-Simple/t/x_nixer.t b/cpan/Pod-Simple/t/x_nixer.t
index 34018109c5..3787006266 100644
--- a/cpan/Pod-Simple/t/x_nixer.t
+++ b/cpan/Pod-Simple/t/x_nixer.t
@@ -184,7 +184,7 @@ ok( Pod::Simple::DumpAsXML->_out( \&nixy_mergy, "=pod\n\nZ<>F<C<Z<>fE<111>L<E<78
' <F>',
' <C>',
' fo',
- ' <L content-implicit="yes" section="Ping-pong" to="Net::Ping" type="pod">',
+ ' <L content-implicit="yes" raw="E&#60;78&#62;et::Ping/Ping-E&#60;112&#62;ong" section="Ping-pong" to="Net::Ping" type="pod">',
' &#34;Ping-pong&#34; in Net::Ping',
' </L>',
' o',
diff --git a/cpan/Pod-Simple/t/xhtml01.t b/cpan/Pod-Simple/t/xhtml01.t
index 01e6f189b4..7ee0865216 100644
--- a/cpan/Pod-Simple/t/xhtml01.t
+++ b/cpan/Pod-Simple/t/xhtml01.t
@@ -18,7 +18,7 @@ isa_ok ($parser, 'Pod::Simple::XHTML');
my $results;
-my $PERLDOC = "http://search.cpan.org/perldoc";
+my $PERLDOC = "https://metacpan.org/pod";
my $MANURL = "http://man.he.net/man";
initialize($parser, $results);
@@ -541,7 +541,7 @@ $parser->parse_string_document(<<'EOPOD');
A plain paragraph with a L<Newlines>.
EOPOD
is($results, <<"EOHTML", "Link entity in a paragraph");
-<p>A plain paragraph with a <a href="$PERLDOC?Newlines">Newlines</a>.</p>
+<p>A plain paragraph with a <a href="$PERLDOC/Newlines">Newlines</a>.</p>
EOHTML
@@ -552,7 +552,7 @@ $parser->parse_string_document(<<'EOPOD');
A plain paragraph with a L<perlport/Newlines>.
EOPOD
is($results, <<"EOHTML", "Link entity in a paragraph");
-<p>A plain paragraph with a <a href="$PERLDOC?perlport#Newlines">&quot;Newlines&quot; in perlport</a>.</p>
+<p>A plain paragraph with a <a href="$PERLDOC/perlport#Newlines">&quot;Newlines&quot; in perlport</a>.</p>
EOHTML
@@ -742,16 +742,16 @@ like $results, qr{\Q<meta http-equiv="Content-Type" content="text/html; charset=
# Test the link generation methods.
is $parser->resolve_pod_page_link('Net::Ping', 'INSTALL'),
- "$PERLDOC?Net::Ping#INSTALL",
+ "$PERLDOC/Net::Ping#INSTALL",
'POD link with fragment';
is $parser->resolve_pod_page_link('perlpodspec'),
- "$PERLDOC?perlpodspec", 'Simple POD link';
+ "$PERLDOC/perlpodspec", 'Simple POD link';
is $parser->resolve_pod_page_link(undef, 'SYNOPSIS'), '#SYNOPSIS',
'Simple fragment link';
is $parser->resolve_pod_page_link(undef, 'this that'), '#this-that',
'Fragment link with space';
is $parser->resolve_pod_page_link('perlpod', 'this that'),
- "$PERLDOC?perlpod#this-that",
+ "$PERLDOC/perlpod#this-that",
'POD link with fragment with space';
is $parser->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE'),