Upgrade to podlators-2.2.0

p4raw-id: //depot/perl@34758
author: Steve Hay <SteveHay@planit.com> 2008-11-07 08:47:26 +0000
committer: Steve Hay <SteveHay@planit.com> 2008-11-07 08:47:26 +0000
commit: 9f2f055aa1e8c86d97b5ea42473ab1747f518f3a (patch)
tree: e1212dc0d5e0cc17adc3b16631339cef8cf4e63a /lib/Pod
parent: 3327bb648bca3705820d35dd728f98436ea9f8aa (diff)
download: perl-9f2f055aa1e8c86d97b5ea42473ab1747f518f3a.tar.gz
11 files changed, 256 insertions, 18 deletions
diff --git a/lib/Pod/Man.pm b/lib/Pod/Man.pm
index 48fe20ed6a..85e4ac8c01 100644
--- a/lib/Pod/Man.pm
+++ b/lib/Pod/Man.pm
@@ -36,7 +36,7 @@ use POSIX qw(strftime);
 
 @ISA = qw(Pod::Simple);
 
-$VERSION = '2.20';
+$VERSION = '2.21';
 
 # Set the debugging level.  If someone has inserted a debug function into this
 # class already, use that.  Otherwise, use any Pod::Simple debug function
@@ -736,6 +736,19 @@ sub start_document {
         return;
     }
 
+    # If we were given the utf8 option, set an output encoding on our file
+    # handle.  Wrap in an eval in case we're using a version of Perl too old
+    # to understand this.
+    #
+    # This is evil because it changes the global state of a file handle that
+    # we may not own.  However, we can't just blindly encode all output, since
+    # there may be a pre-applied output encoding (such as from PERL_UNICODE)
+    # and then we would double-encode.  This seems to be the least bad
+    # approach.
+    if ($$self{utf8}) {
+        eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') };
+    }
+
     # Determine information for the preamble and then output it.
     my ($name, $section);
     if (defined $$self{name}) {
@@ -1608,6 +1621,12 @@ be warned that *roff source with literal UTF-8 characters is not supported
 by many implementations and may even result in segfaults and other bad
 behavior.
 
+Be aware that, when using this option, the input encoding of your POD
+source must be properly declared unless it is US-ASCII or Latin-1.  POD
+input without an C<=encoding> command will be assumed to be in Latin-1,
+and if it's actually in UTF-8, the output will be double-encoded.  See
+L<perlpod(1)> for more information on the C<=encoding> command.
+
 =back
 
 The standard Pod::Simple method parse_file() takes one argument naming the
@@ -1643,10 +1662,14 @@ invalid.  A quote specification must be one, two, or four characters long.
 
 =head1 BUGS
 
+Encoding handling assumes that PerlIO is available and does not work
+properly if it isn't.  The C<utf8> option is therefore not supported
+unless Perl is built with PerlIO support.
+
 There is currently no way to turn off the guesswork that tries to format
 unmarked text appropriately, and sometimes it isn't wanted (particularly
 when using POD to document something other than Perl).  Most of the work
-towards fixing this has now been done, however, and all that's still needed
+toward fixing this has now been done, however, and all that's still needed
 is a user interface.
 
 The NAME section should be recognized specially and index entries emitted
@@ -1668,6 +1691,12 @@ Pod::Man is excessively slow.
 
 =head1 CAVEATS
 
+If Pod::Man is given the C<utf8> option, the encoding of its output file
+handle will be forced to UTF-8 if possible, overriding any existing
+encoding.  This will be done even if the file handle is not created by
+Pod::Man and was passed in from outside.  This maintains consistency
+regardless of PERL_UNICODE and other settings.
+
 The handling of hyphens and em dashes is somewhat fragile, and one may get
 the wrong one under some circumstances.  This should only matter for
 B<troff> output.
diff --git a/lib/Pod/ParseLink.pm b/lib/Pod/ParseLink.pm
index d35478827d..7cb2d656f6 100644
--- a/lib/Pod/ParseLink.pm
+++ b/lib/Pod/ParseLink.pm
@@ -30,7 +30,7 @@ use Exporter;
 @ISA    = qw(Exporter);
 @EXPORT = qw(parselink);
 
-$VERSION = 1.08;
+$VERSION = '1.09';
 
 ##############################################################################
 # Implementation
@@ -140,8 +140,9 @@ and the section, anchor text, and inferred anchor text may contain any
 formatting codes.  Any double quotes around the section are removed as part
 of the parsing, as is any leading or trailing whitespace.
 
-If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes,
-it's interpreted as a link to a section for backwards compatibility.
+If the text of the LE<lt>E<gt> escape is entirely enclosed in double
+quotes, it's interpreted as a link to a section for backward
+compatibility.
 
 No attempt is made to resolve formatting codes.  This must be done after
 calling parselink() (since EE<lt>E<gt> formatting codes can be used to
diff --git a/lib/Pod/Text.pm b/lib/Pod/Text.pm
index 98dd434d6d..f363303869 100644
--- a/lib/Pod/Text.pm
+++ b/lib/Pod/Text.pm
@@ -37,7 +37,7 @@ use Pod::Simple ();
 # We have to export pod2text for backward compatibility.
 @EXPORT = qw(pod2text);
 
-$VERSION = 3.11;
+$VERSION = '3.12';
 
 ##############################################################################
 # Initialization
@@ -246,10 +246,19 @@ sub reformat {
 }
 
 # Output text to the output device.  Replace non-breaking spaces with spaces
-# and soft hyphens with nothing.
+# and soft hyphens with nothing, and then try to fix the output encoding if
+# necessary to match the input encoding unless UTF-8 output is forced.  This
+# preserves the traditional pass-through behavior of Pod::Text.
 sub output {
     my ($self, $text) = @_;
     $text =~ tr/\240\255/ /d;
+    unless ($$self{opt_utf8} || $$self{CHECKED_ENCODING}) {  # not forced to UTF-8, not yet checked
+        my $encoding = $$self{encoding} || '';  # empty string when no encoding is recorded
+        if ($encoding) {
+            eval { binmode ($$self{output_fh}, ":encoding($encoding)") };  # failure is ignored
+        }  # NOTE(review): a handle reused for several documents gets binmode again -- confirm layers do not stack
+        $$self{CHECKED_ENCODING} = 1;  # skip this check for the remaining output calls
+    }
     print { $$self{output_fh} } $text;
 }
 
@@ -272,6 +281,22 @@ sub start_document {
     $$self{MARGIN}  = $margin;  # Default left margin.
     $$self{PENDING} = [[]];     # Pending output.
 
+    # We have to redo encoding handling for each document.
+    delete $$self{CHECKED_ENCODING};
+
+    # If we were given the utf8 option, set an output encoding on our file
+    # handle.  Wrap in an eval in case we're using a version of Perl too old
+    # to understand this.
+    #
+    # This is evil because it changes the global state of a file handle that
+    # we may not own.  However, we can't just blindly encode all output, since
+    # there may be a pre-applied output encoding (such as from PERL_UNICODE)
+    # and then we would double-encode.  This seems to be the least bad
+    # approach.
+    if ($$self{opt_utf8}) {
+        eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') };
+    }
+
     return '';
 }
 
@@ -640,7 +665,7 @@ __END__
 Pod::Text - Convert POD data to formatted ASCII text
 
 =for stopwords
-alt stderr Allbery Sean Burke's Christiansen
+alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8
 
 =head1 SYNOPSIS
 
@@ -725,6 +750,19 @@ single space.  Defaults to true.
 Send error messages about invalid POD to standard error instead of
 appending a POD ERRORS section to the generated output.
 
+=item utf8
+
+By default, Pod::Text uses the same output encoding as the input encoding
+of the POD source (provided that Perl was built with PerlIO; otherwise, it
+doesn't encode its output).  If this option is given, the output encoding
+is forced to UTF-8.
+
+Be aware that, when using this option, the input encoding of your POD
+source must be properly declared unless it is US-ASCII or Latin-1.  POD
+input without an C<=encoding> command will be assumed to be in Latin-1,
+and if it's actually in UTF-8, the output will be double-encoded.  See
+L<perlpod(1)> for more information on the C<=encoding> command.
+
 =item width
 
 The column at which to wrap text on the right-hand side.  Defaults to 76.
@@ -759,6 +797,29 @@ invalid.  A quote specification must be one, two, or four characters long.
 
 =back
 
+=head1 BUGS
+
+Encoding handling assumes that PerlIO is available and does not work
+properly if it isn't.  The C<utf8> option is therefore not supported
+unless Perl is built with PerlIO support.
+
+=head1 CAVEATS
+
+If Pod::Text is given the C<utf8> option, the encoding of its output file
+handle will be forced to UTF-8 if possible, overriding any existing
+encoding.  This will be done even if the file handle is not created by
+Pod::Text and was passed in from outside.  This maintains consistency
+regardless of PERL_UNICODE and other settings.
+
+If the C<utf8> option is not given, the encoding of its output file handle
+will be forced to the detected encoding of the input POD, which preserves
+whatever the input text is.  This ensures backward compatibility with
+earlier, pre-Unicode versions of this module, without large numbers of
+Perl warnings.
+
+This is not ideal, but it seems to be the best compromise.  If it doesn't
+work for you, please let me know the details of how it broke.
+
 =head1 NOTES
 
 This is a replacement for an earlier Pod::Text module written by Tom
@@ -774,7 +835,7 @@ subclass of it does.  Look for L<Pod::Text::Termcap>.
 
 =head1 SEE ALSO
 
-L<Pod::Simple>, L<Pod::Text::Termcap>, L<pod2text(1)>
+L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
 
 The current version of this module is always available from its web site at
 L<http://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
diff --git a/lib/Pod/Text/Color.pm b/lib/Pod/Text/Color.pm
index 6f8a78f54e..517f5d0458 100644
--- a/lib/Pod/Text/Color.pm
+++ b/lib/Pod/Text/Color.pm
@@ -25,7 +25,7 @@ use vars qw(@ISA $VERSION);
 
 @ISA = qw(Pod::Text);
 
-$VERSION = 2.04;
+$VERSION = '2.05';
 
 ##############################################################################
 # Overrides
diff --git a/lib/Pod/Text/Overstrike.pm b/lib/Pod/Text/Overstrike.pm
index 00b505d44d..a76fc28f8e 100644
--- a/lib/Pod/Text/Overstrike.pm
+++ b/lib/Pod/Text/Overstrike.pm
@@ -34,7 +34,7 @@ use vars qw(@ISA $VERSION);
 
 @ISA = qw(Pod::Text);
 
-$VERSION = 2.02;
+$VERSION = '2.03';
 
 ##############################################################################
 # Overrides
diff --git a/lib/Pod/Text/Termcap.pm b/lib/Pod/Text/Termcap.pm
index 51d39ae7e7..4a75b30251 100644
--- a/lib/Pod/Text/Termcap.pm
+++ b/lib/Pod/Text/Termcap.pm
@@ -26,7 +26,7 @@ use vars qw(@ISA $VERSION);
 
 @ISA = qw(Pod::Text);
 
-$VERSION = 2.04;
+$VERSION = '2.05';
 
 ##############################################################################
 # Overrides
diff --git a/lib/Pod/t/man-utf8.t b/lib/Pod/t/man-utf8.t
index a53208baad..8b44d6b290 100644
--- a/lib/Pod/t/man-utf8.t
+++ b/lib/Pod/t/man-utf8.t
@@ -39,6 +39,7 @@ print "ok 1\n";
 
 my $n = 2;
 eval { binmode (\*DATA, ':encoding(utf-8)') };
+eval { binmode (\*STDOUT, ':encoding(utf-8)') };
 while (<DATA>) {
     my %options;
     next until $_ eq "###\n";
@@ -57,7 +58,6 @@ while (<DATA>) {
     close TMP;
     my $parser = Pod::Man->new (%options) or die "Cannot create parser\n";
     open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n";
-    eval { binmode (\*OUT, ':encoding(utf-8)') };
     $parser->parse_from_file ('tmp.pod', \*OUT);
     close OUT;
     my $accents = 0;
diff --git a/lib/Pod/t/pod-spelling.t b/lib/Pod/t/pod-spelling.t
index c13fb9d587..41c902782e 100644
--- a/lib/Pod/t/pod-spelling.t
+++ b/lib/Pod/t/pod-spelling.t
@@ -1,14 +1,22 @@
 #!/usr/bin/perl
 #
 # t/pod-spelling.t -- Test POD spelling.
+#
+# Copyright 2008 Russ Allbery <rra@stanford.edu>
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the same terms as Perl itself.
 
 # Called to skip all tests with a reason.
 sub skip_all {
-    print "1..1\n";
-    print "ok 1 # skip - @_\n";
+    print "1..0 # Skipped: @_\n";    # plan of zero tests, with the reason passed by the caller
     exit;
 }
 
+# Skip all spelling tests unless flagged to run maintainer tests.
+skip_all "Spelling tests only run for maintainer"
+    unless $ENV{RRA_MAINTAINER_TESTS};
+
 # Make sure we have prerequisites.  hunspell is currently not supported due to
 # lack of support for contractions.
 eval 'use Test::Pod 1.00';
diff --git a/lib/Pod/t/text-encoding.t b/lib/Pod/t/text-encoding.t
new file mode 100644
index 0000000000..2d624808bf
--- /dev/null
+++ b/lib/Pod/t/text-encoding.t
@@ -0,0 +1,141 @@
+#!/usr/bin/perl -w
+#
+# text-encoding.t -- Test Pod::Text with various weird encoding combinations.
+#
+# Copyright 2002, 2004, 2006, 2007, 2008 by Russ Allbery <rra@stanford.edu>
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the same terms as Perl itself.
+
+BEGIN {
+    # Run from t/ and choose the library path: the core tree when built as
+    # part of Perl (PERL_CORE), otherwise this distribution's blib.
+    chdir 't' if -d 't';
+    if ($ENV{PERL_CORE}) {
+        @INC = '../lib';
+    } else {
+        unshift (@INC, '../blib/lib');
+    }
+    $| = 1;
+    print "1..4\n";
+
+    # PerlIO encoding support requires Perl 5.8 or later.
+    if ($] < 5.008) {
+        for my $n (1..4) {
+            print "ok $n # skip -- Perl 5.8 required for UTF-8 support\n";
+        }
+        exit;
+    }
+}
+
+END {
+    print "not ok 1\n" unless $loaded;  # fail test 1 if we exit before $loaded is set
+}
+
+use Pod::Text;
+
+$loaded = 1;  # only reached once Pod::Text has been loaded
+print "ok 1\n";
+
+my $n = 2;
+eval { binmode (\*DATA, ':raw') };
+eval { binmode (\*STDOUT, ':raw') };
+# Each case in DATA is: ###, POD input, ###, expected output, ###.
+while (<DATA>) {
+    # Skip filler between cases before building a parser for the next one.
+    next unless $_ eq "###\n";
+    my %opts;
+    # Only the final case (test 4) runs with the utf8 option.
+    $opts{utf8} = 1 if $n == 4;
+    my $parser = Pod::Text->new (%opts) or die "Cannot create parser\n";
+    open (TMP, '> tmp.pod') or die "Cannot create tmp.pod: $!\n";
+    eval { binmode (\*TMP, ':raw') };
+    while (<DATA>) {
+        last if $_ eq "###\n";
+        print TMP $_;
+    }
+    close (TMP) or die "Cannot write tmp.pod: $!\n";
+    # Format the POD into out.tmp, then read the raw bytes back to compare.
+    open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n";
+    $parser->parse_from_file ('tmp.pod', \*OUT);
+    close OUT;
+    open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n";
+    eval { binmode (\*TMP, ':raw') };
+    my $output = do { local $/; <TMP> };
+    close TMP;
+    unlink ('tmp.pod', 'out.tmp');
+    my $expected = '';
+    while (<DATA>) {
+        last if $_ eq "###\n";
+        $expected .= $_;
+    }
+    if ($output eq $expected) {
+        print "ok $n\n";
+    } else {
+        print "not ok $n\n";
+        print "Expected\n========\n$expected\nOutput\n======\n$output\n";
+    }
+    $n++;
+}
+
+# Below the marker are bits of POD and corresponding expected text output.
+# This is used to test specific features or problems with Pod::Text.  The
+# input and output are separated by lines containing only ###.
+
+__DATA__
+
+###
+=head1 Test of SE<lt>E<gt>
+
+This is S<some whitespace>.
+###
+Test of S<>
+    This is some whitespace.
+
+###
+
+###
+=encoding utf-8
+
+=head1 I can eat glass
+
+=over 4
+
+=item Esperanto
+
+Mi povas manÄi vitron, Äi ne damaÄas min.
+
+=item Braille
+
+â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â ¥â â â â â 
+
+=item Hindi
+
+à¤®à¥à¤ à¤à¤¾à¤à¤ à¤à¤¾ à¤¸à¤à¤¤à¤¾ à¤¹à¥à¤ à¤à¤° à¤®à¥à¤à¥ à¤à¤¸à¤¸à¥ à¤à¥à¤ à¤à¥à¤ à¤¨à¤¹à¥à¤ à¤ªà¤¹à¥à¤à¤à¤¤à¥.
+
+=back
+
+See L<http://www.columbia.edu/kermit/utf8.html>
+###
+I can eat glass
+    Esperanto
+        Mi povas manÄi vitron, Äi ne damaÄas min.
+
+    Braille
+        â â â â â â â â â â â â â â â â â â â â â â â
+        â â â â â â â â â ¥â â â â â 
+
+    Hindi
+        à¤®à¥à¤ à¤à¤¾à¤à¤ à¤à¤¾ à¤¸à¤à¤¤à¤¾ à¤¹à¥à¤ à¤à¤°
+        à¤®à¥à¤à¥ à¤à¤¸à¤¸à¥ à¤à¥à¤ à¤à¥à¤ à¤¨à¤¹à¥à¤
+        à¤ªà¤¹à¥à¤à¤à¤¤à¥.
+
+    See <http://www.columbia.edu/kermit/utf8.html>
+
+###
+
+###
+=head1 Beyoncé
+###
+BeyoncÃ©
+###
diff --git a/lib/Pod/t/text-options.t b/lib/Pod/t/text-options.t
index 24843e4502..e2146c26f4 100644
--- a/lib/Pod/t/text-options.t
+++ b/lib/Pod/t/text-options.t
@@ -2,7 +2,7 @@
 #
 # text-options.t -- Additional tests for Pod::Text options.
 #
-# Copyright 2002, 2004, 2006 by Russ Allbery <rra@stanford.edu>
+# Copyright 2002, 2004, 2006, 2008 by Russ Allbery <rra@stanford.edu>
 #
 # This program is free software; you may redistribute it and/or modify it
 # under the same terms as Perl itself.
diff --git a/lib/Pod/t/text-utf8.t b/lib/Pod/t/text-utf8.t
index 3d2904a426..806947827e 100755
--- a/lib/Pod/t/text-utf8.t
+++ b/lib/Pod/t/text-utf8.t
@@ -33,7 +33,6 @@ END {
 }
 
 use Pod::Text;
-use Pod::Simple;
 
 $loaded = 1;
 print "ok 1\n";
@@ -53,7 +52,6 @@ while (<DATA>) {
     }
     close TMP;
     open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n";
-    eval { binmode (\*OUT, ':encoding(utf-8)') };
     $parser->parse_from_file ('tmp.pod', \*OUT);
     close OUT;
     open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n";
author	Steve Hay <SteveHay@planit.com>	2008-11-07 08:47:26 +0000
committer	Steve Hay <SteveHay@planit.com>	2008-11-07 08:47:26 +0000
commit	9f2f055aa1e8c86d97b5ea42473ab1747f518f3a (patch)
tree	e1212dc0d5e0cc17adc3b16631339cef8cf4e63a /lib/Pod
parent	3327bb648bca3705820d35dd728f98436ea9f8aa (diff)
download	perl-9f2f055aa1e8c86d97b5ea42473ab1747f518f3a.tar.gz