diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Pod/Man.pm | 33 | ||||
-rw-r--r-- | lib/Pod/ParseLink.pm | 7 | ||||
-rw-r--r-- | lib/Pod/Text.pm | 69 | ||||
-rw-r--r-- | lib/Pod/Text/Color.pm | 2 | ||||
-rw-r--r-- | lib/Pod/Text/Overstrike.pm | 2 | ||||
-rw-r--r-- | lib/Pod/Text/Termcap.pm | 2 | ||||
-rw-r--r-- | lib/Pod/t/man-utf8.t | 2 | ||||
-rw-r--r-- | lib/Pod/t/pod-spelling.t | 12 | ||||
-rw-r--r-- | lib/Pod/t/text-encoding.t | 141 | ||||
-rw-r--r-- | lib/Pod/t/text-options.t | 2 | ||||
-rwxr-xr-x | lib/Pod/t/text-utf8.t | 2 |
11 files changed, 256 insertions, 18 deletions
diff --git a/lib/Pod/Man.pm b/lib/Pod/Man.pm index 48fe20ed6a..85e4ac8c01 100644 --- a/lib/Pod/Man.pm +++ b/lib/Pod/Man.pm @@ -36,7 +36,7 @@ use POSIX qw(strftime); @ISA = qw(Pod::Simple); -$VERSION = '2.20'; +$VERSION = '2.21'; # Set the debugging level. If someone has inserted a debug function into this # class already, use that. Otherwise, use any Pod::Simple debug function @@ -736,6 +736,19 @@ sub start_document { return; } + # If we were given the utf8 option, set an output encoding on our file + # handle. Wrap in an eval in case we're using a version of Perl too old + # to understand this. + # + # This is evil because it changes the global state of a file handle that + # we may not own. However, we can't just blindly encode all output, since + # there may be a pre-applied output encoding (such as from PERL_UNICODE) + # and then we would double-encode. This seems to be the least bad + # approach. + if ($$self{utf8}) { + eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') }; + } + # Determine information for the preamble and then output it. my ($name, $section); if (defined $$self{name}) { @@ -1608,6 +1621,12 @@ be warned that *roff source with literal UTF-8 characters is not supported by many implementations and may even result in segfaults and other bad behavior. +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L<perlpod(1)> for more information on the C<=encoding> command. + =back The standard Pod::Simple method parse_file() takes one argument naming the @@ -1643,10 +1662,14 @@ invalid. A quote specification must be one, two, or four characters long. =head1 BUGS +Encoding handling assumes that PerlIO is available and does not work +properly if it isn't. The C<utf8> option is therefore not supported +unless Perl is built with PerlIO support. + There is currently no way to turn off the guesswork that tries to format unmarked text appropriately, and sometimes it isn't wanted (particularly when using POD to document something other than Perl). Most of the work -towards fixing this has now been done, however, and all that's still needed +toward fixing this has now been done, however, and all that's still needed is a user interface. The NAME section should be recognized specially and index entries emitted @@ -1668,6 +1691,12 @@ Pod::Man is excessively slow. =head1 CAVEATS +If Pod::Man is given the C<utf8> option, the encoding of its output file +handle will be forced to UTF-8 if possible, overriding any existing +encoding. This will be done even if the file handle is not created by +Pod::Man and was passed in from outside. This maintains consistency +regardless of PERL_UNICODE and other settings. + The handling of hyphens and em dashes is somewhat fragile, and one may get the wrong one under some circumstances. This should only matter for B<troff> output. diff --git a/lib/Pod/ParseLink.pm b/lib/Pod/ParseLink.pm index d35478827d..7cb2d656f6 100644 --- a/lib/Pod/ParseLink.pm +++ b/lib/Pod/ParseLink.pm @@ -30,7 +30,7 @@ use Exporter; @ISA = qw(Exporter); @EXPORT = qw(parselink); -$VERSION = 1.08; +$VERSION = '1.09'; ############################################################################## # Implementation @@ -140,8 +140,9 @@ and the section, anchor text, and inferred anchor text may contain any formatting codes. Any double quotes around the section are removed as part of the parsing, as is any leading or trailing whitespace. -If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, -it's interpreted as a link to a section for backwards compatibility. +If the text of the LE<lt>E<gt> escape is entirely enclosed in double +quotes, it's interpreted as a link to a section for backward +compatibility. No attempt is made to resolve formatting codes. This must be done after calling parselink() (since EE<lt>E<gt> formatting codes can be used to diff --git a/lib/Pod/Text.pm b/lib/Pod/Text.pm index 98dd434d6d..f363303869 100644 --- a/lib/Pod/Text.pm +++ b/lib/Pod/Text.pm @@ -37,7 +37,7 @@ use Pod::Simple (); # We have to export pod2text for backward compatibility. @EXPORT = qw(pod2text); -$VERSION = 3.11; +$VERSION = '3.12'; ############################################################################## # Initialization @@ -246,10 +246,19 @@ sub reformat { } # Output text to the output device. Replace non-breaking spaces with spaces -# and soft hyphens with nothing. +# and soft hyphens with nothing, and then try to fix the output encoding if +# necessary to match the input encoding unless UTF-8 output is forced. This +# preserves the traditional pass-through behavior of Pod::Text. sub output { my ($self, $text) = @_; $text =~ tr/\240\255/ /d; + unless ($$self{opt_utf8} || $$self{CHECKED_ENCODING}) { + my $encoding = $$self{encoding} || ''; + if ($encoding) { + eval { binmode ($$self{output_fh}, ":encoding($encoding)") }; + } + $$self{CHECKED_ENCODING} = 1; + } print { $$self{output_fh} } $text; } @@ -272,6 +281,22 @@ sub start_document { $$self{MARGIN} = $margin; # Default left margin. $$self{PENDING} = [[]]; # Pending output. + # We have to redo encoding handling for each document. + delete $$self{CHECKED_ENCODING}; + + # If we were given the utf8 option, set an output encoding on our file + # handle. Wrap in an eval in case we're using a version of Perl too old + # to understand this. + # + # This is evil because it changes the global state of a file handle that + # we may not own. However, we can't just blindly encode all output, since + # there may be a pre-applied output encoding (such as from PERL_UNICODE) + # and then we would double-encode. This seems to be the least bad + # approach. + if ($$self{opt_utf8}) { + eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') }; + } + return ''; } @@ -640,7 +665,7 @@ __END__ Pod::Text - Convert POD data to formatted ASCII text =for stopwords -alt stderr Allbery Sean Burke's Christiansen +alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 =head1 SYNOPSIS @@ -725,6 +750,19 @@ single space. Defaults to true. Send error messages about invalid POD to standard error instead of appending a POD ERRORS section to the generated output. +=item utf8 + +By default, Pod::Text uses the same output encoding as the input encoding +of the POD source (provided that Perl was built with PerlIO; otherwise, it +doesn't encode its output). If this option is given, the output encoding +is forced to UTF-8. + +Be aware that, when using this option, the input encoding of your POD +source must be properly declared unless it is US-ASCII or Latin-1. POD +input without an C<=encoding> command will be assumed to be in Latin-1, +and if it's actually in UTF-8, the output will be double-encoded. See +L<perlpod(1)> for more information on the C<=encoding> command. + =item width The column at which to wrap text on the right-hand side. Defaults to 76. @@ -759,6 +797,29 @@ invalid. A quote specification must be one, two, or four characters long. =back +=head1 BUGS + +Encoding handling assumes that PerlIO is available and does not work +properly if it isn't. The C<utf8> option is therefore not supported +unless Perl is built with PerlIO support. + +=head1 CAVEATS + +If Pod::Text is given the C<utf8> option, the encoding of its output file +handle will be forced to UTF-8 if possible, overriding any existing +encoding. This will be done even if the file handle is not created by +Pod::Text and was passed in from outside. This maintains consistency +regardless of PERL_UNICODE and other settings. + +If the C<utf8> option is not given, the encoding of its output file handle +will be forced to the detected encoding of the input POD, which preserves +whatever the input text is. This ensures backward compatibility with +earlier, pre-Unicode versions of this module, without large numbers of +Perl warnings. + +This is not ideal, but it seems to be the best compromise. If it doesn't +work for you, please let me know the details of how it broke. + =head1 NOTES This is a replacement for an earlier Pod::Text module written by Tom @@ -774,7 +835,7 @@ subclass of it does. Look for L<Pod::Text::Termcap>. =head1 SEE ALSO -L<Pod::Simple>, L<Pod::Text::Termcap>, L<pod2text(1)> +L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> The current version of this module is always available from its web site at L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the diff --git a/lib/Pod/Text/Color.pm b/lib/Pod/Text/Color.pm index 6f8a78f54e..517f5d0458 100644 --- a/lib/Pod/Text/Color.pm +++ b/lib/Pod/Text/Color.pm @@ -25,7 +25,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.04; +$VERSION = '2.05'; ############################################################################## # Overrides diff --git a/lib/Pod/Text/Overstrike.pm b/lib/Pod/Text/Overstrike.pm index 00b505d44d..a76fc28f8e 100644 --- a/lib/Pod/Text/Overstrike.pm +++ b/lib/Pod/Text/Overstrike.pm @@ -34,7 +34,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.02; +$VERSION = '2.03'; ############################################################################## # Overrides diff --git a/lib/Pod/Text/Termcap.pm b/lib/Pod/Text/Termcap.pm index 51d39ae7e7..4a75b30251 100644 --- a/lib/Pod/Text/Termcap.pm +++ b/lib/Pod/Text/Termcap.pm @@ -26,7 +26,7 @@ use vars qw(@ISA $VERSION); @ISA = qw(Pod::Text); -$VERSION = 2.04; +$VERSION = '2.05'; ############################################################################## # Overrides diff --git a/lib/Pod/t/man-utf8.t b/lib/Pod/t/man-utf8.t index a53208baad..8b44d6b290 100644 --- a/lib/Pod/t/man-utf8.t +++ b/lib/Pod/t/man-utf8.t @@ -39,6 +39,7 @@ print "ok 1\n"; my $n = 2; eval { binmode (\*DATA, ':encoding(utf-8)') }; +eval { binmode (\*STDOUT, ':encoding(utf-8)') }; while (<DATA>) { my %options; next until $_ eq "###\n"; @@ -57,7 +58,6 @@ while (<DATA>) { close TMP; my $parser = Pod::Man->new (%options) or die "Cannot create parser\n"; open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; - eval { binmode (\*OUT, ':encoding(utf-8)') }; $parser->parse_from_file ('tmp.pod', \*OUT); close OUT; my $accents = 0; diff --git a/lib/Pod/t/pod-spelling.t b/lib/Pod/t/pod-spelling.t index c13fb9d587..41c902782e 100644 --- a/lib/Pod/t/pod-spelling.t +++ b/lib/Pod/t/pod-spelling.t @@ -1,14 +1,22 @@ #!/usr/bin/perl # # t/pod-spelling.t -- Test POD spelling. +# +# Copyright 2008 Russ Allbery <rra@stanford.edu> +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. # Called to skip all tests with a reason. sub skip_all { - print "1..1\n"; - print "ok 1 # skip - @_\n"; + print "1..0 # Skipped: @_\n"; exit; } +# Skip all spelling tests unless flagged to run maintainer tests. +skip_all "Spelling tests only run for maintainer" + unless $ENV{RRA_MAINTAINER_TESTS}; + # Make sure we have prerequisites. hunspell is currently not supported due to # lack of support for contractions. eval 'use Test::Pod 1.00'; diff --git a/lib/Pod/t/text-encoding.t b/lib/Pod/t/text-encoding.t new file mode 100644 index 0000000000..2d624808bf --- /dev/null +++ b/lib/Pod/t/text-encoding.t @@ -0,0 +1,141 @@ +#!/usr/bin/perl -w +# +# text-encoding.t -- Test Pod::Text with various weird encoding combinations. +# +# Copyright 2002, 2004, 2006, 2007, 2008 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. + +BEGIN { + chdir 't' if -d 't'; + if ($ENV{PERL_CORE}) { + @INC = '../lib'; + } else { + unshift (@INC, '../blib/lib'); + } + unshift (@INC, '../blib/lib'); + $| = 1; + print "1..4\n"; + + # PerlIO encoding support requires Perl 5.8 or later. + if ($] < 5.008) { + my $n; + for $n (1..4) { + print "ok $n # skip -- Perl 5.8 required for UTF-8 support\n"; + } + exit; + } +} + +END { + print "not ok 1\n" unless $loaded; +} + +use Pod::Text; + +$loaded = 1; +print "ok 1\n"; + +my $n = 2; +eval { binmode (\*DATA, ':raw') }; +eval { binmode (\*STDOUT, ':raw') }; +while (<DATA>) { + my %opts; + $opts{utf8} = 1 if $n == 4; + my $parser = Pod::Text->new (%opts) or die "Cannot create parser\n"; + next until $_ eq "###\n"; + open (TMP, '> tmp.pod') or die "Cannot create tmp.pod: $!\n"; + eval { binmode (\*TMP, ':raw') }; + while (<DATA>) { + last if $_ eq "###\n"; + print TMP $_; + } + close TMP; + open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; + $parser->parse_from_file ('tmp.pod', \*OUT); + close OUT; + open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n"; + eval { binmode (\*TMP, ':raw') }; + my $output; + { + local $/; + $output = <TMP>; + } + close TMP; + unlink ('tmp.pod', 'out.tmp'); + my $expected = ''; + while (<DATA>) { + last if $_ eq "###\n"; + $expected .= $_; + } + if ($output eq $expected) { + print "ok $n\n"; + } else { + print "not ok $n\n"; + print "Expected\n========\n$expected\nOutput\n======\n$output\n"; + } + $n++; +} + +# Below the marker are bits of POD and corresponding expected text output. +# This is used to test specific features or problems with Pod::Text. The +# input and output are separated by lines containing only ###. + +__DATA__ + +### +=head1 Test of SE<lt>E<gt> + +This is S<some whitespace>. +### +Test of S<> + This is some whitespace. + +### + +### +=encoding utf-8 + +=head1 I can eat glass + +=over 4 + +=item Esperanto + +Mi povas manÄi vitron, Äi ne damaÄas min. + +=item Braille + +â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â â ¥â â â â â + +=item Hindi + +मà¥à¤ à¤à¤¾à¤à¤ à¤à¤¾ सà¤à¤¤à¤¾ हà¥à¤ à¤à¤° मà¥à¤à¥ à¤à¤¸à¤¸à¥ à¤à¥à¤ à¤à¥à¤ नहà¥à¤ पहà¥à¤à¤à¤¤à¥. + +=back + +See L<http://www.columbia.edu/kermit/utf8.html> +### +I can eat glass + Esperanto + Mi povas manÄi vitron, Äi ne damaÄas min. + + Braille + â â â â â â â â â â â â â â â â â â â â â â â + â â â â â â â â â ¥â â â â â + + Hindi + मà¥à¤ à¤à¤¾à¤à¤ à¤à¤¾ सà¤à¤¤à¤¾ हà¥à¤ à¤à¤° + मà¥à¤à¥ à¤à¤¸à¤¸à¥ à¤à¥à¤ à¤à¥à¤ नहà¥à¤ + पहà¥à¤à¤à¤¤à¥. + + See <http://www.columbia.edu/kermit/utf8.html> + +### + +### +=head1 Beyoncé +### +Beyoncé +### diff --git a/lib/Pod/t/text-options.t b/lib/Pod/t/text-options.t index 24843e4502..e2146c26f4 100644 --- a/lib/Pod/t/text-options.t +++ b/lib/Pod/t/text-options.t @@ -2,7 +2,7 @@ # # text-options.t -- Additional tests for Pod::Text options. # -# Copyright 2002, 2004, 2006 by Russ Allbery <rra@stanford.edu> +# Copyright 2002, 2004, 2006, 2008 by Russ Allbery <rra@stanford.edu> # # This program is free software; you may redistribute it and/or modify it # under the same terms as Perl itself. diff --git a/lib/Pod/t/text-utf8.t b/lib/Pod/t/text-utf8.t index 3d2904a426..806947827e 100755 --- a/lib/Pod/t/text-utf8.t +++ b/lib/Pod/t/text-utf8.t @@ -33,7 +33,6 @@ END { } use Pod::Text; -use Pod::Simple; $loaded = 1; print "ok 1\n"; @@ -53,7 +52,6 @@ while (<DATA>) { } close TMP; open (OUT, '> out.tmp') or die "Cannot create out.tmp: $!\n"; - eval { binmode (\*OUT, ':encoding(utf-8)') }; $parser->parse_from_file ('tmp.pod', \*OUT); close OUT; open (TMP, 'out.tmp') or die "Cannot open out.tmp: $!\n"; |