summaryrefslogtreecommitdiff
path: root/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
diff options
context:
space:
mode:
Diffstat (limited to 'cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm')
-rw-r--r--cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm43
1 files changed, 27 insertions, 16 deletions
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
index a1b570810d..ee6e747a6a 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
@@ -1,4 +1,3 @@
-
package Pod::Simple::BlackBox;
#
# "What's in the box?" "Pain."
@@ -23,7 +22,7 @@ use integer; # vroom!
use strict;
use Carp ();
use vars qw($VERSION );
-$VERSION = '3.28';
+$VERSION = '3.29';
#use constant DEBUG => 7;
BEGIN {
require Pod::Simple;
@@ -127,13 +126,25 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines)
# Try to guess encoding. Inlined for performance reasons.
if(!$self->{'parse_characters'} && !$self->{'encoding'}
&& ($self->{'in_pod'} || $line =~ /^=/s)
- && $line =~ /[^\x00-\x7f]/
+ && $line =~ /[[:^ascii:]]/
) {
- my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
+ my $encoding;
+ if (ord("A") != 65) {
+
+ # Hard to figure out on a non-ASCII platform if it is UTF-8 or not. This
+ # won't work if it isn't UTF-8, so just assume it is and hope for the
+ # best. It's not clear that the other encodings work on non-ASCII
+ # platforms anyway.
+ $encoding = 'UTF-8';
+ }
+ else {
+ $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
+ }
$self->_handle_encoding_line( "=encoding $encoding" );
+ delete $self->{'_processed_encoding'};
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
- my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;
+ my ($word) = $line =~ /(\S*[[:^ascii:]]\S*)/;
$self->whine(
$self->{'line_count'},
@@ -376,8 +387,8 @@ sub _handle_encoding_line {
}
push @{ $self->{'encoding_command_statuses'} }, $enc_error;
if (defined($self->{'_processed_encoding'})) {
- # Should never happen
- die "Nested processed encoding.";
+ # Double declaration.
+ $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives');
}
$self->{'_processed_encoding'} = $orig;
@@ -1980,20 +1991,20 @@ sub pretty { # adopted from Class::Classless
$_ eq '0' # very common case
or(
m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
- and $_ ne '-0' # the strange case that that RE lets thru
+ and $_ ne '-0' # the strange case that RE lets thru
)
) { $_;
} else {
- if( chr(65) eq 'A' ) {
- s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
- #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
+ # Yes, explicitly name every character desired. There are shortcuts one
+ # could make, but I (Karl Williamson) was afraid that some Perl
+ # releases would have bugs in some of them. For example [A-Z] works
+ # even on EBCDIC platforms to match exactly the 26 uppercase English
+ # letters, but I don't know if it has always worked without bugs. It
+ # seemed safest just to list the characters.
+ # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
+ s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
<$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
- } else {
- # We're in some crazy non-ASCII world!
- s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
#<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
- <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
- }
qq{"$_"};
}
} @stuff;