diff options
Diffstat (limited to 'cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm')
-rw-r--r-- | cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm index a1b570810d..ee6e747a6a 100644 --- a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm +++ b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm @@ -1,4 +1,3 @@ - package Pod::Simple::BlackBox; # # "What's in the box?" "Pain." @@ -23,7 +22,7 @@ use integer; # vroom! use strict; use Carp (); use vars qw($VERSION ); -$VERSION = '3.28'; +$VERSION = '3.29'; #use constant DEBUG => 7; BEGIN { require Pod::Simple; @@ -127,13 +126,25 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) # Try to guess encoding. Inlined for performance reasons. if(!$self->{'parse_characters'} && !$self->{'encoding'} && ($self->{'in_pod'} || $line =~ /^=/s) - && $line =~ /[^\x00-\x7f]/ + && $line =~ /[[:^ascii:]]/ ) { - my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1'; + my $encoding; + if (ord("A") != 65) { + + # Hard to figure out on non-ASCII platform if UTF-8 or not. This + # won't work if it isn't UTF-8, so just assume it is and hope for the + # best. It's not clear that the other encodings work on non-ASCII + # platforms anyway. + $encoding = 'UTF-8'; + } + else { + $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1'; + } $self->_handle_encoding_line( "=encoding $encoding" ); + delete $self->{'_processed_encoding'}; $self->{'_transcoder'} && $self->{'_transcoder'}->($line); - my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/; + my ($word) = $line =~ /(\S*[[:^ascii:]]\S*)/; $self->whine( $self->{'line_count'}, @@ -376,8 +387,8 @@ sub _handle_encoding_line { } push @{ $self->{'encoding_command_statuses'} }, $enc_error; if (defined($self->{'_processed_encoding'})) { - # Should never happen - die "Nested processed encoding."; + # Double declaration. 
+ $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); } $self->{'_processed_encoding'} = $orig; @@ -1980,20 +1991,20 @@ sub pretty { # adopted from Class::Classless $_ eq '0' # very common case or( m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s - and $_ ne '-0' # the strange case that that RE lets thru + and $_ ne '-0' # the strange case that RE lets thru ) ) { $_; } else { - if( chr(65) eq 'A' ) { - s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> - #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; + # Yes, explicitly name every character desired. There are shortcuts one + # could make, but I (Karl Williamson) was afraid that some Perl + # releases would have bugs in some of them. For example [A-Z] works + # even on EBCDIC platforms to match exactly the 26 uppercase English + # letters, but I don't know if it has always worked without bugs. It + # seemed safest just to list the characters. + # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> + s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; - } else { - # We're in some crazy non-ASCII world! - s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; - <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; - } qq{"$_"}; } } @stuff; |