diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2009-09-11 11:28:52 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2009-09-11 11:28:52 +0200 |
commit | 673c5bc2bbe759dd457a583e51e39e2108e383e0 (patch) | |
tree | e28729be5abefc5fbb041a012d74a1c4f476af17 /lib | |
parent | 94eb7880847287ea38f350b7e1f64fa31225e73a (diff) | |
download | perl-673c5bc2bbe759dd457a583e51e39e2108e383e0.tar.gz |
Move encoding::warnings to ext/
Diffstat (limited to 'lib')
-rw-r--r-- | lib/.gitignore | 1 | ||||
-rw-r--r-- | lib/encoding/warnings.pm | 239 | ||||
-rw-r--r-- | lib/encoding/warnings/t/1-warning.t | 36 | ||||
-rw-r--r-- | lib/encoding/warnings/t/2-fatal.t | 34 | ||||
-rw-r--r-- | lib/encoding/warnings/t/3-normal.t | 23 | ||||
-rw-r--r-- | lib/encoding/warnings/t/4-lexical.t | 49 |
6 files changed, 1 insertions, 381 deletions
diff --git a/lib/.gitignore b/lib/.gitignore index 4176521cc0..b267abf1cd 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -305,6 +305,7 @@ /auto /base.pm /constant.pm +/encoding /encoding.pm /fields.pm /lib.pm diff --git a/lib/encoding/warnings.pm b/lib/encoding/warnings.pm deleted file mode 100644 index 5e6aec0c8f..0000000000 --- a/lib/encoding/warnings.pm +++ /dev/null @@ -1,239 +0,0 @@ -package encoding::warnings; -$encoding::warnings::VERSION = '0.11'; - -use strict; -use 5.007; - -=head1 NAME - -encoding::warnings - Warn on implicit encoding conversions - -=head1 VERSION - -This document describes version 0.11 of encoding::warnings, released -June 5, 2007. - -=head1 SYNOPSIS - - use encoding::warnings; # or 'FATAL' to raise fatal exceptions - - utf8::encode($a = chr(20000)); # a byte-string (raw bytes) - $b = chr(20000); # a unicode-string (wide characters) - - # "Bytes implicitly upgraded into wide characters as iso-8859-1" - $c = $a . $b; - -=head1 DESCRIPTION - -=head2 Overview of the problem - -By default, there is a fundamental asymmetry in Perl's unicode model: -implicit upgrading from byte-strings to unicode-strings assumes that -they were encoded in I<ISO 8859-1 (Latin-1)>, but unicode-strings are -downgraded with UTF-8 encoding. This happens because the first 256 -codepoints in Unicode happens to agree with Latin-1. - -However, this silent upgrading can easily cause problems, if you happen -to mix unicode strings with non-Latin1 data -- i.e. byte-strings encoded -in UTF-8 or other encodings. The error will not manifest until the -combined string is written to output, at which time it would be impossible -to see where did the silent upgrading occur. - -=head2 Detecting the problem - -This module simplifies the process of diagnosing such problems. Just put -this line on top of your main program: - - use encoding::warnings; - -Afterwards, implicit upgrading of high-bit bytes will raise a warning. -Ex.: C<Bytes implicitly upgraded into wide characters as iso-8859-1 at -- line 7>. - -However, strings composed purely of ASCII code points (C<0x00>..C<0x7F>) -will I<not> trigger this warning. - -You can also make the warnings fatal by importing this module as: - - use encoding::warnings 'FATAL'; - -=head2 Solving the problem - -Most of the time, this warning occurs when a byte-string is concatenated -with a unicode-string. There are a number of ways to solve it: - -=over 4 - -=item * Upgrade both sides to unicode-strings - -If your program does not need compatibility for Perl 5.6 and earlier, -the recommended approach is to apply appropriate IO disciplines, so all -data in your program become unicode-strings. See L<encoding>, L<open> and -L<perlfunc/binmode> for how. - -=item * Downgrade both sides to byte-strings - -The other way works too, especially if you are sure that all your data -are under the same encoding, or if compatibility with older versions -of Perl is desired. - -You may downgrade strings with C<Encode::encode> and C<utf8::encode>. -See L<Encode> and L<utf8> for details. - -=item * Specify the encoding for implicit byte-string upgrading - -If you are confident that all byte-strings will be in a specific -encoding like UTF-8, I<and> need not support older versions of Perl, -use the C<encoding> pragma: - - use encoding 'utf8'; - -Similarly, this will silence warnings from this module, and preserve the -default behaviour: - - use encoding 'iso-8859-1'; - -However, note that C<use encoding> actually had three distinct effects: - -=over 4 - -=item * PerlIO layers for B<STDIN> and B<STDOUT> - -This is similar to what L<open> pragma does. - -=item * Literal conversions - -This turns I<all> literal string in your program into unicode-strings -(equivalent to a C<use utf8>), by decoding them using the specified -encoding. - -=item * Implicit upgrading for byte-strings - -This will silence warnings from this module, as shown above. - -=back - -Because literal conversions also work on empty strings, it may surprise -some people: - - use encoding 'big5'; - - my $byte_string = pack("C*", 0xA4, 0x40); - print length $a; # 2 here. - $a .= ""; # concatenating with a unicode string... - print length $a; # 1 here! - -In other words, do not C<use encoding> unless you are certain that the -program will not deal with any raw, 8-bit binary data at all. - -However, the C<Filter =E<gt> 1> flavor of C<use encoding> will I<not> -affect implicit upgrading for byte-strings, and is thus incapable of -silencing warnings from this module. See L<encoding> for more details. - -=back - -=head1 CAVEATS - -For Perl 5.9.4 or later, this module's effect is lexical. - -For Perl versions prior to 5.9.4, this module affects the whole script, -instead of inside its lexical block. - -=cut - -# Constants. -sub ASCII () { 0 } -sub LATIN1 () { 1 } -sub FATAL () { 2 } - -# Install a ${^ENCODING} handler if no other one are already in place. -sub import { - my $class = shift; - my $fatal = shift || ''; - - local $@; - return if ${^ENCODING} and ref(${^ENCODING}) ne $class; - return unless eval { require Encode; 1 }; - - my $ascii = Encode::find_encoding('us-ascii') or return; - my $latin1 = Encode::find_encoding('iso-8859-1') or return; - - # Have to undef explicitly here - undef ${^ENCODING}; - - # Install a warning handler for decode() - my $decoder = bless( - [ - $ascii, - $latin1, - (($fatal eq 'FATAL') ? 'Carp::croak' : 'Carp::carp'), - ], $class, - ); - - ${^ENCODING} = $decoder; - $^H{$class} = 1; -} - -sub unimport { - my $class = shift; - $^H{$class} = undef; - undef ${^ENCODING}; -} - -# Don't worry about source code literals. -sub cat_decode { - my $self = shift; - return $self->[LATIN1]->cat_decode(@_); -} - -# Warn if the data is not purely US-ASCII. -sub decode { - my $self = shift; - - DO_WARN: { - if ($] >= 5.009004) { - my $hints = (caller(0))[10]; - $hints->{ref($self)} or last DO_WARN; - } - - local $@; - my $rv = eval { $self->[ASCII]->decode($_[0], Encode::FB_CROAK()) }; - return $rv unless $@; - - require Carp; - no strict 'refs'; - $self->[FATAL]->( - "Bytes implicitly upgraded into wide characters as iso-8859-1" - ); - - } - - return $self->[LATIN1]->decode(@_); -} - -sub name { 'iso-8859-1' } - -1; - -__END__ - -=head1 SEE ALSO - -L<perlunicode>, L<perluniintro> - -L<open>, L<utf8>, L<encoding>, L<Encode> - -=head1 AUTHORS - -Audrey Tang - -=head1 COPYRIGHT - -Copyright 2004, 2005, 2006, 2007 by Audrey Tang E<lt>cpan@audreyt.orgE<gt>. - -This program is free software; you can redistribute it and/or modify it -under the same terms as Perl itself. - -See L<http://www.perl.com/perl/misc/Artistic.html> - -=cut diff --git a/lib/encoding/warnings/t/1-warning.t b/lib/encoding/warnings/t/1-warning.t deleted file mode 100644 index c7525aef1f..0000000000 --- a/lib/encoding/warnings/t/1-warning.t +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/perl -# $File: /member/local/autrijus/encoding-warnings//t/1-warning.t $ $Author: autrijus $ -# $Revision: #5 $ $Change: 6145 $ $DateTime: 2004-07-16T03:49:06.717424Z $ - -BEGIN { - unless (eval { require Encode } ) { - print "1..0 # Skip: no Encode\n"; - exit 0; - } -} - -use Test; -BEGIN { plan tests => 2 } - -use strict; -use encoding::warnings; -ok(encoding::warnings->VERSION); - -if ($] < 5.008) { - ok(1); - exit; -} - -my ($a, $b, $c, $ok); - -$SIG{__WARN__} = sub { - if ($_[0] =~ /upgraded/) { ok(1); exit } -}; - -utf8::encode($a = chr(20000)); -$b = chr(20000); -$c = $a . $b; - -ok($ok); - -__END__ diff --git a/lib/encoding/warnings/t/2-fatal.t b/lib/encoding/warnings/t/2-fatal.t deleted file mode 100644 index 4fc16a1df6..0000000000 --- a/lib/encoding/warnings/t/2-fatal.t +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/perl -# $File: /member/local/autrijus/encoding-warnings/t/2-fatal.t $ $Author: autrijus $ -# $Revision: #4 $ $Change: 1626 $ $DateTime: 2004-03-14T16:53:19.351256Z $ - -BEGIN { - unless (eval { require Encode } ) { - print "1..0 # Skip: no Encode\n"; - exit 0; - } -} - -use Test; -BEGIN { plan tests => 2 } - -use strict; -use encoding::warnings 'FATAL'; -ok(encoding::warnings->VERSION); - -if ($] < 5.008) { - ok(1); - exit; -} - -my ($a, $b, $c, $ok); - -$SIG{__DIE__} = sub { - if ($_[0] =~ /upgraded/) { ok(1); exit } -}; - -utf8::encode($a = chr(20000)); -$b = chr(20000); -$c = $a . $b; - -ok($ok); diff --git a/lib/encoding/warnings/t/3-normal.t b/lib/encoding/warnings/t/3-normal.t deleted file mode 100644 index f0e6446a56..0000000000 --- a/lib/encoding/warnings/t/3-normal.t +++ /dev/null @@ -1,23 +0,0 @@ -use Test; -BEGIN { plan tests => 2 } - -use strict; -use encoding::warnings 'FATAL'; -ok(encoding::warnings->VERSION); - -if ($] < 5.008) { - ok(1); - exit; -} - -my ($a, $b, $c, $ok); -$ok = 1; - -$SIG{__DIE__} = sub { $ok = 0 }; -$SIG{__WARN__} = sub { $ok = 0 }; - -$a = chr(20000); -$b = chr(20000); -$c = $a . $b; - -ok($ok); diff --git a/lib/encoding/warnings/t/4-lexical.t b/lib/encoding/warnings/t/4-lexical.t deleted file mode 100644 index e80c50411a..0000000000 --- a/lib/encoding/warnings/t/4-lexical.t +++ /dev/null @@ -1,49 +0,0 @@ -use strict; -use Test; -BEGIN { - use Config; - if ($Config::Config{'extensions'} !~ /\bEncode\b/) { - print "1..0 # Skip: Encode was not built\n"; - exit 0; - } - - plan tests => 3; -} - -{ - use encoding::warnings; - ok(encoding::warnings->VERSION); - - if ($] < 5.009004) { - ok('skipped'); - ok('skipped'); - exit; - } - - my ($a, $b, $c, $warned); - - local $SIG{__WARN__} = sub { - if ($_[0] =~ /upgraded/) { $warned = 1 } - }; - - utf8::encode($a = chr(20000)); - $b = chr(20000); - $c = $a . $b; - ok($warned); -} - -{ - my ($a, $b, $c, $warned); - - local $SIG{__WARN__} = sub { - if ($_[0] =~ /upgraded/) { $warned = 1 } - }; - - utf8::encode($a = chr(20000)); - $b = chr(20000); - $c = $a . $b; - ok(!$warned); -} - - -__END__ |