summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorRafael Garcia-Suarez <rgarciasuarez@gmail.com>2009-09-11 11:28:52 +0200
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2009-09-11 11:28:52 +0200
commit673c5bc2bbe759dd457a583e51e39e2108e383e0 (patch)
treee28729be5abefc5fbb041a012d74a1c4f476af17 /lib
parent94eb7880847287ea38f350b7e1f64fa31225e73a (diff)
downloadperl-673c5bc2bbe759dd457a583e51e39e2108e383e0.tar.gz
Move encoding::warnings to ext/
Diffstat (limited to 'lib')
-rw-r--r--lib/.gitignore1
-rw-r--r--lib/encoding/warnings.pm239
-rw-r--r--lib/encoding/warnings/t/1-warning.t36
-rw-r--r--lib/encoding/warnings/t/2-fatal.t34
-rw-r--r--lib/encoding/warnings/t/3-normal.t23
-rw-r--r--lib/encoding/warnings/t/4-lexical.t49
6 files changed, 1 insertions, 381 deletions
diff --git a/lib/.gitignore b/lib/.gitignore
index 4176521cc0..b267abf1cd 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -305,6 +305,7 @@
/auto
/base.pm
/constant.pm
+/encoding
/encoding.pm
/fields.pm
/lib.pm
diff --git a/lib/encoding/warnings.pm b/lib/encoding/warnings.pm
deleted file mode 100644
index 5e6aec0c8f..0000000000
--- a/lib/encoding/warnings.pm
+++ /dev/null
@@ -1,239 +0,0 @@
-package encoding::warnings;
-$encoding::warnings::VERSION = '0.11';
-
-use strict;
-use 5.007;
-
-=head1 NAME
-
-encoding::warnings - Warn on implicit encoding conversions
-
-=head1 VERSION
-
-This document describes version 0.11 of encoding::warnings, released
-June 5, 2007.
-
-=head1 SYNOPSIS
-
- use encoding::warnings; # or 'FATAL' to raise fatal exceptions
-
- utf8::encode($a = chr(20000)); # a byte-string (raw bytes)
- $b = chr(20000); # a unicode-string (wide characters)
-
- # "Bytes implicitly upgraded into wide characters as iso-8859-1"
- $c = $a . $b;
-
-=head1 DESCRIPTION
-
-=head2 Overview of the problem
-
-By default, there is a fundamental asymmetry in Perl's unicode model:
-implicit upgrading from byte-strings to unicode-strings assumes that
-they were encoded in I<ISO 8859-1 (Latin-1)>, but unicode-strings are
-downgraded with UTF-8 encoding. This happens because the first 256
-codepoints in Unicode happens to agree with Latin-1.
-
-However, this silent upgrading can easily cause problems, if you happen
-to mix unicode strings with non-Latin1 data -- i.e. byte-strings encoded
-in UTF-8 or other encodings. The error will not manifest until the
-combined string is written to output, at which time it would be impossible
-to see where did the silent upgrading occur.
-
-=head2 Detecting the problem
-
-This module simplifies the process of diagnosing such problems. Just put
-this line on top of your main program:
-
- use encoding::warnings;
-
-Afterwards, implicit upgrading of high-bit bytes will raise a warning.
-Ex.: C<Bytes implicitly upgraded into wide characters as iso-8859-1 at
-- line 7>.
-
-However, strings composed purely of ASCII code points (C<0x00>..C<0x7F>)
-will I<not> trigger this warning.
-
-You can also make the warnings fatal by importing this module as:
-
- use encoding::warnings 'FATAL';
-
-=head2 Solving the problem
-
-Most of the time, this warning occurs when a byte-string is concatenated
-with a unicode-string. There are a number of ways to solve it:
-
-=over 4
-
-=item * Upgrade both sides to unicode-strings
-
-If your program does not need compatibility for Perl 5.6 and earlier,
-the recommended approach is to apply appropriate IO disciplines, so all
-data in your program become unicode-strings. See L<encoding>, L<open> and
-L<perlfunc/binmode> for how.
-
-=item * Downgrade both sides to byte-strings
-
-The other way works too, especially if you are sure that all your data
-are under the same encoding, or if compatibility with older versions
-of Perl is desired.
-
-You may downgrade strings with C<Encode::encode> and C<utf8::encode>.
-See L<Encode> and L<utf8> for details.
-
-=item * Specify the encoding for implicit byte-string upgrading
-
-If you are confident that all byte-strings will be in a specific
-encoding like UTF-8, I<and> need not support older versions of Perl,
-use the C<encoding> pragma:
-
- use encoding 'utf8';
-
-Similarly, this will silence warnings from this module, and preserve the
-default behaviour:
-
- use encoding 'iso-8859-1';
-
-However, note that C<use encoding> actually had three distinct effects:
-
-=over 4
-
-=item * PerlIO layers for B<STDIN> and B<STDOUT>
-
-This is similar to what L<open> pragma does.
-
-=item * Literal conversions
-
-This turns I<all> literal string in your program into unicode-strings
-(equivalent to a C<use utf8>), by decoding them using the specified
-encoding.
-
-=item * Implicit upgrading for byte-strings
-
-This will silence warnings from this module, as shown above.
-
-=back
-
-Because literal conversions also work on empty strings, it may surprise
-some people:
-
- use encoding 'big5';
-
- my $byte_string = pack("C*", 0xA4, 0x40);
- print length $a; # 2 here.
- $a .= ""; # concatenating with a unicode string...
- print length $a; # 1 here!
-
-In other words, do not C<use encoding> unless you are certain that the
-program will not deal with any raw, 8-bit binary data at all.
-
-However, the C<Filter =E<gt> 1> flavor of C<use encoding> will I<not>
-affect implicit upgrading for byte-strings, and is thus incapable of
-silencing warnings from this module. See L<encoding> for more details.
-
-=back
-
-=head1 CAVEATS
-
-For Perl 5.9.4 or later, this module's effect is lexical.
-
-For Perl versions prior to 5.9.4, this module affects the whole script,
-instead of inside its lexical block.
-
-=cut
-
-# Constants.
-sub ASCII () { 0 }
-sub LATIN1 () { 1 }
-sub FATAL () { 2 }
-
-# Install a ${^ENCODING} handler if no other one are already in place.
-sub import {
- my $class = shift;
- my $fatal = shift || '';
-
- local $@;
- return if ${^ENCODING} and ref(${^ENCODING}) ne $class;
- return unless eval { require Encode; 1 };
-
- my $ascii = Encode::find_encoding('us-ascii') or return;
- my $latin1 = Encode::find_encoding('iso-8859-1') or return;
-
- # Have to undef explicitly here
- undef ${^ENCODING};
-
- # Install a warning handler for decode()
- my $decoder = bless(
- [
- $ascii,
- $latin1,
- (($fatal eq 'FATAL') ? 'Carp::croak' : 'Carp::carp'),
- ], $class,
- );
-
- ${^ENCODING} = $decoder;
- $^H{$class} = 1;
-}
-
-sub unimport {
- my $class = shift;
- $^H{$class} = undef;
- undef ${^ENCODING};
-}
-
-# Don't worry about source code literals.
-sub cat_decode {
- my $self = shift;
- return $self->[LATIN1]->cat_decode(@_);
-}
-
-# Warn if the data is not purely US-ASCII.
-sub decode {
- my $self = shift;
-
- DO_WARN: {
- if ($] >= 5.009004) {
- my $hints = (caller(0))[10];
- $hints->{ref($self)} or last DO_WARN;
- }
-
- local $@;
- my $rv = eval { $self->[ASCII]->decode($_[0], Encode::FB_CROAK()) };
- return $rv unless $@;
-
- require Carp;
- no strict 'refs';
- $self->[FATAL]->(
- "Bytes implicitly upgraded into wide characters as iso-8859-1"
- );
-
- }
-
- return $self->[LATIN1]->decode(@_);
-}
-
-sub name { 'iso-8859-1' }
-
-1;
-
-__END__
-
-=head1 SEE ALSO
-
-L<perlunicode>, L<perluniintro>
-
-L<open>, L<utf8>, L<encoding>, L<Encode>
-
-=head1 AUTHORS
-
-Audrey Tang
-
-=head1 COPYRIGHT
-
-Copyright 2004, 2005, 2006, 2007 by Audrey Tang E<lt>cpan@audreyt.orgE<gt>.
-
-This program is free software; you can redistribute it and/or modify it
-under the same terms as Perl itself.
-
-See L<http://www.perl.com/perl/misc/Artistic.html>
-
-=cut
diff --git a/lib/encoding/warnings/t/1-warning.t b/lib/encoding/warnings/t/1-warning.t
deleted file mode 100644
index c7525aef1f..0000000000
--- a/lib/encoding/warnings/t/1-warning.t
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/perl
-# $File: /member/local/autrijus/encoding-warnings//t/1-warning.t $ $Author: autrijus $
-# $Revision: #5 $ $Change: 6145 $ $DateTime: 2004-07-16T03:49:06.717424Z $
-
-BEGIN {
- unless (eval { require Encode } ) {
- print "1..0 # Skip: no Encode\n";
- exit 0;
- }
-}
-
-use Test;
-BEGIN { plan tests => 2 }
-
-use strict;
-use encoding::warnings;
-ok(encoding::warnings->VERSION);
-
-if ($] < 5.008) {
- ok(1);
- exit;
-}
-
-my ($a, $b, $c, $ok);
-
-$SIG{__WARN__} = sub {
- if ($_[0] =~ /upgraded/) { ok(1); exit }
-};
-
-utf8::encode($a = chr(20000));
-$b = chr(20000);
-$c = $a . $b;
-
-ok($ok);
-
-__END__
diff --git a/lib/encoding/warnings/t/2-fatal.t b/lib/encoding/warnings/t/2-fatal.t
deleted file mode 100644
index 4fc16a1df6..0000000000
--- a/lib/encoding/warnings/t/2-fatal.t
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/perl
-# $File: /member/local/autrijus/encoding-warnings/t/2-fatal.t $ $Author: autrijus $
-# $Revision: #4 $ $Change: 1626 $ $DateTime: 2004-03-14T16:53:19.351256Z $
-
-BEGIN {
- unless (eval { require Encode } ) {
- print "1..0 # Skip: no Encode\n";
- exit 0;
- }
-}
-
-use Test;
-BEGIN { plan tests => 2 }
-
-use strict;
-use encoding::warnings 'FATAL';
-ok(encoding::warnings->VERSION);
-
-if ($] < 5.008) {
- ok(1);
- exit;
-}
-
-my ($a, $b, $c, $ok);
-
-$SIG{__DIE__} = sub {
- if ($_[0] =~ /upgraded/) { ok(1); exit }
-};
-
-utf8::encode($a = chr(20000));
-$b = chr(20000);
-$c = $a . $b;
-
-ok($ok);
diff --git a/lib/encoding/warnings/t/3-normal.t b/lib/encoding/warnings/t/3-normal.t
deleted file mode 100644
index f0e6446a56..0000000000
--- a/lib/encoding/warnings/t/3-normal.t
+++ /dev/null
@@ -1,23 +0,0 @@
-use Test;
-BEGIN { plan tests => 2 }
-
-use strict;
-use encoding::warnings 'FATAL';
-ok(encoding::warnings->VERSION);
-
-if ($] < 5.008) {
- ok(1);
- exit;
-}
-
-my ($a, $b, $c, $ok);
-$ok = 1;
-
-$SIG{__DIE__} = sub { $ok = 0 };
-$SIG{__WARN__} = sub { $ok = 0 };
-
-$a = chr(20000);
-$b = chr(20000);
-$c = $a . $b;
-
-ok($ok);
diff --git a/lib/encoding/warnings/t/4-lexical.t b/lib/encoding/warnings/t/4-lexical.t
deleted file mode 100644
index e80c50411a..0000000000
--- a/lib/encoding/warnings/t/4-lexical.t
+++ /dev/null
@@ -1,49 +0,0 @@
-use strict;
-use Test;
-BEGIN {
- use Config;
- if ($Config::Config{'extensions'} !~ /\bEncode\b/) {
- print "1..0 # Skip: Encode was not built\n";
- exit 0;
- }
-
- plan tests => 3;
-}
-
-{
- use encoding::warnings;
- ok(encoding::warnings->VERSION);
-
- if ($] < 5.009004) {
- ok('skipped');
- ok('skipped');
- exit;
- }
-
- my ($a, $b, $c, $warned);
-
- local $SIG{__WARN__} = sub {
- if ($_[0] =~ /upgraded/) { $warned = 1 }
- };
-
- utf8::encode($a = chr(20000));
- $b = chr(20000);
- $c = $a . $b;
- ok($warned);
-}
-
-{
- my ($a, $b, $c, $warned);
-
- local $SIG{__WARN__} = sub {
- if ($_[0] =~ /upgraded/) { $warned = 1 }
- };
-
- utf8::encode($a = chr(20000));
- $b = chr(20000);
- $c = $a . $b;
- ok(!$warned);
-}
-
-
-__END__