summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Fraser <fraserbn@gmail.com>2011-07-10 11:06:47 -0700
committerFather Chrysostomos <sprout@cpan.org>2011-07-12 21:46:52 -0700
commit2435e5d3caac53d7a120839117ce82b9e4700caa (patch)
tree54c0048498609e88bcac870e085759347bfd4f64
parent9f6ec8dd8d5cdd18bbed7e4e89cec93bd2d8f9e7 (diff)
downloadperl-2435e5d3caac53d7a120839117ce82b9e4700caa.tar.gz
pad.c: flags checking for the UTF8 flag when necessary
-rw-r--r--MANIFEST1
-rw-r--r--ext/XS-APItest/t/fetch_pad_names.t321
-rw-r--r--pad.c12
3 files changed, 331 insertions, 3 deletions
diff --git a/MANIFEST b/MANIFEST
index c0ec18469a..9a12bb34e3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3721,6 +3721,7 @@ ext/XS-APItest/t/copyhints.t test hv_copy_hints_hv() API
ext/XS-APItest/t/customop.t XS::APItest: tests for custom ops
ext/XS-APItest/t/eval-filter.t Simple source filter/eval test
ext/XS-APItest/t/exception.t XS::APItest extension
+ext/XS-APItest/t/fetch_pad_names.t Tests for UTF8 names in pad
ext/XS-APItest/t/grok.t XS::APItest: tests for grok* functions
ext/XS-APItest/t/hash.t XS::APItest: tests for hash related APIs
ext/XS-APItest/t/keyword_multiline.t test keyword plugin parsing across lines
diff --git a/ext/XS-APItest/t/fetch_pad_names.t b/ext/XS-APItest/t/fetch_pad_names.t
new file mode 100644
index 0000000000..384ca36904
--- /dev/null
+++ b/ext/XS-APItest/t/fetch_pad_names.t
@@ -0,0 +1,321 @@
+use strict;
+use warnings;
+use Encode ();
+
+use Test::More tests => 77;
+
+use XS::APItest qw( fetch_pad_names pad_scalar );
+
+local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /Wide character in print at/ };
+
+ok defined &fetch_pad_names, "sub imported";
+ok defined &pad_scalar;
+
+my $cv = sub {
+ my $test;
+};
+
+ok fetch_pad_names($cv), "Fetch working.";
+is ref fetch_pad_names($cv), ref [], 'Fetch returns an arrayref';
+is @{fetch_pad_names($cv)}, 1, 'Sub has one lexical.';
+is fetch_pad_names($cv)->[0], '$test', "Fetching a simple scalar works.";
+
+$cv = sub {
+ use utf8;
+
+ my $zest = 'invariant';
+ my $zèst = 'latin-1';
+
+ return [pad_scalar(1, "zèst"), pad_scalar(1, "z\350st"), pad_scalar(1, "z\303\250st")];
+};
+
+my $names_av = fetch_pad_names($cv);
+my $flagged = my $unflagged = "\$z\x{c3}\x{a8}st";
+Encode::_utf8_on($flagged);
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => 'latin-1', msg => 'Fetches through UTF-8.' },
+ { cmp => 'latin-1', msg => 'Fetches through Latin-1.' },
+ { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
+ ],
+ pad_size => {
+ total => { cmp => 2, msg => 'Sub has two lexicals.' },
+ utf8 => { cmp => 0, msg => '' },
+ invariant => { cmp => 2, msg => '' },
+ },
+ vars => [
+ { name => '$zest', msg => 'Sub has [\$zest].', type => 'ok' },
+ { name => "\$z\x{e8}st", msg => "Sub has [\$t\x{e8}st].", type => 'ok' },
+ { name => $unflagged, msg => "Sub doesn't have [$unflagged].", type => 'not ok' },
+ { name => $flagged, msg => "But does have it when flagged.", type => 'ok' },
+ ],
+});
+
+$cv = do {
+ my $ascii = 'Defined';
+ sub {
+ use utf8;
+ my $партнеры = $ascii;
+ return [$партнеры, pad_scalar(1, "партнеры"), pad_scalar(1, "\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213")];
+ };
+};
+
+$names_av = fetch_pad_names($cv);
+my $hex_var = "\$\x{43f}\x{430}\x{440}\x{442}\x{43d}\x{435}\x{440}\x{44b}";
+$flagged = $unflagged = "\$\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213";
+Encode::_utf8_on($flagged);
+
+my $russian_var = do {
+ use utf8;
+ '$партнеры';
+};
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => 'Defined', msg => 'UTF-8 fetching works.' },
+ { cmp => 'Defined', msg => 'pad_scalar fetch.' },
+ { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
+ ],
+ pad_size => {
+ total => { cmp => 2, msg => 'Sub has two lexicals, including those it closed over.' },
+ utf8 => { cmp => 1, msg => 'UTF-8 in the pad.' },
+ invariant => { cmp => 1, msg => '' },
+ },
+ vars => [
+ { name => '$ascii', msg => 'Sub has [$ascii].', type => 'ok' },
+ { name => $russian_var, msg => "Sub has [$russian_var].", type => 'ok' },
+ { name => $hex_var, msg => "Sub has [$hex_var].", type => 'ok' },
+ { name => $unflagged, msg => "Sub doesn't have [$unflagged]", type => 'not ok' },
+ { name => $flagged, msg => "But does have it when flagged.", type => 'ok' },
+ ],
+});
+
+my $leon1 = "\$L\x{e9}on";
+my $leon2 = my $leon3 = "\$L\x{c3}\x{a9}on";
+Encode::_utf8_on($leon2);
+
+local $@;
+$cv = eval <<"END";
+ sub {
+ use utf8;
+ my \$Leon = 'Invariant';
+ my $leon1 = 'Latin-1';
+ return [ \$Leon, $leon1, $leon2, pad_scalar(1, "L\x{e9}on"), pad_scalar(1, "L\x{c3}\x{a9}on")];
+ };
+END
+
+my $err = $@;
+ok !$err, $@;
+
+$names_av = fetch_pad_names($cv);
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => 'Invariant', msg => '' },
+ { cmp => 'Latin-1', msg => "Fetched through [$leon1]" },
+ { cmp => 'Latin-1', msg => "Fetched through [$leon2]" },
+ { cmp => 'Latin-1', msg => 'pad_scalar fetch.' },
+ { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
+ ],
+ pad_size => {
+ total => { cmp => 2, msg => 'Sub has two lexicals' },
+ utf8 => { cmp => 0, msg => 'Latin-1 not upgraded to UTF-8.' },
+ invariant => { cmp => 2, msg => '' },
+ },
+ vars => [
+ { name => '$Leon', msg => 'Sub has [$Leon].', type => 'ok' },
+ { name => $leon1, msg => "Sub has [$leon1].", type => 'ok' },
+ { name => $leon2, msg => "Sub has [$leon2].", type => 'ok' },
+ { name => $leon3, msg => "Sub doesn't have [$leon3]", type => 'not ok' },
+ ],
+});
+
+
+{
+ use utf8;
+ my $Cèon = 4;
+ my $str1 = "\$C\x{e8}on";
+ my $str2 = my $str3 = "\$C\x{c3}\x{a8}on";
+ Encode::_utf8_on($str2);
+
+ local $@;
+ $cv = eval <<"END_EVAL";
+ sub { [ \$Cèon, $str1, $str2 ] };
+END_EVAL
+
+ $err = $@;
+ ok !$err;
+
+ $names_av = fetch_pad_names($cv);
+
+ general_tests( $cv->(), $names_av, {
+ results => [ ({ SKIP => 1 }) x 3 ],
+ pad_size => {
+ total => { cmp => 1, msg => 'Sub has one lexical, which it closed over.' },
+ utf8 => { cmp => 0, msg => '' },
+ invariant => { cmp => 1, msg => '' },
+ },
+ vars => [
+ { name => '$Ceon', msg => "Sub doesn't have [\$Ceon].", type => 'not ok' },
+ map({ { name => $_, msg => "Sub has [$_].", type => 'ok' } } $str1, $str2 ),
+ { name => $str3, msg => "Sub doesn't have [$str3]", type => 'not ok' },
+ ],
+ });
+
+}
+
+#XXX: This will most certainly break once clean stashes are out.
+$cv = sub {
+ use utf8;
+ our $戦国 = 10;
+ {
+ no strict 'refs';
+ my ($symref, $encoded_sym) = (__PACKAGE__ . "::戦国") x 2;
+ utf8::encode($encoded_sym);
+ return [ $戦国, ${$symref}, ${$encoded_sym} ];
+ }
+};
+
+my $flagged_our = my $unflagged_our = "\$\346\210\246\345\233\275";
+Encode::_utf8_on($flagged_our);
+
+$names_av = fetch_pad_names($cv);
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => '10', msg => 'Fetched UTF-8 our var.' },
+ ({ cmp => '10', msg => "Symref fetch." }) x 2,
+ ],
+ pad_size => {
+ total => { cmp => 3, msg => 'Sub has three lexicals.' },
+ utf8 => { cmp => 1, msg => 'Japanese stored as UTF-8.' },
+ invariant => { cmp => 2, msg => '' },
+ },
+ vars => [
+ { name => "\$\x{6226}\x{56fd}", msg => "Sub has [\$\x{6226}\x{56fd}].", type => 'ok' },
+ { name => $flagged_our, msg => "Sub has [$flagged_our].", type => 'ok' },
+ { name => $unflagged_our, msg => "Sub doesn't have [$unflagged_our]", type => 'not ok' },
+ ],
+});
+
+
+{
+
+use utf8;
+{
+ my $test;
+ BEGIN {
+ $test = "t\x{c3}\x{a8}st";
+ Encode::_utf8_on($test);
+ }
+ use constant test => $test;
+}
+
+$cv = sub {
+ my $tèst = 'Good';
+
+ return [
+ $tèst,
+ pad_scalar(1, "tèst"), #"UTF-8"
+ pad_scalar(1, "t\350st"), #"Latin-1"
+ pad_scalar(1, "t\x{c3}\x{a8}st"), #"Octal"
+ pad_scalar(1, test()), #'UTF-8 enc'
+ ];
+};
+
+$names_av = fetch_pad_names($cv);
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => 'Good', msg => 'Fetched through Perl.' },
+ { cmp => 'Good', msg => "pad_scalar: UTF-8 works." },
+ { cmp => 'Good', msg => "pad_scalar: Latin-1 works." },
+ { cmp => 'NOT_IN_PAD', msg => "pad_scalar: Doesn't fetch through octets." },
+ { cmp => 'Good', msg => "pad_scalar: UTF-8-through-encoding works." },
+ ],
+ pad_size => {
+ total => { cmp => 1, msg => 'Sub has one lexical.' },
+ utf8 => { cmp => 0, msg => '' },
+ invariant => { cmp => 1, msg => '' },
+ },
+ vars => [],
+});
+
+}
+
+$cv = do {
+ use utf8;
+ sub {
+ my $ニコニコ = 'katakana';
+ my $にこにこ = 'hiragana';
+
+ return [
+ $ニコニコ,
+ $にこにこ,
+ pad_scalar(1, "にこにこ"),
+ pad_scalar(1, "\x{306b}\x{3053}\x{306b}\x{3053}"),
+ pad_scalar(1, "\343\201\253\343\201\223\343\201\253\343\201\223"),
+ pad_scalar(1, "ニコニコ"),
+ pad_scalar(1, "\x{30cb}\x{30b3}\x{30cb}\x{30b3}"),
+ pad_scalar(1, "\343\203\213\343\202\263\343\203\213\343\202\263"),
+ ];
+ }
+};
+
+$names_av = fetch_pad_names($cv);
+
+general_tests( $cv->(), $names_av, {
+ results => [
+ { cmp => 'katakana', msg => '' },
+ { cmp => 'hiragana', msg => '' },
+ { cmp => 'hiragana', msg => '' },
+ { cmp => 'hiragana', msg => '' },
+ { cmp => 'NOT_IN_PAD', msg => '' },
+ { cmp => 'katakana', msg => '' },
+ { cmp => 'katakana', msg => '' },
+ { cmp => 'NOT_IN_PAD', msg => '' },
+ ],
+ pad_size => {
+ total => { cmp => 2, msg => 'Sub has two lexicals.' },
+ utf8 => { cmp => 2, msg => '' },
+ invariant => { cmp => 0, msg => '' },
+ },
+ vars => [],
+});
+
+{
+ {
+ my $utf8_e;
+ BEGIN {
+ $utf8_e = "e";
+ Encode::_utf8_on($utf8_e);
+ }
+ use constant utf8_e => $utf8_e;
+ }
+ my $e = 'Invariant';
+ is pad_scalar(1, "e"), pad_scalar(1, utf8_e), 'Fetches the same thing, even if invariant but with differing utf8ness.';
+}
+
+
+sub general_tests {
+ my ($results, $names_av, $tests) = @_;
+
+ for my $i (0..$#$results) {
+ next if $tests->{results}[$i]{SKIP};
+ is $results->[$i], $tests->{results}[$i]{cmp}, $tests->{results}[$i]{msg};
+ }
+
+ is @$names_av, $tests->{pad_size}{total}{cmp}, $tests->{pad_size}{total}{msg};
+ is grep( Encode::is_utf8($_), @$names_av), $tests->{pad_size}{utf8}{cmp};
+ is grep( !Encode::is_utf8($_), @$names_av), $tests->{pad_size}{invariant}{cmp};
+
+ for my $var (@{$tests->{vars}}) {
+ if ($var->{type} eq 'ok') {
+ ok $var->{name} ~~ $names_av, $var->{msg};
+ } else {
+ ok !($var->{name} ~~ $names_av), $var->{msg};
+ }
+ }
+
+}
diff --git a/pad.c b/pad.c
index afdc8086bb..450fe2ee6f 100644
--- a/pad.c
+++ b/pad.c
@@ -501,7 +501,7 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen,
PERL_ARGS_ASSERT_PAD_ADD_NAME_PVN;
- if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK))
+ if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK|padadd_UTF8_NAME))
Perl_croak(aTHX_ "panic: pad_add_name_pvn illegal flag bits 0x%" UVxf,
(UV)flags);
@@ -513,7 +513,7 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen,
pad_check_dup(namesv, flags & padadd_OUR, ourstash);
}
- offset = pad_alloc_name(namesv, flags, typestash, ourstash);
+ offset = pad_alloc_name(namesv, flags & ~padadd_UTF8_NAME, typestash, ourstash);
/* not yet introduced */
COP_SEQ_RANGE_LOW_set(namesv, PERL_PADSEQ_INTRO);
@@ -813,7 +813,7 @@ Perl_pad_findmy_pvn(pTHX_ const char *namepv, STRLEN namelen, U32 flags)
pad_peg("pad_findmy_pvn");
- if (flags)
+ if (flags & ~padadd_UTF8_NAME)
Perl_croak(aTHX_ "panic: pad_findmy_pvn illegal flag bits 0x%" UVxf,
(UV)flags);
@@ -874,6 +874,8 @@ Perl_pad_findmy_sv(pTHX_ SV *name, U32 flags)
STRLEN namelen;
PERL_ARGS_ASSERT_PAD_FINDMY_SV;
namepv = SvPV(name, namelen);
+ if (SvUTF8(name))
+ flags |= padadd_UTF8_NAME;
return pad_findmy_pvn(namepv, namelen, flags);
}
@@ -968,6 +970,10 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
PERL_ARGS_ASSERT_PAD_FINDLEX;
+ if (flags & ~padadd_UTF8_NAME)
+ Perl_croak(aTHX_ "panic: pad_findlex illegal flag bits 0x%" UVxf,
+ (UV)flags);
+
*out_flags = 0;
DEBUG_Xv(PerlIO_printf(Perl_debug_log,