diff options
author | David Mitchell <davem@iabyn.com> | 2011-10-25 15:42:44 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2012-06-13 13:25:50 +0100 |
commit | c4b0eb7f70e4f46549b40591da99f8705364650b (patch) | |
tree | 857278fb723022df08d5f9f8c1804faff8a6f94a | |
parent | 8a45afe535d962511dc34619dcdb405aeff849da (diff) | |
download | perl-c4b0eb7f70e4f46549b40591da99f8705364650b.tar.gz |
fix for overload/stringfy and pp_regcomp
(bug found by code inspection)
The code in pp_regcomp to make a mortal copy of the pattern string
in the case of overload or utf8+'use bytes', missed the case of overload
with utf8. Fix this and at the same time simplify the code.
-rw-r--r-- | lib/overload.t | 41 | ||||
-rw-r--r-- | pp_ctl.c | 29 |
2 files changed, 47 insertions, 23 deletions
diff --git a/lib/overload.t b/lib/overload.t index 4be12603d6..b9316207ea 100644 --- a/lib/overload.t +++ b/lib/overload.t @@ -48,7 +48,7 @@ package main; $| = 1; BEGIN { require './test.pl' } -plan tests => 4983; +plan tests => 4995; use Scalar::Util qw(tainted); @@ -2186,6 +2186,45 @@ fresh_perl_is 'Error message when sub stub is encountered'; } +{ + # check that the right number of stringifications + # and the correct un-utf8-ifying happen on regex compile + package utf8_match; + my $c; + use overload '""' => sub { $c++; $_[0][0] ? "^\x{100}\$" : "^A\$"; }; + my $o = bless [0], 'utf8_match'; + + $o->[0] = 0; + $c = 0; + ::ok("A" =~ "^A\$", "regex stringify utf8=0 ol=0 bytes=0"); + ::ok("A" =~ $o, "regex stringify utf8=0 ol=1 bytes=0"); + ::is($c, 1, "regex stringify utf8=0 ol=1 bytes=0 count"); + + $o->[0] = 1; + $c = 0; + ::ok("\x{100}" =~ "^\x{100}\$", + "regex stringify utf8=1 ol=0 bytes=0"); + ::ok("\x{100}" =~ $o, "regex stringify utf8=1 ol=1 bytes=0"); + ::is($c, 1, "regex stringify utf8=1 ol=1 bytes=0 count"); + + use bytes; + + $o->[0] = 0; + $c = 0; + ::ok("A" =~ "^A\$", "regex stringify utf8=0 ol=0 bytes=1"); + ::ok("A" =~ $o, "regex stringify utf8=0 ol=1 bytes=1"); + ::is($c, 1, "regex stringify utf8=0 ol=1 bytes=1 count"); + + $o->[0] = 1; + $c = 0; + ::ok("\xc4\x80" =~ "^\x{100}\$", + "regex stringify utf8=1 ol=0 bytes=1"); + ::ok("\xc4\x80" =~ $o, "regex stringify utf8=1 ol=1 bytes=1"); + ::is($c, 1, "regex stringify utf8=1 ol=1 bytes=1 count"); + + +} + { # undefining the overload stash -- KEEP THIS TEST LAST package ant; use overload '+' => 'onion'; @@ -205,28 +205,13 @@ PP(pp_regcomp) if (PL_op->op_flags & OPf_SPECIAL) PL_reginterp_cnt = I32_MAX; /* Mark as safe. */ - if (DO_UTF8(tmpstr)) { - assert (SvUTF8(tmpstr)); - } else if (SvUTF8(tmpstr)) { - /* Not doing UTF-8, despite what the SV says. Is this only if - we're trapped in use 'bytes'? */ - /* Make a copy of the octet sequence, but without the flag on, - as the compiler now honours the SvUTF8 flag on tmpstr. */ - STRLEN len; - const char *const p = SvPV(tmpstr, len); - tmpstr = newSVpvn_flags(p, len, SVs_TEMP); - } - else if (SvAMAGIC(tmpstr)) { - /* make a copy to avoid extra stringifies */ - tmpstr = newSVpvn_flags(t, len, SVs_TEMP | SvUTF8(tmpstr)); - } - - /* If it is gmagical, create a mortal copy, but without calling - get-magic, as we have already done that. */ - if(SvGMAGICAL(tmpstr)) { - SV *mortalcopy = sv_newmortal(); - sv_setsv_flags(mortalcopy, tmpstr, 0); - tmpstr = mortalcopy; + if ((SvUTF8(tmpstr) && IN_BYTES) + || SvGMAGICAL(tmpstr) || SvAMAGIC(tmpstr)) + { + /* make a temporary copy; either to avoid repeating + * get-magic, or overloaded stringify, or to convert to bytes */ + tmpstr = newSVpvn_flags(t, len, SVs_TEMP | + (IN_BYTES ? 0 : SvUTF8(tmpstr))); } if (eng) |