diff options
author | Hugo van der Sanden <hv@crypt.org> | 2023-04-10 11:54:41 +0100 |
---|---|---|
committer | Steve Hay <steve.m.hay@googlemail.com> | 2023-04-10 11:55:19 +0100 |
commit | 087438c31af9a09867cf323916ec25db60089561 (patch) | |
tree | c3a0d25b3a2cb373d890028415829b038f4097b0 | |
parent | 50b48d85285cd147d92cee25c00a3692c1d821cf (diff) | |
download | perl-087438c31af9a09867cf323916ec25db60089561.tar.gz |
GH19478: applying taint is no reason to mess with pos
25fdce4a16 introduced a chunk in sv_magic() to "force pos to be stored
as characters, not bytes" whenever any magic was applied to a string
marked UTF8.
It is not clear why a random call to sv_magic(), eg to mark a string as
tainted, needs to do this - it would seem more logical for the check to
happen either earlier (when the string first qualifies as SvMAGICAL(sv)
&& DO_UTF8(sv)) or later (eg on mg_find).
Experimentally remove this block - it appears to cause no test failures,
and allows the new test cases to pass.
(cherry picked from commit f757874dac95bb608303f02ed7a2eeeaf1ec116b)
-rw-r--r-- | sv.c | 10 | ||||
-rw-r--r-- | t/op/taint.t | 14 |
2 files changed, 13 insertions, 11 deletions
@@ -5764,16 +5764,6 @@ Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const int how, } } - /* Force pos to be stored as characters, not bytes. */ - if (SvMAGICAL(sv) && DO_UTF8(sv) - && (mg = mg_find(sv, PERL_MAGIC_regex_global)) - && mg->mg_len != -1 - && mg->mg_flags & MGf_BYTES) { - mg->mg_len = (SSize_t)sv_pos_b2u_flags(sv, (STRLEN)mg->mg_len, - SV_CONST_RETURN); - mg->mg_flags &= ~MGf_BYTES; - } - /* Rest of work is done else where */ mg = sv_magicext(sv,obj,how,vtable,name,namlen); diff --git a/t/op/taint.t b/t/op/taint.t index f4f06f7461..30cd6f954a 100644 --- a/t/op/taint.t +++ b/t/op/taint.t @@ -25,7 +25,7 @@ if ($NoTaintSupport) { exit 0; } -plan tests => 1054; +plan tests => 1058; $| = 1; @@ -2955,6 +2955,18 @@ is_tainted("$ovtaint", "overload preserves taint"); taint_sig3($TAINT); } +{ + # GH 19478: panic on s///gre with tainted utf8 strings + my $u = "\x{10469}"; + my $r1 = ("foo$TAINT" =~ s/./"$u"/gre); + is($r1, "$u$u$u", 'tainted string with utf8 s/.//gre'); + my $r2 = ("foo$TAINT" =~ s/.*/"${u}"/gre); + is($r2, "$u$u", 'tainted string with utf8 s/.*//gre'); + my $r3 = ("foo$TAINT" =~ s/.+/"${u}"/gre); + is($r3, $u, 'tainted string with utf8 s/.+//gre'); + my $r4 = ("$u$TAINT" =~ s/./""/gre); + is($r4, '', 'tainted utf8 string with s///gre'); +} # This may bomb out with the alarm signal so keep it last SKIP: { |