summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo van der Sanden <hv@crypt.org>2023-04-10 11:54:41 +0100
committerSteve Hay <steve.m.hay@googlemail.com>2023-04-10 11:55:19 +0100
commit087438c31af9a09867cf323916ec25db60089561 (patch)
treec3a0d25b3a2cb373d890028415829b038f4097b0
parent50b48d85285cd147d92cee25c00a3692c1d821cf (diff)
downloadperl-087438c31af9a09867cf323916ec25db60089561.tar.gz
GH19478: applying taint is no reason to mess with pos
25fdce4a16 introduced a chunk in sv_magic() to "force pos to be stored as characters, not bytes" whenever any magic was applied to a string marked UTF8. It is not clear why a random call to sv_magic(), eg to mark a string as tainted, needs to do this - it would seem more logical for the check to happen either earlier (when the string first qualifies as SvMAGICAL(sv) && DO_UTF8(sv)) or later (eg on mg_find). Experimentally remove this block - it appears to cause no test failures, and allows the new test cases to pass. (cherry picked from commit f757874dac95bb608303f02ed7a2eeeaf1ec116b)
-rw-r--r--sv.c10
-rw-r--r--t/op/taint.t14
2 files changed, 13 insertions, 11 deletions
diff --git a/sv.c b/sv.c
index 16bba941cb..669678a350 100644
--- a/sv.c
+++ b/sv.c
@@ -5764,16 +5764,6 @@ Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const int how,
}
}
- /* Force pos to be stored as characters, not bytes. */
- if (SvMAGICAL(sv) && DO_UTF8(sv)
- && (mg = mg_find(sv, PERL_MAGIC_regex_global))
- && mg->mg_len != -1
- && mg->mg_flags & MGf_BYTES) {
- mg->mg_len = (SSize_t)sv_pos_b2u_flags(sv, (STRLEN)mg->mg_len,
- SV_CONST_RETURN);
- mg->mg_flags &= ~MGf_BYTES;
- }
-
/* Rest of work is done else where */
mg = sv_magicext(sv,obj,how,vtable,name,namlen);
diff --git a/t/op/taint.t b/t/op/taint.t
index f4f06f7461..30cd6f954a 100644
--- a/t/op/taint.t
+++ b/t/op/taint.t
@@ -25,7 +25,7 @@ if ($NoTaintSupport) {
exit 0;
}
-plan tests => 1054;
+plan tests => 1058;
$| = 1;
@@ -2955,6 +2955,18 @@ is_tainted("$ovtaint", "overload preserves taint");
taint_sig3($TAINT);
}
+{
+ # GH 19478: panic on s///gre with tainted utf8 strings
+ my $u = "\x{10469}";
+ my $r1 = ("foo$TAINT" =~ s/./"$u"/gre);
+ is($r1, "$u$u$u", 'tainted string with utf8 s/.//gre');
+ my $r2 = ("foo$TAINT" =~ s/.*/"${u}"/gre);
+ is($r2, "$u$u", 'tainted string with utf8 s/.*//gre');
+ my $r3 = ("foo$TAINT" =~ s/.+/"${u}"/gre);
+ is($r3, $u, 'tainted string with utf8 s/.+//gre');
+ my $r4 = ("$u$TAINT" =~ s/./""/gre);
+ is($r4, '', 'tainted utf8 string with s///gre');
+}
# This may bomb out with the alarm signal so keep it last
SKIP: {