diff options
author | David Mitchell <davem@iabyn.com> | 2017-01-21 15:47:43 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2017-01-21 15:47:43 +0000 |
commit | b949b68f22c917863062bdb655e0e956abeca90d (patch) | |
tree | b540178de74ee7bea504e50d567a15a5b890706a /mg.c | |
parent | 5219f5ec5c453357ab78722da5a91806251ffb67 (diff) | |
download | perl-b949b68f22c917863062bdb655e0e956abeca90d.tar.gz |
avoid disabling utf8 pos cache on tainted strings
RT #130584
When pos() or similar is used on a utf8 string, perl attaches magic
to it that caches a couple of byte<->char offset conversions. This can
avoid quadratic behaviour when continually scanning a big chunk of a long
string to convert a byte offset to a char offset when pos() is called.
v5.17.3-203-g7d1328b added code to invalidate this cache when get magic is
called on an SV, since the get magic may change the value of the SV.
However, under -T, taint magic gets added to a tainted string, which
includes a get method which doesn't actually change the SV's value.
So make a special exception to get-magic-cache-invalidation if the only
get magic on the string is taint.
This stops code like the following going quadratic under -T:
$_ = "... long tainted utf8 string ...";
while ( /..../g) {
my $p = pos(); # calculating pos() goes quadratic
}
Diffstat (limited to 'mg.c')
-rw-r--r-- | mg.c | 31 |
1 files changed, 25 insertions, 6 deletions
@@ -171,6 +171,7 @@ Perl_mg_get(pTHX_ SV *sv) const I32 mgs_ix = SSNEW(sizeof(MGS)); bool saved = FALSE; bool have_new = 0; + bool taint_only = TRUE; /* the only get method seen is taint */ MAGIC *newmg, *head, *cur, *mg; PERL_ARGS_ASSERT_MG_GET; @@ -189,10 +190,13 @@ Perl_mg_get(pTHX_ SV *sv) if (!(mg->mg_flags & MGf_GSKIP) && vtbl && vtbl->svt_get) { /* taint's mg get is so dumb it doesn't need flag saving */ - if (!saved && mg->mg_type != PERL_MAGIC_taint) { - save_magic(mgs_ix, sv); - saved = TRUE; - } + if (mg->mg_type != PERL_MAGIC_taint) { + taint_only = FALSE; + if (!saved) { + save_magic(mgs_ix, sv); + saved = TRUE; + } + } vtbl->svt_get(aTHX_ sv, mg); @@ -210,8 +214,23 @@ Perl_mg_get(pTHX_ SV *sv) ~(SVs_GMG|SVs_SMG|SVs_RMG); } else if (vtbl == &PL_vtbl_utf8) { - /* get-magic can reallocate the PV */ - magic_setutf8(sv, mg); + /* get-magic can reallocate the PV, unless there's only taint + * magic */ + if (taint_only) { + MAGIC *mg2; + for (mg2 = nextmg; mg2; mg2 = mg2->mg_moremagic) { + if ( mg2->mg_type != PERL_MAGIC_taint + && !(mg2->mg_flags & MGf_GSKIP) + && mg2->mg_virtual + && mg2->mg_virtual->svt_get + ) { + taint_only = FALSE; + break; + } + } + } + if (!taint_only) + magic_setutf8(sv, mg); } mg = nextmg; |