summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Loren Merritt <pengvado@videolan.org> 2022-07-14 00:09:06 +0000
committer ℕicolas ℝ <nicolas@atoomic.org> 2022-07-20 15:03:07 -0600
commit be76ad45a5a937ec83906e666e2318c0351115b4 (patch)
tree 8bc97d8c1b33b15fe81ea192c063f270853b3918
parent 7d47ba27dfa1ef9db23c4e6f934b0698d7fd539f (diff)
download perl-be76ad45a5a937ec83906e666e2318c0351115b4.tar.gz
pp_subst: optimize by not calling utf8_length
The character length just isn't needed here, and computing it often took more CPU time than the actual regex match.
-rw-r--r-- .mailmap 1
-rw-r--r-- AUTHORS 1
-rw-r--r-- pp_hot.c 11
-rw-r--r-- t/re/pat_rt_report.t 3
4 files changed, 10 insertions, 6 deletions
diff --git a/.mailmap b/.mailmap
index 71c997cddb..c879ff464c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1110,6 +1110,7 @@ Lincoln D. Stein <lstein@cshl.org> Lincoln Stein <lstein@formaggio.cshl.org>
Lincoln D. Stein <lstein@cshl.org> Lincoln Stein <lstein@genome.wi.mit.edu>
Linda Walsh <unknown> Linda Walsh <unknown>
Lionel Cons <lionel.cons@cern.ch> Lionel Cons <lionel.cons@cern.ch>
+Loren Merritt <pengvado@videolan.org> Loren Merritt <pengvado@videolan.org>
Louis Strous <louis.strous@gmail.com> Louis Strous <louis.strous@gmail.com>
Lubomir Rintel <lkundrak@v3.sk> Lubomir Rintel (GoodData) <lubo.rintel@gooddata.com>
Lubomir Rintel <lkundrak@v3.sk> Lubomir Rintel <lkundrak@v3.sk>
diff --git a/AUTHORS b/AUTHORS
index 44e587bfe8..bd3984f578 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -827,6 +827,7 @@ Lesley Binks <lesley.binks@gmail.com>
Lincoln D. Stein <lstein@cshl.org>
Linda Walsh
Lionel Cons <lionel.cons@cern.ch>
+Loren Merritt <pengvado@videolan.org>
Louis Strous <louis.strous@gmail.com>
Lubomir Rintel <lkundrak@v3.sk>
Luc St-Louis <luc.st-louis@ca.transport.bombardier.com>
diff --git a/pp_hot.c b/pp_hot.c
index f583261558..97985b7b5e 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -4250,7 +4250,6 @@ PP(pp_subst)
STRLEN len;
int force_on_match = 0;
const I32 oldsave = PL_savestack_ix;
- STRLEN slen;
bool doutf8 = FALSE; /* whether replacement is in utf8 */
#ifdef PERL_ANY_COW
bool was_cow;
@@ -4316,10 +4315,12 @@ PP(pp_subst)
DIE(aTHX_ "panic: pp_subst, pm=%p, orig=%p", pm, orig);
strend = orig + len;
- slen = DO_UTF8(TARG) ? utf8_length((U8*)orig, (U8*)strend) : len;
- maxiters = 2 * slen + 10; /* We can match twice at each
- position, once with zero-length,
- second time with non-zero. */
+ /* We can match twice at each position, once with zero-length,
+ * second time with non-zero.
+ * Don't handle utf8 specially; we can use length-in-bytes as an
+ * upper bound on length-in-characters, and avoid the cpu-cost of
+ * computing a tighter bound. */
+ maxiters = 2 * len + 10;
/* handle the empty pattern */
if (!RX_PRELEN(rx) && PL_curpm && !prog->mother_re) {
diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t
index ced4fe670b..895da8ea81 100644
--- a/t/re/pat_rt_report.t
+++ b/t/re/pat_rt_report.t
@@ -1076,10 +1076,11 @@ SKIP: {
unless $Config{extensions} =~ / Encode /;
# Test case cut down by jhi
- fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, {}, 'Segfault using HTML::Entities');
+ fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities');
use Encode;
my $t = ord('A') == 193 ? "\xEA" : "\xE9";
Encode::_utf8_on($t);
+substr($t,0);
$t =~ s/([^a])//ge;
EOP
}