diff options
author | Loren Merritt <pengvado@videolan.org> | 2022-07-14 00:09:06 +0000 |
---|---|---|
committer | ℕicolas ℝ <nicolas@atoomic.org> | 2022-07-20 15:03:07 -0600 |
commit | be76ad45a5a937ec83906e666e2318c0351115b4 (patch) | |
tree | 8bc97d8c1b33b15fe81ea192c063f270853b3918 | |
parent | 7d47ba27dfa1ef9db23c4e6f934b0698d7fd539f (diff) | |
download | perl-be76ad45a5a937ec83906e666e2318c0351115b4.tar.gz |
pp_subst: optimize by not calling utf8_length
The character length of the target string isn't actually needed here, and computing it with utf8_length often took more CPU time than the regex match itself.
-rw-r--r-- | .mailmap | 1 | ||||
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | pp_hot.c | 11 | ||||
-rw-r--r-- | t/re/pat_rt_report.t | 3 |
4 files changed, 10 insertions, 6 deletions
```diff
diff --git a/.mailmap b/.mailmap
--- a/.mailmap
+++ b/.mailmap
@@ -1110,6 +1110,7 @@ Lincoln D. Stein <lstein@cshl.org> Lincoln Stein <lstein@formaggio.cshl.org>
 Lincoln D. Stein <lstein@cshl.org> Lincoln Stein <lstein@genome.wi.mit.edu>
 Linda Walsh <unknown> Linda Walsh <unknown>
 Lionel Cons <lionel.cons@cern.ch> Lionel Cons <lionel.cons@cern.ch>
+Loren Merritt <pengvado@videolan.org> Loren Merritt <pengvado@videolan.org>
 Louis Strous <louis.strous@gmail.com> Louis Strous <louis.strous@gmail.com>
 Lubomir Rintel <lkundrak@v3.sk> Lubomir Rintel (GoodData) <lubo.rintel@gooddata.com>
 Lubomir Rintel <lkundrak@v3.sk> Lubomir Rintel <lkundrak@v3.sk>
diff --git a/AUTHORS b/AUTHORS
--- a/AUTHORS
+++ b/AUTHORS
@@ -827,6 +827,7 @@ Lesley Binks <lesley.binks@gmail.com>
 Lincoln D. Stein <lstein@cshl.org>
 Linda Walsh
 Lionel Cons <lionel.cons@cern.ch>
+Loren Merritt <pengvado@videolan.org>
 Louis Strous <louis.strous@gmail.com>
 Lubomir Rintel <lkundrak@v3.sk>
 Luc St-Louis <luc.st-louis@ca.transport.bombardier.com>
diff --git a/pp_hot.c b/pp_hot.c
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -4250,7 +4250,6 @@ PP(pp_subst)
     STRLEN len;
     int force_on_match = 0;
     const I32 oldsave = PL_savestack_ix;
-    STRLEN slen;
     bool doutf8 = FALSE; /* whether replacement is in utf8 */
 #ifdef PERL_ANY_COW
     bool was_cow;
@@ -4316,10 +4315,12 @@ PP(pp_subst)
         DIE(aTHX_ "panic: pp_subst, pm=%p, orig=%p", pm, orig);
 
     strend = orig + len;
-    slen = DO_UTF8(TARG) ? utf8_length((U8*)orig, (U8*)strend) : len;
-    maxiters = 2 * slen + 10;   /* We can match twice at each
-                                   position, once with zero-length,
-                                   second time with non-zero. */
+    /* We can match twice at each position, once with zero-length,
+     * second time with non-zero.
+     * Don't handle utf8 specially; we can use length-in-bytes as an
+     * upper bound on length-in-characters, and avoid the cpu-cost of
+     * computing a tighter bound. */
+    maxiters = 2 * len + 10;
 
     /* handle the empty pattern */
     if (!RX_PRELEN(rx) && PL_curpm && !prog->mother_re) {
diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t
index ced4fe670b..895da8ea81 100644
--- a/t/re/pat_rt_report.t
+++ b/t/re/pat_rt_report.t
@@ -1076,10 +1076,11 @@ SKIP: {
             unless $Config{extensions} =~ / Encode /;
 
         # Test case cut down by jhi
-        fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, {}, 'Segfault using HTML::Entities');
+        fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities');
 use Encode;
 my $t = ord('A') == 193 ? "\xEA" : "\xE9";
 Encode::_utf8_on($t);
+substr($t,0);
 $t =~ s/([^a])//ge;
 EOP
     }
```