From 05b13cf680588a26de64f13d2b3be385e17624bc Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Tue, 7 Mar 2023 18:50:04 +0100 Subject: regcomp.c - track parens related to CURLYX and CURLYM This was originally a patch which made somewhat drastic changes to how we represent capture buffers, which Dave M and I are still discussing offline and which has a larger impact than is acceptable to address at the current time. As such I have reverted the controversial parts of this patch for now, while keeping most of it intact even if in some cases the changes are unused except for debugging purposes. This patch still contains valuable changes, for instance teaching CURLYX and CURLYM about how many parens there are before the curly[1] (which will be useful in follow up patches even if strictly speaking they are not directly used yet), tests and other cleanups. Also this patch is sufficiently large that reverting it out would have a large effect on the patches that were made on top of it. Thus keeping most of this patch while eliminating the controversial parts of it for now seemed the best approach, especially as some of the changes it introduces and the follow up patches based on it are very useful in cleaning up the structures we use to represent regops. [1] Curly is the regexp internals term for quantifiers, named after x{min,max} "curly brace" quantifiers. --- t/re/pat.t | 32 +++++++++++++++++++++++++++++++- t/re/re_tests | 10 ++++++++-- 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 't') diff --git a/t/re/pat.t b/t/re/pat.t index c494434675..b837157c42 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -27,7 +27,7 @@ skip_all_without_unicode_tables(); my $has_locales = locales_enabled('LC_CTYPE'); -plan tests => 1231; # Update this when adding/deleting tests. +plan tests => 1240; # Update this when adding/deleting tests. 
run_tests() unless caller; @@ -2426,6 +2426,36 @@ SKIP: print "ok"; }, 'ok', {}, 'gh20826: test regex save stack overflow'); } + { + local $::TODO = "Not Yet Implemented"; + my ($x, $y); + ok( "aaa" =~ /(?:(a)?\1)+/, + "GH Issue #18865 'aaa' - pattern matches"); + $x = "($-[0],$+[0])"; + ok( "aaa" =~ /(?:((?{})a)?\1)+/, + "GH Issue #18865 'aaa' - deoptimized pattern matches"); + $y = "($-[0],$+[0])"; + is( $y, $x, + "GH Issue #18865 'aaa' - test optimization"); + + ok( "ababab" =~ /(?:(?:(ab))?\1)+/, + "GH Issue #18865 'ababab' - pattern matches"); + $x = "($-[0],$+[0])"; + ok( "ababab" =~ /(?:(?:((?{})ab))?\1)+/, + "GH Issue #18865 'ababab' - deoptimized pattern matches"); + $y = "($-[0],$+[0])"; + is( $y, $x, + "GH Issue #18865 'ababab' - test optimization"); + + ok( "XaaXbbXb" =~ /(?:X([ab])?\1)+/, + "GH Issue #18865 'XaaXbbXb' - pattern matches"); + $x = "($-[0],$+[0])"; + ok( "XaaXbbXb" =~ /(?:X((?{})[ab])?\1)+/, + "GH Issue #18865 'XaaXbbXb' - deoptimized pattern matches"); + $y = "($-[0],$+[0])"; + is( $y, $x, + "GH Issue #18865 'XaaXbbXb' - test optimization"); + } } # End of sub run_tests 1; diff --git a/t/re/re_tests b/t/re/re_tests index 2afc639313..7379a39787 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -478,7 +478,7 @@ a(?:b|c|d)+(.) acdbcdbe y $1 e a(?:b|c|d){2}(.) acdbcdbe y $1 b a(?:b|c|d){4,5}(.) acdbcdbe y $1 b a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d -((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar +((foo)|(bar))* foobar Ty $1-$2-$3 bar--bar # was bar-foo-bar prior to 5.37.7 :(?: - c - Sequence (? incomplete a(?:b|c|d){6,7}(.) acdbcdbe y $1 e a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e @@ -501,7 +501,7 @@ a(?:b|(c|e){1,2}?|d)+?(.) 
ace y $1$2 ce ((a{4})+) aaaaaaaaa y $1 aaaaaaaa (((aa){2})+) aaaaaaaaaa y $1 aaaaaaaa (((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa -(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?:(f)(o)(o)|(b)(a)(r))* foobar Ty $1:$2:$3:$4:$5:$6 :::b:a:r (?<=a)b ab y $& b (?<=af?)b ab y $& b (?<=a)b cb n - - @@ -2126,6 +2126,7 @@ AB\s+\x{100} AB \x{100}X y - - ((?|(?a)(?-1)|(?b)(?-1)|(?c)(?-1))) aa y $1 aa # GH 20653 ((?|(?a)(?-1)|(?b)(?-1)|(?c)(?-1))) bb y $1 bb # GH 20653 ((?|(?a)(?-1)|(?b)(?-1)|(?c)(?-1))) cc y $1 cc # GH 20653 + (?|(a)|(b)) b y $+ b # GH 20912 (?|(a)(?{$::plus_got=$+})|(b)(?{$::plus_got=$+})) b y $::plus_got b # GH 20912 (?|(a)|(b)) b y $^N b # GH 20912 @@ -2134,6 +2135,11 @@ AB\s+\x{100} AB \x{100}X y - - (?|(a)(?{$::plus_got=$+})|(b)(?{$::plus_got=$+})) a y $::plus_got a # GH 20912 (?|(a)|(b)) a y $^N a # GH 20912 (?|(a)(?{$::caret_n_got=$^N})|(b)(?{$::caret_n_got=$^N})) a y $::caret_n_got a # GH 20912 + +/(([ab]+)|([cd]+)|([ef]+))+/ ace y $1-$2-$3-$4=$& e---e=ace +/(([ab]+)|([cd]+)|([ef]+))+/ aceb Ty $1-$2-$3-$4=$& b-b--=aceb +/(([ab]+)|([cd]+)|([ef]+))+/ acebd Ty $1-$2-$3-$4=$& d--d-=acebd +/(([ab]+)|([cd]+)|([ef]+))+/ acebdf Ty $1-$2-$3-$4=$& f---f=acebdf # Keep these lines at the end of the file # pat string y/n/etc expr expected-expr skip-reason comment # vim: softtabstop=0 noexpandtab -- cgit v1.2.1