diff options
author | Yves Orton <demerphq@gmail.com> | 2023-01-09 20:37:28 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2023-01-15 17:21:03 +0100 |
commit | 5c6240fadac873b60c46677b4d5b180f4fb6074b (patch) | |
tree | ce9346482bac081a0cbfd59d2edc2610c740c2d5 | |
parent | 37040543d024b3ecb0aecd78849bd5af61408d02 (diff) | |
download | perl-5c6240fadac873b60c46677b4d5b180f4fb6074b.tar.gz |
regexec.c - fix accept in CURLYX/WHILEM construct.
The ACCEPT logic didnt know how to handle WHILEM, which for
some reason does not have a next_off defined. I am not sure why.
This was revealed by forcing CURLYX optimisations off. This includes
a patch to test what happens if we embed an eval group in the tests
run by regexp.t when run via regexp_normal.t, which disabled CURLYX ->
CURLYN and CURLYM optimisations and revealed this issue.
This adds t/re/regexp_normal.t which test "normalized" forms of
the patterns in t/re/re_tests by munging them in various ways
to see if they still behave as expected. For instance injecting
a (?{}) can disable optimisations.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | regexec.c | 6 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 10 | ||||
-rw-r--r-- | t/re/re_tests | 4 | ||||
-rw-r--r-- | t/re/regexp.t | 43 | ||||
-rw-r--r-- | t/re/regexp_normal.t | 10 |
6 files changed, 62 insertions, 12 deletions
@@ -6207,6 +6207,7 @@ t/re/regex_sets_compat.t Test (?[ ]) is compatible with old [ ] t/re/regexp.t See if regular expressions work t/re/regexp_noamp.t See if regular expressions work with optimizations t/re/regexp_nonull.t See if regexps work without trailing nulls +t/re/regexp_normal.t See if regexps work when expressions are normalized in various ways t/re/regexp_notrie.t See if regular expressions work without trie optimisation t/re/regexp_qr.t See if regular expressions work as qr// t/re/regexp_qr_embed.t See if regular expressions work with embedded qr// @@ -8527,7 +8527,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) for ( cursor = scan; cursor && ( OP(cursor) != END ); - cursor = ( REGNODE_TYPE( OP(cursor) ) == END ) + cursor = ( + REGNODE_TYPE( OP(cursor) ) == END + || REGNODE_TYPE( OP(cursor) ) == WHILEM + ) ? REGNODE_AFTER(cursor) : regnext(cursor) ){ @@ -8741,7 +8744,6 @@ NULL ); /* First just match a string of min A's. */ - if (n < min) { ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen); cur_curlyx->u.curlyx.lastloc = locinput; diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index b626f8eae1..95c2e33322 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -24,7 +24,7 @@ BEGIN { our @global; -plan tests => 508; # Update this when adding/deleting tests. +plan tests => 510; # Update this when adding/deleting tests. run_tests() unless caller; @@ -126,7 +126,13 @@ sub run_tests { "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; } } - + { + our $this_counter; + ok( "ABDE" =~ /(A(A|B(*ACCEPT)|C)+D)(E)(?{ $this_counter++ })/, + "ACCEPT/CURLYX/EVAL - pattern should match"); + is( "$1-$2", "AB-B", + "Make sure that ACCEPT works in CURLYX by using EVAL"); + } { # Test if $^N and $+ work in (?{}) diff --git a/t/re/re_tests b/t/re/re_tests index a585e30bce..e961fcae17 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -964,8 +964,8 @@ tt+$ xxxtt y - - (?i) y - - (?a:((?u)\w)\W) \xC0\xC0 y $& \xC0\xC0 '(?!\A)x'm a\nxb\n y - - -^(a(b)?)+$ aba y -$1-$2- -a-- -^(aa(bb)?)+$ aabbaa y -$1-$2- -aa-- +^(a(b)?)+$ aba y -$1-$2- -a-- # !normal +^(aa(bb)?)+$ aabbaa y -$1-$2- -aa-- # !normal '^.{9}abc.*\n'm 123\nabcabcabcabc\n y - - ^(a)?a$ a y -$1- -- ^(a)?(?(1)a|b)+$ a n - - diff --git a/t/re/regexp.t b/t/re/regexp.t index 03a7178157..e536429e85 100644 --- a/t/re/regexp.t +++ b/t/re/regexp.t @@ -144,7 +144,7 @@ $nulnul = "\0" x 2; my $OP = $qr ? 'qr' : 'm'; $| = 1; - +$::normalize_pat = $::normalize_pat; # silence warning TEST: foreach (@tests) { $test_num++; @@ -186,6 +186,22 @@ foreach (@tests) { my $todo_qr = $qr_embed_thr && ($result =~ s/t//); my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//)); ++$skip if $result =~ s/M// && !defined &DynaLoader::boot_DynaLoader; + + if ($::normalize_pat) { + my $opat= $pat; + # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))* + $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g; + $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g; + if ($opat eq $pat) { + # we didn't change anything, no point in testing it again. + $skip++; + $reason = "Test not valid for $0"; + } elsif ($comment=~/!\s*normal/) { + $result .= "T"; + $comment = "# Known to be broken under $0"; + } + } + if ($result =~ s/ ( [Ss] ) //x) { if (($1 eq 'S' && $regex_sets) || ($1 eq 's' && ! $regex_sets)) { $skip++; @@ -421,9 +437,28 @@ foreach (@tests) { $pat = $modified; } } + if ($::normalize_pat){ + if (!$skip && ($result eq "y" or $result eq "n")) { + my $opat= $pat; + # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))* + $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g; + $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g; + # inject an EVAL into the front of the pattern. + # this should disable all optimizations. + $pat =~ s/\A(.)/$1(?{ \$the_counter++ })/ + or die $pat; + } elsif (!$skip) { + $skip = $reason = "Test not applicable to $0"; + } + } for my $study ('', 'study $subject;', 'utf8::upgrade($subject);', 'utf8::upgrade($subject); study $subject;') { + if ( $skip ) { + print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n"; + next TEST; + } + our $the_counter = 0; # used in normalization tests # Need to make a copy, else the utf8::upgrade of an already studied # scalar confuses things. my $subject = $subject; @@ -486,11 +521,7 @@ EOFCODE eval $code; } chomp( my $err = $@ ); - if ( $skip ) { - print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n"; - next TEST; - } - elsif ($result eq 'c') { + if ($result eq 'c') { if ($err !~ m!^\Q$expect!) { print "not ok $testname$todo (compile) $input => '$err'\n"; next TEST } last; # no need to study a syntax error } diff --git a/t/re/regexp_normal.t b/t/re/regexp_normal.t new file mode 100644 index 0000000000..ca945bd8a6 --- /dev/null +++ b/t/re/regexp_normal.t @@ -0,0 +1,10 @@ +#!./perl + +$::normalize_pat = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; |