summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-01-09 20:37:28 +0100
committerYves Orton <demerphq@gmail.com>2023-01-15 17:21:03 +0100
commit5c6240fadac873b60c46677b4d5b180f4fb6074b (patch)
treece9346482bac081a0cbfd59d2edc2610c740c2d5
parent37040543d024b3ecb0aecd78849bd5af61408d02 (diff)
downloadperl-5c6240fadac873b60c46677b4d5b180f4fb6074b.tar.gz
regexec.c - fix accept in CURLYX/WHILEM construct.
The ACCEPT logic didnt know how to handle WHILEM, which for some reason does not have a next_off defined. I am not sure why. This was revealed by forcing CURLYX optimisations off. This includes a patch to test what happens if we embed an eval group in the tests run by regexp.t when run via regexp_normal.t, which disabled CURLYX -> CURLYN and CURLYM optimisations and revealed this issue. This adds t/re/regexp_normal.t which test "normalized" forms of the patterns in t/re/re_tests by munging them in various ways to see if they still behave as expected. For instance injecting a (?{}) can disable optimisations.
-rw-r--r--MANIFEST1
-rw-r--r--regexec.c6
-rw-r--r--t/re/pat_re_eval.t10
-rw-r--r--t/re/re_tests4
-rw-r--r--t/re/regexp.t43
-rw-r--r--t/re/regexp_normal.t10
6 files changed, 62 insertions, 12 deletions
diff --git a/MANIFEST b/MANIFEST
index a153ce7a06..f6b78371a8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -6207,6 +6207,7 @@ t/re/regex_sets_compat.t Test (?[ ]) is compatible with old [ ]
t/re/regexp.t See if regular expressions work
t/re/regexp_noamp.t See if regular expressions work with optimizations
t/re/regexp_nonull.t See if regexps work without trailing nulls
+t/re/regexp_normal.t See if regexps work when expressions are normalized in various ways
t/re/regexp_notrie.t See if regular expressions work without trie optimisation
t/re/regexp_qr.t See if regular expressions work as qr//
t/re/regexp_qr_embed.t See if regular expressions work with embedded qr//
diff --git a/regexec.c b/regexec.c
index d2c020feb2..a022098380 100644
--- a/regexec.c
+++ b/regexec.c
@@ -8527,7 +8527,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
for (
cursor = scan;
cursor && ( OP(cursor) != END );
- cursor = ( REGNODE_TYPE( OP(cursor) ) == END )
+ cursor = (
+ REGNODE_TYPE( OP(cursor) ) == END
+ || REGNODE_TYPE( OP(cursor) ) == WHILEM
+ )
? REGNODE_AFTER(cursor)
: regnext(cursor)
){
@@ -8741,7 +8744,6 @@ NULL
);
/* First just match a string of min A's. */
-
if (n < min) {
ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen);
cur_curlyx->u.curlyx.lastloc = locinput;
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index b626f8eae1..95c2e33322 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -24,7 +24,7 @@ BEGIN {
our @global;
-plan tests => 508; # Update this when adding/deleting tests.
+plan tests => 510; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -126,7 +126,13 @@ sub run_tests {
"Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8";
}
}
-
+ {
+ our $this_counter;
+ ok( "ABDE" =~ /(A(A|B(*ACCEPT)|C)+D)(E)(?{ $this_counter++ })/,
+ "ACCEPT/CURLYX/EVAL - pattern should match");
+ is( "$1-$2", "AB-B",
+ "Make sure that ACCEPT works in CURLYX by using EVAL");
+ }
{
# Test if $^N and $+ work in (?{})
diff --git a/t/re/re_tests b/t/re/re_tests
index a585e30bce..e961fcae17 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -964,8 +964,8 @@ tt+$ xxxtt y - -
(?i) y - -
(?a:((?u)\w)\W) \xC0\xC0 y $& \xC0\xC0
'(?!\A)x'm a\nxb\n y - -
-^(a(b)?)+$ aba y -$1-$2- -a--
-^(aa(bb)?)+$ aabbaa y -$1-$2- -aa--
+^(a(b)?)+$ aba y -$1-$2- -a-- # !normal
+^(aa(bb)?)+$ aabbaa y -$1-$2- -aa-- # !normal
'^.{9}abc.*\n'm 123\nabcabcabcabc\n y - -
^(a)?a$ a y -$1- --
^(a)?(?(1)a|b)+$ a n - -
diff --git a/t/re/regexp.t b/t/re/regexp.t
index 03a7178157..e536429e85 100644
--- a/t/re/regexp.t
+++ b/t/re/regexp.t
@@ -144,7 +144,7 @@ $nulnul = "\0" x 2;
my $OP = $qr ? 'qr' : 'm';
$| = 1;
-
+$::normalize_pat = $::normalize_pat; # silence warning
TEST:
foreach (@tests) {
$test_num++;
@@ -186,6 +186,22 @@ foreach (@tests) {
my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
++$skip if $result =~ s/M// && !defined &DynaLoader::boot_DynaLoader;
+
+ if ($::normalize_pat) {
+ my $opat= $pat;
+ # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))*
+ $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g;
+ $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g;
+ if ($opat eq $pat) {
+ # we didn't change anything, no point in testing it again.
+ $skip++;
+ $reason = "Test not valid for $0";
+ } elsif ($comment=~/!\s*normal/) {
+ $result .= "T";
+ $comment = "# Known to be broken under $0";
+ }
+ }
+
if ($result =~ s/ ( [Ss] ) //x) {
if (($1 eq 'S' && $regex_sets) || ($1 eq 's' && ! $regex_sets)) {
$skip++;
@@ -421,9 +437,28 @@ foreach (@tests) {
$pat = $modified;
}
}
+ if ($::normalize_pat){
+ if (!$skip && ($result eq "y" or $result eq "n")) {
+ my $opat= $pat;
+ # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))*
+ $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g;
+ $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g;
+ # inject an EVAL into the front of the pattern.
+ # this should disable all optimizations.
+ $pat =~ s/\A(.)/$1(?{ \$the_counter++ })/
+ or die $pat;
+ } elsif (!$skip) {
+ $skip = $reason = "Test not applicable to $0";
+ }
+ }
for my $study ('', 'study $subject;', 'utf8::upgrade($subject);',
'utf8::upgrade($subject); study $subject;') {
+ if ( $skip ) {
+ print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n";
+ next TEST;
+ }
+ our $the_counter = 0; # used in normalization tests
# Need to make a copy, else the utf8::upgrade of an already studied
# scalar confuses things.
my $subject = $subject;
@@ -486,11 +521,7 @@ EOFCODE
eval $code;
}
chomp( my $err = $@ );
- if ( $skip ) {
- print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n";
- next TEST;
- }
- elsif ($result eq 'c') {
+ if ($result eq 'c') {
if ($err !~ m!^\Q$expect!) { print "not ok $testname$todo (compile) $input => '$err'\n"; next TEST }
last; # no need to study a syntax error
}
diff --git a/t/re/regexp_normal.t b/t/re/regexp_normal.t
new file mode 100644
index 0000000000..ca945bd8a6
--- /dev/null
+++ b/t/re/regexp_normal.t
@@ -0,0 +1,10 @@
+#!./perl
+
+$::normalize_pat = 1;
+for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') {
+ if (-r $file) {
+ do $file or die $@;
+ exit;
+ }
+}
+die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n";