summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorBram <p5p@perl.wizbit.be>2010-08-26 13:27:24 +0200
committerYves Orton <demerphq@gmail.com>2010-08-26 13:36:57 +0200
commitd1c771f5a95fddf225347623798f65884aa6eee7 (patch)
treec4487cca6e52a71bbd4ca06df7771c75d5ea5fcf /t
parent39c4496953eaedb023e4a860fd4bdeacdba098c8 (diff)
downloadperl-d1c771f5a95fddf225347623798f65884aa6eee7.tar.gz
VERB nodes in the regex engine should NOT be marked as JUMPABLE.
JUMPABLE nodes can be ignored during certain phases of regex execution, including ones where backtracking is affected. This change disables this behviour so that the VERBS can perform their desired results. Committer has taken the liberty of modifying the patch so that all VERBS are jumped, thus making the JUMPABLE expression a little simpler. I have left Bram's change to JUMPABLE intact, but inside of a comment for now. See discussion in thread for [perl #71942] *COMMIT bypasses optimisation for futher details. http://rt.perl.org/rt3/Ticket/Display.html?id=71942 There appears to be room for futher optimisation here by moving the JUMPABLE logic to regex-compile time. Currently it is arguable that the "optimisation" this patch seeks to avoid is actually not an optimisation at all, as it happens OVER AND OVER during execution of a match, thus the extra effort might actually outweight the benefit, especially on large strings.
Diffstat (limited to 't')
-rw-r--r--t/re/pat_advanced.t293
1 files changed, 292 insertions, 1 deletions
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index 881fd9eb24..ff96079296 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -21,7 +21,7 @@ BEGIN {
}
-plan tests => 1159; # Update this when adding/deleting tests.
+plan tests => 1303; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1781,6 +1781,297 @@ sub run_tests {
'IsPunct agrees with [:punct:] with explicit Latin1';
}
+
+ {
+ # Tests for [#perl 71942]
+ our $count_a;
+ our $count_b;
+
+ my $c = 0;
+ for my $re (
+# [
+# should match?,
+# input string,
+# re 1,
+# re 2,
+# expected values of count_a and count_b,
+# ]
+ [
+ 0,
+ "xababz",
+ qr/a+(?{$count_a++})b?(*COMMIT)(*FAIL)/,
+ qr/a+(?{$count_b++})b?(*COMMIT)z/,
+ 1,
+ ],
+ [
+ 0,
+ "xababz",
+ qr/a+(?{$count_a++})b?(*COMMIT)\s*(*FAIL)/,
+ qr/a+(?{$count_b++})b?(*COMMIT)\s*z/,
+ 1,
+ ],
+ [
+ 0,
+ "xababz",
+ qr/a+(?{$count_a++})(?:b|)?(*COMMIT)(*FAIL)/,
+ qr/a+(?{$count_b++})(?:b|)?(*COMMIT)z/,
+ 1,
+ ],
+ [
+ 0,
+ "xababz",
+ qr/a+(?{$count_a++})b{0,6}(*COMMIT)(*FAIL)/,
+ qr/a+(?{$count_b++})b{0,6}(*COMMIT)z/,
+ 1,
+ ],
+ [
+ 0,
+ "xabcabcz",
+ qr/a+(?{$count_a++})(bc){0,6}(*COMMIT)(*FAIL)/,
+ qr/a+(?{$count_b++})(bc){0,6}(*COMMIT)z/,
+ 1,
+ ],
+ [
+ 0,
+ "xabcabcz",
+ qr/a+(?{$count_a++})(bc*){0,6}(*COMMIT)(*FAIL)/,
+ qr/a+(?{$count_b++})(bc*){0,6}(*COMMIT)z/,
+ 1,
+ ],
+
+
+ [
+ 0,
+ "aaaabtz",
+ qr/a+(?{$count_a++})b?(*PRUNE)(*FAIL)/,
+ qr/a+(?{$count_b++})b?(*PRUNE)z/,
+ 4,
+ ],
+ [
+ 0,
+ "aaaabtz",
+ qr/a+(?{$count_a++})b?(*PRUNE)\s*(*FAIL)/,
+ qr/a+(?{$count_b++})b?(*PRUNE)\s*z/,
+ 4,
+ ],
+ [
+ 0,
+ "aaaabtz",
+ qr/a+(?{$count_a++})(?:b|)(*PRUNE)(*FAIL)/,
+ qr/a+(?{$count_b++})(?:b|)(*PRUNE)z/,
+ 4,
+ ],
+ [
+ 0,
+ "aaaabtz",
+ qr/a+(?{$count_a++})b{0,6}(*PRUNE)(*FAIL)/,
+ qr/a+(?{$count_b++})b{0,6}(*PRUNE)z/,
+ 4,
+ ],
+ [
+ 0,
+ "aaaabctz",
+ qr/a+(?{$count_a++})(bc){0,6}(*PRUNE)(*FAIL)/,
+ qr/a+(?{$count_b++})(bc){0,6}(*PRUNE)z/,
+ 4,
+ ],
+ [
+ 0,
+ "aaaabctz",
+ qr/a+(?{$count_a++})(bc*){0,6}(*PRUNE)(*FAIL)/,
+ qr/a+(?{$count_b++})(bc*){0,6}(*PRUNE)z/,
+ 4,
+ ],
+
+ [
+ 0,
+ "aaabaaab",
+ qr/a+(?{$count_a++;})b?(*SKIP)(*FAIL)/,
+ qr/a+(?{$count_b++;})b?(*SKIP)z/,
+ 2,
+ ],
+ [
+ 0,
+ "aaabaaab",
+ qr/a+(?{$count_a++;})b?(*SKIP)\s*(*FAIL)/,
+ qr/a+(?{$count_b++;})b?(*SKIP)\s*z/,
+ 2,
+ ],
+ [
+ 0,
+ "aaabaaab",
+ qr/a+(?{$count_a++;})(?:b|)(*SKIP)(*FAIL)/,
+ qr/a+(?{$count_b++;})(?:b|)(*SKIP)z/,
+ 2,
+ ],
+ [
+ 0,
+ "aaabaaab",
+ qr/a+(?{$count_a++;})b{0,6}(*SKIP)(*FAIL)/,
+ qr/a+(?{$count_b++;})b{0,6}(*SKIP)z/,
+ 2,
+ ],
+ [
+ 0,
+ "aaabcaaabc",
+ qr/a+(?{$count_a++;})(bc){0,6}(*SKIP)(*FAIL)/,
+ qr/a+(?{$count_b++;})(bc){0,6}(*SKIP)z/,
+ 2,
+ ],
+ [
+ 0,
+ "aaabcaaabc",
+ qr/a+(?{$count_a++;})(bc*){0,6}(*SKIP)(*FAIL)/,
+ qr/a+(?{$count_b++;})(bc*){0,6}(*SKIP)z/,
+ 2,
+ ],
+
+
+ [
+ 0,
+ "aaddbdaabyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) z \s* c \1 /x,
+ 4,
+ ],
+ [
+ 0,
+ "aaddbdaabyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* z \s* c \1 /x,
+ 4,
+ ],
+ [
+ 0,
+ "aaddbdaabyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) z \s* c \1 /x,
+ 4,
+ ],
+ [
+ 0,
+ "aaddbdaabyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) z \s* c \1 /x,
+ 4,
+ ],
+ [
+ 0,
+ "aaddbcdaabcyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) z \s* c \1 /x,
+ 4,
+ ],
+ [
+ 0,
+ "aaddbcdaabcyzc",
+ qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc*){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (bc*){0,6} (*SKIP:T1) z \s* c \1 /x,
+ 4,
+ ],
+
+
+ [
+ 0,
+ "aaaaddbdaabyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) z \s* c \1 /x,
+ 2,
+ ],
+ [
+ 0,
+ "aaaaddbdaabyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* z \s* c \1 /x,
+ 2,
+ ],
+ [
+ 0,
+ "aaaaddbdaabyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) z \s* c \1 /x,
+ 2,
+ ],
+ [
+ 0,
+ "aaaaddbdaabyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x,
+ 2,
+ ],
+ [
+ 0,
+ "aaaaddbcdaabcyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x,
+ 2,
+ ],
+ [
+ 0,
+ "aaaaddbcdaabcyzc",
+ qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x,
+ qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x,
+ 2,
+ ],
+
+
+ [
+ 0,
+ "AbcdCBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) C? (*THEN) | A D) z/x,
+ 1,
+ ],
+ [
+ 0,
+ "AbcdCBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) \s* (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) C? (*THEN) | A D) \s* z/x,
+ 1,
+ ],
+ [
+ 0,
+ "AbcdCBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) (?:C|) (*THEN) | A D) (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) (?:C|) (*THEN) | A D) z/x,
+ 1,
+ ],
+ [
+ 0,
+ "AbcdCBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) C{0,6} (*THEN) | A D) (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) C{0,6} (*THEN) | A D) z/x,
+ 1,
+ ],
+ [
+ 0,
+ "AbcdCEBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) (CE){0,6} (*THEN) | A D) (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) (CE){0,6} (*THEN) | A D) z/x,
+ 1,
+ ],
+ [
+ 0,
+ "AbcdCBefgBhiBqz",
+ qr/(A (.*) (?{ $count_a++ }) (CE*){0,6} (*THEN) | A D) (*FAIL)/x,
+ qr/(A (.*) (?{ $count_b++ }) (CE*){0,6} (*THEN) | A D) z/x,
+ 1,
+ ],
+ ) {
+ $c++;
+ $count_a = 0;
+ $count_b = 0;
+
+ my $match_a = ($re->[1] =~ $re->[2]) || 0;
+ my $match_b = ($re->[1] =~ $re->[3]) || 0;
+
+ iseq($match_a, $re->[0], "match a " . ($re->[0] ? "succeeded" : "failed") . " ($c)");
+ iseq($match_b, $re->[0], "match b " . ($re->[0] ? "succeeded" : "failed") . " ($c)");
+ iseq($count_a, $re->[4], "count a ($c)");
+ iseq($count_b, $re->[4], "count b ($c)");
+ }
+ }
+
#
# Keep the following tests last -- they may crash perl
#