diff options
author | Bram <p5p@perl.wizbit.be> | 2010-08-26 13:27:24 +0200 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2010-08-26 13:36:57 +0200 |
commit | d1c771f5a95fddf225347623798f65884aa6eee7 (patch) | |
tree | c4487cca6e52a71bbd4ca06df7771c75d5ea5fcf /t | |
parent | 39c4496953eaedb023e4a860fd4bdeacdba098c8 (diff) | |
download | perl-d1c771f5a95fddf225347623798f65884aa6eee7.tar.gz |
VERB nodes in the regex engine should NOT be marked as JUMPABLE.
JUMPABLE nodes can be ignored during certain phases of regex execution,
including ones where backtracking is affected. This change disables this
behaviour so that the VERBS can perform their desired results.
Committer has taken the liberty of modifying the patch so that all
VERBS are jumped, thus making the JUMPABLE expression a little simpler.
I have left Bram's change to JUMPABLE intact, but inside of a comment
for now.
See discussion in thread for [perl #71942] *COMMIT bypasses optimisation
for further details.
http://rt.perl.org/rt3/Ticket/Display.html?id=71942
There appears to be room for further optimisation here
by moving the JUMPABLE logic to regex-compile time. Currently
it is arguable that the "optimisation" this patch seeks to avoid
is actually not an optimisation at all, as it happens OVER AND OVER
during execution of a match, thus the extra effort might actually
outweigh the benefit, especially on large strings.
Diffstat (limited to 't')
-rw-r--r-- | t/re/pat_advanced.t | 293 |
1 files changed, 292 insertions, 1 deletions
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 881fd9eb24..ff96079296 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -21,7 +21,7 @@ BEGIN { } -plan tests => 1159; # Update this when adding/deleting tests. +plan tests => 1303; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1781,6 +1781,297 @@ sub run_tests { 'IsPunct agrees with [:punct:] with explicit Latin1'; } + + { + # Tests for [#perl 71942] + our $count_a; + our $count_b; + + my $c = 0; + for my $re ( +# [ +# should match?, +# input string, +# re 1, +# re 2, +# expected values of count_a and count_b, +# ] + [ + 0, + "xababz", + qr/a+(?{$count_a++})b?(*COMMIT)(*FAIL)/, + qr/a+(?{$count_b++})b?(*COMMIT)z/, + 1, + ], + [ + 0, + "xababz", + qr/a+(?{$count_a++})b?(*COMMIT)\s*(*FAIL)/, + qr/a+(?{$count_b++})b?(*COMMIT)\s*z/, + 1, + ], + [ + 0, + "xababz", + qr/a+(?{$count_a++})(?:b|)?(*COMMIT)(*FAIL)/, + qr/a+(?{$count_b++})(?:b|)?(*COMMIT)z/, + 1, + ], + [ + 0, + "xababz", + qr/a+(?{$count_a++})b{0,6}(*COMMIT)(*FAIL)/, + qr/a+(?{$count_b++})b{0,6}(*COMMIT)z/, + 1, + ], + [ + 0, + "xabcabcz", + qr/a+(?{$count_a++})(bc){0,6}(*COMMIT)(*FAIL)/, + qr/a+(?{$count_b++})(bc){0,6}(*COMMIT)z/, + 1, + ], + [ + 0, + "xabcabcz", + qr/a+(?{$count_a++})(bc*){0,6}(*COMMIT)(*FAIL)/, + qr/a+(?{$count_b++})(bc*){0,6}(*COMMIT)z/, + 1, + ], + + + [ + 0, + "aaaabtz", + qr/a+(?{$count_a++})b?(*PRUNE)(*FAIL)/, + qr/a+(?{$count_b++})b?(*PRUNE)z/, + 4, + ], + [ + 0, + "aaaabtz", + qr/a+(?{$count_a++})b?(*PRUNE)\s*(*FAIL)/, + qr/a+(?{$count_b++})b?(*PRUNE)\s*z/, + 4, + ], + [ + 0, + "aaaabtz", + qr/a+(?{$count_a++})(?:b|)(*PRUNE)(*FAIL)/, + qr/a+(?{$count_b++})(?:b|)(*PRUNE)z/, + 4, + ], + [ + 0, + "aaaabtz", + qr/a+(?{$count_a++})b{0,6}(*PRUNE)(*FAIL)/, + qr/a+(?{$count_b++})b{0,6}(*PRUNE)z/, + 4, + ], + [ + 0, + "aaaabctz", + qr/a+(?{$count_a++})(bc){0,6}(*PRUNE)(*FAIL)/, + qr/a+(?{$count_b++})(bc){0,6}(*PRUNE)z/, + 4, + ], + [ + 0, + "aaaabctz", + 
qr/a+(?{$count_a++})(bc*){0,6}(*PRUNE)(*FAIL)/, + qr/a+(?{$count_b++})(bc*){0,6}(*PRUNE)z/, + 4, + ], + + [ + 0, + "aaabaaab", + qr/a+(?{$count_a++;})b?(*SKIP)(*FAIL)/, + qr/a+(?{$count_b++;})b?(*SKIP)z/, + 2, + ], + [ + 0, + "aaabaaab", + qr/a+(?{$count_a++;})b?(*SKIP)\s*(*FAIL)/, + qr/a+(?{$count_b++;})b?(*SKIP)\s*z/, + 2, + ], + [ + 0, + "aaabaaab", + qr/a+(?{$count_a++;})(?:b|)(*SKIP)(*FAIL)/, + qr/a+(?{$count_b++;})(?:b|)(*SKIP)z/, + 2, + ], + [ + 0, + "aaabaaab", + qr/a+(?{$count_a++;})b{0,6}(*SKIP)(*FAIL)/, + qr/a+(?{$count_b++;})b{0,6}(*SKIP)z/, + 2, + ], + [ + 0, + "aaabcaaabc", + qr/a+(?{$count_a++;})(bc){0,6}(*SKIP)(*FAIL)/, + qr/a+(?{$count_b++;})(bc){0,6}(*SKIP)z/, + 2, + ], + [ + 0, + "aaabcaaabc", + qr/a+(?{$count_a++;})(bc*){0,6}(*SKIP)(*FAIL)/, + qr/a+(?{$count_b++;})(bc*){0,6}(*SKIP)z/, + 2, + ], + + + [ + 0, + "aaddbdaabyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) z \s* c \1 /x, + 4, + ], + [ + 0, + "aaddbdaabyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* z \s* c \1 /x, + 4, + ], + [ + 0, + "aaddbdaabyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) z \s* c \1 /x, + 4, + ], + [ + 0, + "aaddbdaabyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) z \s* c \1 /x, + 4, + ], + [ + 0, + "aaddbcdaabcyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) z \s* c \1 /x, + 4, + ], + [ + 0, + "aaddbcdaabcyzc", + qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc*){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? 
(bc*){0,6} (*SKIP:T1) z \s* c \1 /x, + 4, + ], + + + [ + 0, + "aaaaddbdaabyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, + 2, + ], + [ + 0, + "aaaaddbdaabyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* z \s* c \1 /x, + 2, + ], + [ + 0, + "aaaaddbdaabyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, + 2, + ], + [ + 0, + "aaaaddbdaabyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, + 2, + ], + [ + 0, + "aaaaddbcdaabcyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, + 2, + ], + [ + 0, + "aaaaddbcdaabcyzc", + qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, + qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, + 2, + ], + + + [ + 0, + "AbcdCBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) C? (*THEN) | A D) z/x, + 1, + ], + [ + 0, + "AbcdCBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) \s* (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) C? 
(*THEN) | A D) \s* z/x, + 1, + ], + [ + 0, + "AbcdCBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) (?:C|) (*THEN) | A D) (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) (?:C|) (*THEN) | A D) z/x, + 1, + ], + [ + 0, + "AbcdCBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) C{0,6} (*THEN) | A D) (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) C{0,6} (*THEN) | A D) z/x, + 1, + ], + [ + 0, + "AbcdCEBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) (CE){0,6} (*THEN) | A D) (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) (CE){0,6} (*THEN) | A D) z/x, + 1, + ], + [ + 0, + "AbcdCBefgBhiBqz", + qr/(A (.*) (?{ $count_a++ }) (CE*){0,6} (*THEN) | A D) (*FAIL)/x, + qr/(A (.*) (?{ $count_b++ }) (CE*){0,6} (*THEN) | A D) z/x, + 1, + ], + ) { + $c++; + $count_a = 0; + $count_b = 0; + + my $match_a = ($re->[1] =~ $re->[2]) || 0; + my $match_b = ($re->[1] =~ $re->[3]) || 0; + + iseq($match_a, $re->[0], "match a " . ($re->[0] ? "succeeded" : "failed") . " ($c)"); + iseq($match_b, $re->[0], "match b " . ($re->[0] ? "succeeded" : "failed") . " ($c)"); + iseq($count_a, $re->[4], "count a ($c)"); + iseq($count_b, $re->[4], "count b ($c)"); + } + } + # # Keep the following tests last -- they may crash perl # |