summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-12-07 19:32:32 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-12-07 19:32:32 +0000
commit 82893b7b730b740ec045a6d772ad0fc118074c88 (patch)
tree 849880f14a872a99013aa9942cbbd4163db520e6
parent 73f8e94ae08a28aad03773b4497be1e83eb12c86 (diff)
download pcre-82893b7b730b740ec045a6d772ad0fc118074c88.tar.gz
Fix non-diagnosis of (?=a)(?R) (positive lookaheads not skipped when checking for an empty match).

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@282 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- pcre_compile.c 15
-rw-r--r-- testdata/testinput2 8
-rw-r--r-- testdata/testoutput2 12
4 files changed, 38 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index f8f3ac8..7cfc09d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -75,6 +75,11 @@ Version 7.5 12-Nov-07
line for every non-matching line. GNU grep prints nothing, and pcregrep now
does the same. The return code can be used to tell if there were any
non-matching lines.
+
+14. The pattern (?=something)(?R) was not being diagnosed as a potentially
+ infinitely looping recursion. The bug was that positive lookaheads were not
+ being skipped when checking for a possible empty match (negative lookaheads
+ and both kinds of lookbehind were skipped).
Version 7.4 21-Sep-07
diff --git a/pcre_compile.c b/pcre_compile.c
index 885e239..8c5c4a1 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1508,8 +1508,9 @@ for (;;)
can match the empty string or not. It is called from could_be_empty()
below and from compile_branch() when checking for an unlimited repeat of a
group that can match nothing. Note that first_significant_code() skips over
-assertions. If we hit an unclosed bracket, we return "empty" - this means we've
-struck an inner bracket whose current branch will already have been scanned.
+backward and negative forward assertions when its final argument is TRUE. If we
+hit an unclosed bracket, we return "empty" - this means we've struck an inner
+bracket whose current branch will already have been scanned.
Arguments:
code points to start of search
@@ -1530,6 +1531,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
const uschar *ccode;
c = *code;
+
+ /* Skip over forward assertions; the other assertions are skipped by
+ first_significant_code() with a TRUE final argument. */
+
+ if (c == OP_ASSERT)
+ {
+ do code += GET(code, 1); while (*code == OP_ALT);
+ c = *code;
+ continue;
+ }
/* Groups with zero repeats can of course be empty; skip them. */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9b153dc..32c5ef2 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2563,4 +2563,12 @@ a random value. /Ix
/(?P>)/
+/(?!\w)(?R)/
+
+/(?=\w)(?R)/
+
+/(?<!\w)(?R)/
+
+/(?<=\w)(?R)/
+
/ End of testinput2 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index c7ae26f..72ac0af 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -9346,4 +9346,16 @@ Failed: subpattern name expected at offset 4
/(?P>)/
Failed: subpattern name expected at offset 4
+/(?!\w)(?R)/
+Failed: recursive call could loop indefinitely at offset 9
+
+/(?=\w)(?R)/
+Failed: recursive call could loop indefinitely at offset 9
+
+/(?<!\w)(?R)/
+Failed: recursive call could loop indefinitely at offset 10
+
+/(?<=\w)(?R)/
+Failed: recursive call could loop indefinitely at offset 10
+
/ End of testinput2 /