summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-04-07 15:52:11 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-04-07 15:52:11 +0000
commitb4332d7dd831b3547b3f541495de4a79554e538e (patch)
tree62916f7f12e1726d9651cce38e426cfca169ffca
parent256d94987eecd7eb87b37e1c981a4e753ed8ab7a (diff)
downloadpcre-b4332d7dd831b3547b3f541495de4a79554e538e.tar.gz
Fix pcregrep loop when \K is used in a lookbehind assertion.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog3
-rwxr-xr-xRunGrepTest5
-rw-r--r--pcregrep.c109
-rw-r--r--testdata/grepoutput8
4 files changed, 88 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index 53f6853..e14cb80 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -145,6 +145,9 @@ Version 8.37 xx-xxx-2015
35. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
+
+36. The use of \K in a positive lookbehind assertion in a non-anchored pattern
+ (e.g. /(?<=\Ka)/) could make pcregrep loop.
Version 8.36 26-September-2014
diff --git a/RunGrepTest b/RunGrepTest
index f1b0348..766278b 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
+echo "a" >testtemp1grep
+echo "aaaaa" >>testtemp1grep
+(cd $srcdir; $valgrind $pcregrep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
# Now compare the results.
diff --git a/pcregrep.c b/pcregrep.c
index 4f7fa38..b1af129 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -1582,11 +1582,14 @@ while (ptr < endptr)
int endlinelength;
int mrc = 0;
int startoffset = 0;
+ int prevoffsets[2];
unsigned int options = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
+
+ prevoffsets[0] = prevoffsets[1] = -1;
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre_exec(). In multiline mode, it is the
@@ -1729,55 +1732,86 @@ while (ptr < endptr)
{
if (!invert)
{
- if (printname != NULL) fprintf(stdout, "%s:", printname);
- if (number) fprintf(stdout, "%d:", linenumber);
-
- /* Handle --line-offsets */
-
- if (line_offsets)
- fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
- offsets[1] - offsets[0]);
-
- /* Handle --file-offsets */
-
- else if (file_offsets)
- fprintf(stdout, "%d,%d\n",
- (int)(filepos + matchptr + offsets[0] - ptr),
- offsets[1] - offsets[0]);
-
- /* Handle --only-matching, which may occur many times */
-
- else
+ int oldstartoffset = startoffset;
+
+ /* It is possible, when a lookbehind assertion contains \K, for the
+ same string to be found again. The code below advances startoffset, but
+ until it is past the "bumpalong" offset that gave the match, the same
+ substring will be returned. The PCRE1 library does not return the
+ bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
+ does this better.) */
+
+ if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
{
- BOOL printed = FALSE;
- omstr *om;
-
- for (om = only_matching; om != NULL; om = om->next)
+ prevoffsets[0] = offsets[0];
+ prevoffsets[1] = offsets[1];
+
+ if (printname != NULL) fprintf(stdout, "%s:", printname);
+ if (number) fprintf(stdout, "%d:", linenumber);
+
+ /* Handle --line-offsets */
+
+ if (line_offsets)
+ fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
+ offsets[1] - offsets[0]);
+
+ /* Handle --file-offsets */
+
+ else if (file_offsets)
+ fprintf(stdout, "%d,%d\n",
+ (int)(filepos + matchptr + offsets[0] - ptr),
+ offsets[1] - offsets[0]);
+
+ /* Handle --only-matching, which may occur many times */
+
+ else
{
- int n = om->groupnum;
- if (n < mrc)
+ BOOL printed = FALSE;
+ omstr *om;
+
+ for (om = only_matching; om != NULL; om = om->next)
{
- int plen = offsets[2*n + 1] - offsets[2*n];
- if (plen > 0)
+ int n = om->groupnum;
+ if (n < mrc)
{
- if (printed) fprintf(stdout, "%s", om_separator);
- if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
- FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
- if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
- printed = TRUE;
+ int plen = offsets[2*n + 1] - offsets[2*n];
+ if (plen > 0)
+ {
+ if (printed) fprintf(stdout, "%s", om_separator);
+ if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+ FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
+ if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
+ printed = TRUE;
+ }
}
}
+
+ if (printed || printname != NULL || number) fprintf(stdout, "\n");
}
-
- if (printed || printname != NULL || number) fprintf(stdout, "\n");
- }
-
- /* Prepare to repeat to find the next match */
+ }
+
+ /* Prepare to repeat to find the next match. If the pattern contained
+ a lookbehind that included \K, it is possible that the end of the match
+ might be at or before the actual starting offset we have just used. We
+ need to start one character further on. Unfortunately, for unanchored
+ patterns, the actual start offset can be greater than the one that was
+ set as a result of "bumpalong". PCRE1 does not return the actual start
+ offset, so we have to check against the original start offset. This may
+ lead to duplicates - so we need the fudge above to avoid printing them.
+ (PCRE2 does this better.) */
match = FALSE;
if (line_buffered) fflush(stdout);
rc = 0; /* Had some success */
startoffset = offsets[1]; /* Restart after the match */
+ if (startoffset <= oldstartoffset)
+ {
+ if ((size_t)startoffset >= length)
+ goto END_ONE_MATCH; /* We were at the end */
+ startoffset = oldstartoffset + 1;
+ if (utf8)
+ while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
+ }
goto ONLY_MATCHING_RESTART;
}
}
@@ -1974,6 +2008,7 @@ while (ptr < endptr)
/* Advance to after the newline and increment the line number. The file
offset to the current line is maintained in filepos. */
+ END_ONE_MATCH:
ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength);
linenumber++;
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 9bf9d9d..4d61752 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -743,3 +743,11 @@ RC=0
---------------------------- Test 106 -----------------------------
a
RC=0
+---------------------------- Test 107 -----------------------------
+1:0,1
+2:0,1
+2:1,1
+2:2,1
+2:3,1
+2:4,1
+RC=0