diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2015-04-07 15:52:11 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2015-04-07 15:52:11 +0000 |
commit | b4332d7dd831b3547b3f541495de4a79554e538e (patch) | |
tree | 62916f7f12e1726d9651cce38e426cfca169ffca | |
parent | 256d94987eecd7eb87b37e1c981a4e753ed8ab7a (diff) | |
download | pcre-b4332d7dd831b3547b3f541495de4a79554e538e.tar.gz |
Fix pcregrep loop when \K is used in a lookbehind assertion.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rwxr-xr-x | RunGrepTest | 5 | ||||
-rw-r--r-- | pcregrep.c | 109 | ||||
-rw-r--r-- | testdata/grepoutput | 8 |
4 files changed, 88 insertions, 37 deletions
@@ -145,6 +145,9 @@ Version 8.37 xx-xxx-2015 35. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1))) caused a stack overflow instead of the diagnosis of a non-fixed length lookbehind assertion. This bug was discovered by the LLVM fuzzer. + +36. The use of \K in a positive lookbehind assertion in a non-anchored pattern + (e.g. /(?<=\Ka)/) could make pcregrep loop. Version 8.36 26-September-2014 diff --git a/RunGrepTest b/RunGrepTest index f1b0348..766278b 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes (cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep +echo "---------------------------- Test 107 -----------------------------" >>testtrygrep +echo "a" >testtemp1grep +echo "aaaaa" >>testtemp1grep +(cd $srcdir; $valgrind $pcregrep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep # Now compare the results. @@ -1582,11 +1582,14 @@ while (ptr < endptr) int endlinelength; int mrc = 0; int startoffset = 0; + int prevoffsets[2]; unsigned int options = 0; BOOL match; char *matchptr = ptr; char *t = ptr; size_t length, linelength; + + prevoffsets[0] = prevoffsets[1] = -1; /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). 
In multiline mode, it is the @@ -1729,55 +1732,86 @@ while (ptr < endptr) { if (!invert) { - if (printname != NULL) fprintf(stdout, "%s:", printname); - if (number) fprintf(stdout, "%d:", linenumber); - - /* Handle --line-offsets */ - - if (line_offsets) - fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); - - /* Handle --file-offsets */ - - else if (file_offsets) - fprintf(stdout, "%d,%d\n", - (int)(filepos + matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); - - /* Handle --only-matching, which may occur many times */ - - else + int oldstartoffset = startoffset; + + /* It is possible, when a lookbehind assertion contains \K, for the + same string to be found again. The code below advances startoffset, but + until it is past the "bumpalong" offset that gave the match, the same + substring will be returned. The PCRE1 library does not return the + bumpalong offset, so all we can do is ignore repeated strings. (PCRE2 + does this better.) */ + + if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1]) { - BOOL printed = FALSE; - omstr *om; - - for (om = only_matching; om != NULL; om = om->next) + prevoffsets[0] = offsets[0]; + prevoffsets[1] = offsets[1]; + + if (printname != NULL) fprintf(stdout, "%s:", printname); + if (number) fprintf(stdout, "%d:", linenumber); + + /* Handle --line-offsets */ + + if (line_offsets) + fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); + + /* Handle --file-offsets */ + + else if (file_offsets) + fprintf(stdout, "%d,%d\n", + (int)(filepos + matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); + + /* Handle --only-matching, which may occur many times */ + + else { - int n = om->groupnum; - if (n < mrc) + BOOL printed = FALSE; + omstr *om; + + for (om = only_matching; om != NULL; om = om->next) { - int plen = offsets[2*n + 1] - offsets[2*n]; - if (plen > 0) + int n = om->groupnum; + if (n < mrc) { - if (printed) fprintf(stdout, "%s", 
om_separator); - if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(matchptr + offsets[n*2], 1, plen, stdout); - if (do_colour) fprintf(stdout, "%c[00m", 0x1b); - printed = TRUE; + int plen = offsets[2*n + 1] - offsets[2*n]; + if (plen > 0) + { + if (printed) fprintf(stdout, "%s", om_separator); + if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + FWRITE(matchptr + offsets[n*2], 1, plen, stdout); + if (do_colour) fprintf(stdout, "%c[00m", 0x1b); + printed = TRUE; + } } } + + if (printed || printname != NULL || number) fprintf(stdout, "\n"); } - - if (printed || printname != NULL || number) fprintf(stdout, "\n"); - } - - /* Prepare to repeat to find the next match */ + } + + /* Prepare to repeat to find the next match. If the pattern contained + a lookbehind that included \K, it is possible that the end of the match + might be at or before the actual starting offset we have just used. We + need to start one character further on. Unfortunately, for unanchored + patterns, the actual start offset can be greater than the one that was + set as a result of "bumpalong". PCRE1 does not return the actual start + offset, so we have to check against the original start offset. This may + lead to duplicates - we need the fudge above to avoid printing them. + (PCRE2 does this better.) */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ startoffset = offsets[1]; /* Restart after the match */ + if (startoffset <= oldstartoffset) + { + if ((size_t)startoffset >= length) + goto END_ONE_MATCH; /* We were at the end */ + startoffset = oldstartoffset + 1; + if (utf8) + while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; + } goto ONLY_MATCHING_RESTART; } } @@ -1974,6 +2008,7 @@ while (ptr < endptr) /* Advance to after the newline and increment the line number. The file offset to the current line is maintained in filepos. 
*/ + END_ONE_MATCH: ptr += linelength + endlinelength; filepos += (int)(linelength + endlinelength); linenumber++; diff --git a/testdata/grepoutput b/testdata/grepoutput index 9bf9d9d..4d61752 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -743,3 +743,11 @@ RC=0 ---------------------------- Test 106 ----------------------------- a RC=0 +---------------------------- Test 107 ----------------------------- +1:0,1 +2:0,1 +2:1,1 +2:2,1 +2:3,1 +2:4,1 +RC=0 |