diff options
Diffstat (limited to 'pcre/pcregrep.c')
-rw-r--r-- | pcre/pcregrep.c | 93 |
1 files changed, 64 insertions, 29 deletions
diff --git a/pcre/pcregrep.c b/pcre/pcregrep.c index 4f7fa38491a..c49a74ffb78 100644 --- a/pcre/pcregrep.c +++ b/pcre/pcregrep.c @@ -1582,12 +1582,15 @@ while (ptr < endptr) int endlinelength; int mrc = 0; int startoffset = 0; + int prevoffsets[2]; unsigned int options = 0; BOOL match; char *matchptr = ptr; char *t = ptr; size_t length, linelength; + prevoffsets[0] = prevoffsets[1] = -1; + /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of @@ -1729,55 +1732,86 @@ while (ptr < endptr) { if (!invert) { - if (printname != NULL) fprintf(stdout, "%s:", printname); - if (number) fprintf(stdout, "%d:", linenumber); + int oldstartoffset = startoffset; - /* Handle --line-offsets */ + /* It is possible, when a lookbehind assertion contains \K, for the + same string to be found again. The code below advances startoffset, but + until it is past the "bumpalong" offset that gave the match, the same + substring will be returned. The PCRE1 library does not return the + bumpalong offset, so all we can do is ignore repeated strings. (PCRE2 + does this better.) 
*/ - if (line_offsets) - fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); + if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1]) + { + prevoffsets[0] = offsets[0]; + prevoffsets[1] = offsets[1]; - /* Handle --file-offsets */ + if (printname != NULL) fprintf(stdout, "%s:", printname); + if (number) fprintf(stdout, "%d:", linenumber); - else if (file_offsets) - fprintf(stdout, "%d,%d\n", - (int)(filepos + matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); + /* Handle --line-offsets */ - /* Handle --only-matching, which may occur many times */ + if (line_offsets) + fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); - else - { - BOOL printed = FALSE; - omstr *om; + /* Handle --file-offsets */ + + else if (file_offsets) + fprintf(stdout, "%d,%d\n", + (int)(filepos + matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); + + /* Handle --only-matching, which may occur many times */ - for (om = only_matching; om != NULL; om = om->next) + else { - int n = om->groupnum; - if (n < mrc) + BOOL printed = FALSE; + omstr *om; + + for (om = only_matching; om != NULL; om = om->next) { - int plen = offsets[2*n + 1] - offsets[2*n]; - if (plen > 0) + int n = om->groupnum; + if (n < mrc) { - if (printed) fprintf(stdout, "%s", om_separator); - if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(matchptr + offsets[n*2], 1, plen, stdout); - if (do_colour) fprintf(stdout, "%c[00m", 0x1b); - printed = TRUE; + int plen = offsets[2*n + 1] - offsets[2*n]; + if (plen > 0) + { + if (printed) fprintf(stdout, "%s", om_separator); + if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + FWRITE(matchptr + offsets[n*2], 1, plen, stdout); + if (do_colour) fprintf(stdout, "%c[00m", 0x1b); + printed = TRUE; + } } } - } - if (printed || printname != NULL || number) fprintf(stdout, "\n"); + if (printed || printname != NULL || number) fprintf(stdout, "\n"); + } } - /* 
Prepare to repeat to find the next match */ + /* Prepare to repeat to find the next match. If the pattern contained + a lookbehind that included \K, it is possible that the end of the match + might be at or before the actual starting offset we have just used. We + need to start one character further on. Unfortunately, for unanchored + patterns, the actual start offset can be greater than the one that was + set as a result of "bumpalong". PCRE1 does not return the actual start + offset, so we have to check against the original start offset. This may + lead to duplicates - so we need the fudge above to avoid printing them. + (PCRE2 does this better.) */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ startoffset = offsets[1]; /* Restart after the match */ + if (startoffset <= oldstartoffset) + { + if ((size_t)startoffset >= length) + goto END_ONE_MATCH; /* We were at the end */ + startoffset = oldstartoffset + 1; + if (utf8) + while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; + } goto ONLY_MATCHING_RESTART; } } @@ -1974,6 +2008,7 @@ while (ptr < endptr) /* Advance to after the newline and increment the line number. The file offset to the current line is maintained in filepos. */ + END_ONE_MATCH: ptr += linelength + endlinelength; filepos += (int)(linelength + endlinelength); linenumber++; |