summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-12-04 20:01:43 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-12-04 20:01:43 +0000
commit 228b0cfb8b1a91b136e6431dd8d369362a613b75 (patch)
tree 54e95ed9738183e6371573b9c4ceda02428cc886
parent 61a61529db5c788a88fdb2fe78e916b42b1f2581 (diff)
download pcre-228b0cfb8b1a91b136e6431dd8d369362a613b75.tar.gz
Fix -o bugs in pcregrep.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@279 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 10
-rw-r--r-- doc/pcregrep.1 53
-rw-r--r-- pcregrep.c 29
-rw-r--r-- testdata/grepoutput 3
4 files changed, 63 insertions, 32 deletions
diff --git a/ChangeLog b/ChangeLog
index 32a5914..f8f3ac8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -65,6 +65,16 @@ Version 7.5 12-Nov-07
U+03260 - U+0327f
U+0fb46 - U+0fbb1
U+10450 - U+1049d
+
+12. The -o option (show only the matching part of a line) for pcregrep was not
+ compatible with GNU grep in that, if there was more than one match in a
+ line, it showed only the first of them. It now behaves in the same way as
+ GNU grep.
+
+13. If the -o and -v options were combined for pcregrep, it printed a blank
+ line for every non-matching line. GNU grep prints nothing, and pcregrep now
+ does the same. The return code can be used to tell if there were any
+ non-matching lines.
Version 7.4 21-Sep-07
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index 8d723c3..1820d17 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -23,9 +23,9 @@ without delimiters. For example:
.sp
If you attempt to use delimiters (for example, by surrounding a pattern with
slashes, as is common in Perl scripts), they are interpreted as part of the
-pattern. Quotes can of course be used on the command line because they are
-interpreted by the shell, and indeed they are required if a pattern contains
-white space or shell metacharacters.
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed they are required if a
+pattern contains white space or shell metacharacters.
.P
The first argument that follows any option settings is treated as the single
pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present.
@@ -41,10 +41,10 @@ For example:
.sp
By default, each line that matches the pattern is copied to the standard
output, and if there is more than one file, the file name is output at the
-start of each line. However, there are options that can change how
-\fBpcregrep\fP behaves. In particular, the \fB-M\fP option makes it possible to
-search for patterns that span line boundaries. What defines a line boundary is
-controlled by the \fB-N\fP (\fB--newline\fP) option.
+start of each line, followed by a colon. However, there are options that can
+change how \fBpcregrep\fP behaves. In particular, the \fB-M\fP option makes it
+possible to search for patterns that span line boundaries. What defines a line
+boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
.P
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
BUFSIZ is defined in \fB<stdio.h>\fP.
@@ -116,22 +116,22 @@ option), or "skip" (silently skip the path). In the default case, directories
are read as if they were ordinary files. In some operating systems the effect
of reading a directory like this is an immediate end-of-file.
.TP
-\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP,
-\fB--regexp=\fP\fIpattern\fP Specify a pattern to be matched. This option can
-be used multiple times in order to specify several patterns. It can also be
-used as a way of specifying a single pattern that starts with a hyphen. When
-\fB-e\fP is used, no argument pattern is taken from the command line; all
-arguments are treated as file names. There is an overall maximum of 100
-patterns. They are applied to each line in the order in which they are defined
-until one matches (or fails to match if \fB-v\fP is used). If \fB-f\fP is used
-with \fB-e\fP, the command line patterns are matched first, followed by the
-patterns from the file, independent of the order in which these options are
-specified. Note that multiple use of \fB-e\fP is not the same as a single
-pattern with alternatives. For example, X|Y finds the first character in a line
-that is X or Y, whereas if the two patterns are given separately,
-\fBpcregrep\fP finds X if it is present, even if it follows Y in the line. It
-finds Y only if there is no X in the line. This really matters only if you are
-using \fB-o\fP to show the portion of the line that matched.
+\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is an overall maximum of 100 patterns. They are applied to each
+line in the order in which they are defined until one matches (or fails to
+match if \fB-v\fP is used). If \fB-f\fP is used with \fB-e\fP, the command line
+patterns are matched first, followed by the patterns from the file, independent
+of the order in which these options are specified. Note that multiple use of
+\fB-e\fP is not the same as a single pattern with alternatives. For example,
+X|Y finds the first character in a line that is X or Y, whereas if the two
+patterns are given separately, \fBpcregrep\fP finds X if it is present, even if
+it follows Y in the line. It finds Y only if there is no X in the line. This
+really matters only if you are using \fB-o\fP to show the part(s) of the line
+that matched.
.TP
\fB--exclude\fP=\fIpattern\fP
When \fBpcregrep\fP is searching the files in a directory as a consequence of
@@ -246,7 +246,10 @@ the filename is also being output, it precedes the line number.
\fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern. In this mode, no
context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are
-ignored.
+ignored. If there is more than one match in a line, each of them is shown
+separately. If \fB-o\fP is combined with \fB-v\fP (invert the sense of the
+match to find non-matching lines), no output is generated, but the return code
+is set appropriately.
.TP
\fB-q\fP, \fB--quiet\fP
Work quietly, that is, display nothing except error messages. The exit
@@ -388,6 +391,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 16 April 2007
+Last updated: 04 December 2007
Copyright (c) 1997-2007 University of Cambridge.
.fi
diff --git a/pcregrep.c b/pcregrep.c
index b44574e..6c3f7a1 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -844,6 +844,7 @@ while (ptr < endptr)
int i, endlinelength;
int mrc = 0;
BOOL match = FALSE;
+ char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
@@ -906,13 +907,17 @@ while (ptr < endptr)
}
#endif
+ /* We come back here after a match when the -o option (only_matching) is set,
+ in order to find any further matches in the same line. */
+
+ ONLY_MATCHING_RESTART:
/* Run through all the patterns until one matches. Note that we don't include
the final newline in the subject string. */
for (i = 0; i < pattern_count; i++)
{
- mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
+ mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
offsets, 99);
if (mrc >= 0) { match = TRUE; break; }
if (mrc != PCRE_ERROR_NOMATCH)
@@ -920,7 +925,7 @@ while (ptr < endptr)
fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
fprintf(stderr, "this line:\n");
- fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
+ fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
fprintf(stderr, "\n");
if (error_count == 0 &&
(mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
@@ -967,14 +972,24 @@ while (ptr < endptr)
else if (quiet) return 0;
/* The --only-matching option prints just the substring that matched, and
- does not pring any context. */
+ does not print any context. Afterwards, adjust the start and length, and
+ then jump back to look for further matches in the same line. If we are in
+ invert mode, however, nothing is printed - this can still be useful
+ because the return code is set. */
else if (only_matching)
{
- if (printname != NULL) fprintf(stdout, "%s:", printname);
- if (number) fprintf(stdout, "%d:", linenumber);
- fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
- fprintf(stdout, "\n");
+ if (!invert)
+ {
+ if (printname != NULL) fprintf(stdout, "%s:", printname);
+ if (number) fprintf(stdout, "%d:", linenumber);
+ fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
+ fprintf(stdout, "\n");
+ matchptr += offsets[1];
+ length -= offsets[1];
+ match = FALSE;
+ goto ONLY_MATCHING_RESTART;
+ }
}
/* This is the default case when none of the above options is set. We print
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 7f76d9e..abf7aeb 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -358,10 +358,13 @@ after the binary zero
./testdata/grepinput:597:after the binary zero
---------------------------- Test 42 ------------------------------
595:before
+595:zero
596:zero
597:after
+597:zero
---------------------------- Test 43 ------------------------------
595:before
+595:zero
596:zero
597:zero
---------------------------- Test 44 ------------------------------