summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-10-31 18:18:48 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-10-31 18:18:48 +0000
commit: 1baf641ddd67f78693280553a81a05b69cbb3fff (patch)
tree: 53f082312a47154399f1b42fa6f642825c045e1d
parent: 8d1ceb3140a89b5e44d9464a0f6dd4251a24da78 (diff)
download: pcre-1baf641ddd67f78693280553a81a05b69cbb3fff.tar.gz
Added parentheses argument to -o and --only-matching options of pcregrep.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@565 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 3
-rwxr-xr-x RunGrepTest 20
-rw-r--r-- doc/pcregrep.1 30
-rw-r--r-- pcregrep.c 82
-rw-r--r-- testdata/grepoutput 13
5 files changed, 112 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index 3ab583d..00cff4e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -63,6 +63,9 @@ Version 8.11 10-Oct-2010
11. When the -o option was used, pcregrep was setting a return code of 1, even
when matches were found, and --line-buffered was not being honoured.
+
+12. Added an optional parentheses number to the -o and --only-matching options
+ of pcregrep.
Version 8.10 25-Jun-2010
diff --git a/RunGrepTest b/RunGrepTest
index bce1a14..7ba0141 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -307,6 +307,26 @@ echo "---------------------------- Test 63 -----------------------------" >>test
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
+echo "---------------------------- Test 64 ------------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 65 ------------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 66 ------------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 67 ------------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 68 ------------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index fd5de57..c08ddff 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -346,7 +346,7 @@ the previous 8K characters (or all the previous characters, if fewer than 8K)
are guaranteed to be available for lookbehind assertions. This option does not
work when input is read line by line (see \fP--line-buffered\fP.)
.TP
-\fB-N\fP \fInewline-type\fP, \fB--newline=\fP\fInewline-type\fP
+\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
The PCRE library supports five different conventions for indicating
the ends of lines. They are the single-character sequences CR (carriage return)
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
@@ -372,13 +372,25 @@ output, it precedes the line number. This option is forced if
\fB--line-offsets\fP is used.
.TP
\fB-o\fP, \fB--only-matching\fP
-Show only the part of the line that matched a pattern. In this mode, no
-context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are
-ignored. If there is more than one match in a line, each of them is shown
-separately. If \fB-o\fP is combined with \fB-v\fP (invert the sense of the
-match to find non-matching lines), no output is generated, but the return code
-is set appropriately. This option is mutually exclusive with
-\fB--file-offsets\fP and \fB--line-offsets\fP.
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
+\fB-C\fP options are ignored. If there is more than one match in a line, each
+of them is shown separately. If \fB-o\fP is combined with \fB-v\fP (invert the
+sense of the match to find non-matching lines), no output is generated, but the
+return code is set appropriately. If the matched portion of the line is empty,
+nothing is output unless the file name or line number is being printed, in
+which case they are shown on an otherwise empty line. This option is mutually
+exclusive with \fB--file-offsets\fP and \fB--line-offsets\fP.
+.TP
+\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 32 capturing parentheses are supported. Because these
+options can be given without an argument (see above), if an argument is
+present, it must be given in the same shell item, for example, -o3 or
+--only-matching=2. The comments given for the non-argument case above also
+apply to this case. If the specified capturing parentheses do not exist in the
+pattern, or were not set in the match, nothing is output unless the file name
+or line number is being printed.
.TP
\fB-q\fP, \fB--quiet\fP
Work quietly, that is, display nothing except error messages. The exit
@@ -525,6 +537,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 30 October 2010
+Last updated: 31 October 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
diff --git a/pcregrep.c b/pcregrep.c
index 64cc871..610757d 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -163,6 +163,7 @@ static int dee_action = dee_READ;
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
+static int only_matching = -1;
static int process_options = 0;
static unsigned long int match_limit = 0;
@@ -178,7 +179,6 @@ static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
-static BOOL only_matching = FALSE;
static BOOL resource_error = FALSE;
static BOOL quiet = FALSE;
static BOOL silent = FALSE;
@@ -244,7 +244,7 @@ static option_item optionlist[] = {
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
{ OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
- { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
+ { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
@@ -1174,33 +1174,40 @@ while (ptr < endptr)
else if (quiet) return 0;
- /* The --only-matching option prints just the substring that matched, and
- the --file-offsets and --line-offsets options output offsets for the
- matching substring (they both force --only-matching). None of these options
+ /* The --only-matching option prints just the substring that matched, or a
+ captured portion of it, as long as this string is not empty, and the
+ --file-offsets and --line-offsets options output offsets for the matching
+ substring (they both force --only-matching = 0). None of these options
prints any context. Afterwards, adjust the start and length, and then jump
back to look for further matches in the same line. If we are in invert
- mode, however, nothing is printed - this could be still useful because the
- return code is set. */
+ mode, however, nothing is printed and we do not restart - this could still
+ be useful because the return code is set. */
- else if (only_matching)
+ else if (only_matching >= 0)
{
if (!invert)
{
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%d:", linenumber);
if (line_offsets)
- fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
+ fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]);
else if (file_offsets)
- fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
+ fprintf(stdout, "%d,%d\n",
+ (int)(filepos + matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]);
- else
+ else if (only_matching < mrc)
{
- if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
- FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
- if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
+ int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
+ if (plen > 0)
+ {
+ if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+ FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
+ if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
+ fprintf(stdout, "\n");
+ }
}
- fprintf(stdout, "\n");
+ else if (printname != NULL || number) fprintf(stdout, "\n");
matchptr += offsets[1];
length -= offsets[1];
match = FALSE;
@@ -1465,7 +1472,7 @@ while (ptr < endptr)
/* End of file; print final "after" lines if wanted; do_after_lines sets
hyphenpending if it prints something. */
-if (!only_matching && !count_only)
+if (only_matching < 0 && !count_only)
{
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
hyphenpending |= endhyphenpending;
@@ -1814,7 +1821,7 @@ switch(letter)
case 'L': filenames = FN_NOMATCH_ONLY; break;
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
case 'n': number = TRUE; break;
- case 'o': only_matching = TRUE; break;
+ case 'o': only_matching = 0; break;
case 'q': quiet = TRUE; break;
case 'r': dee_action = dee_RECURSE; break;
case 's': silent = TRUE; break;
@@ -2154,18 +2161,34 @@ for (i = 1; i < argc; i++)
while (*s != 0)
{
for (op = optionlist; op->one_char != 0; op++)
- { if (*s == op->one_char) break; }
+ {
+ if (*s == op->one_char) break;
+ }
if (op->one_char == 0)
{
fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
*s, argv[i]);
pcregrep_exit(usage(2));
}
- if (op->type != OP_NODATA || s[1] == 0)
- {
- option_data = s+1;
- break;
+
+ /* Check for a single-character option that has data: OP_OP_NUMBER
+ is used for one that either has a numerical value or defaults, i.e. the
+ data is optional. If a digit follows, there is data; if not, carry on
+ with other single-character options in the same string. */
+
+ option_data = s+1;
+ if (op->type == OP_OP_NUMBER)
+ {
+ if (isdigit((unsigned char)s[1])) break;
}
+ else /* Check for end or a dataless option */
+ {
+ if (op->type != OP_NODATA || s[1] == 0) break;
+ }
+
+ /* Handle a single-character option with no data, then loop for the
+ next character in the string. */
+
pcre_options = handle_option(*s++, pcre_options);
}
}
@@ -2182,8 +2205,8 @@ for (i = 1; i < argc; i++)
/* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
either has a value or defaults to something. It cannot have data in a
- separate item. At the moment, the only such options are "colo(u)r" and
- Jeffrey Friedl's special -S debugging option. */
+ separate item. At the moment, the only such options are "colo(u)r",
+ "only-matching", and Jeffrey Friedl's special -S debugging option. */
if (*option_data == 0 &&
(op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
@@ -2193,6 +2216,11 @@ for (i = 1; i < argc; i++)
case N_COLOUR:
colour_option = (char *)"auto";
break;
+
+ case 'o':
+ only_matching = 0;
+ break;
+
#ifdef JFRIEDL_DEBUG
case 'S':
S_arg = 0;
@@ -2274,9 +2302,9 @@ if (both_context > 0)
}
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
-However, the latter two set the only_matching flag. */
+However, the latter two set only_matching. */
-if ((only_matching && (file_offsets || line_offsets)) ||
+if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
(file_offsets && line_offsets))
{
fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
@@ -2284,7 +2312,7 @@ if ((only_matching && (file_offsets || line_offsets)) ||
pcregrep_exit(usage(2));
}
-if (file_offsets || line_offsets) only_matching = TRUE;
+if (file_offsets || line_offsets) only_matching = 0;
/* If a locale has not been provided as an option, see if the LC_CTYPE or
LC_ALL environment variable is set, and if so, use it. */
diff --git a/testdata/grepoutput b/testdata/grepoutput
index eb48dbb..b91a3da 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -525,3 +525,16 @@ long so that it needs more than a single read
pcregrep: Error -8 or -21 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
+---------------------------- Test 64 ------------------------------
+appears
+RC=0
+---------------------------- Test 65 ------------------------------
+pear
+RC=0
+---------------------------- Test 66 ------------------------------
+RC=0
+---------------------------- Test 67 ------------------------------
+RC=0
+---------------------------- Test 68 ------------------------------
+pear
+RC=0