summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-01 14:13:34 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-01 14:13:34 +0000
commit 3893b93c01b7deb23b0d566224c595de0cc27188 (patch)
tree eaa5197f11b069f18dd532bbedab4f9abbce8351
parent f9a90bcfbc382a5fd60767e213650f49dc9bcdcf (diff)
download pcre-3893b93c01b7deb23b0d566224c595de0cc27188.tar.gz
Make pcregrep with --colour show all matches in a line in colour.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@378 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 10
-rwxr-xr-x RunGrepTest 3
-rw-r--r-- doc/pcregrep.1 16
-rw-r--r-- pcregrep.c 127
-rw-r--r-- testdata/grepinputv 1
-rw-r--r-- testdata/grepinputx 1
-rw-r--r-- testdata/grepoutput 27
7 files changed, 133 insertions, 52 deletions
diff --git a/ChangeLog b/ChangeLog
index 211066a..2abd7b4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -24,9 +24,13 @@ Version 7.9 xx-xxx-09
lines. This is not true; no spaces are inserted. I have also clarified the
wording for the --colour (or --color) option.
-5. When --colour was used with -o, the list of matching strings was not
- coloured; this is different to GNU grep, so I have changed it to be the
- same.
+5. In pcregrep, when --colour was used with -o, the list of matching strings
+ was not coloured; this is different to GNU grep, so I have changed it to be
+ the same.
+
+6. When --colo(u)r was used in pcregrep, only the first matching substring in
+ each matching line was coloured. Now it goes on to look for further matches
+ of any of the test patterns, which is the same behaviour as GNU grep.
Version 7.8 05-Sep-08
diff --git a/RunGrepTest b/RunGrepTest
index 2d87541..ed28ee3 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -214,6 +214,9 @@ echo "---------------------------- Test 52 ------------------------------" >>tes
echo "---------------------------- Test 53 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
+echo "---------------------------- Test 54 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index 3ba8a05..ed24df2 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -119,12 +119,16 @@ This option specifies under what circumstances the parts of a line that matched
a pattern should be coloured in the output. By default, the output is not
coloured. The value (which is optional, see above) may be "never", "always", or
"auto". In the latter case, colouring happens only if the standard output is
-connected to a terminal. The colour that is used can be specified by setting
-the environment variable PCREGREP_COLOUR or PCREGREP_COLOR. The value of this
-variable should be a string of two numbers, separated by a semicolon. They are
-copied directly into the control string for setting colour on a terminal, so it
-is your responsibility to ensure that they make sense. If neither of the
-environment variables is set, the default is "1;31", which gives red.
+connected to a terminal. More resources are used when colouring is enabled,
+because \fBpcregrep\fP has to search for all possible matches in a line, not
+just one, in order to colour them all.
+
+The colour that is used can be specified by setting the environment variable
+PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
.TP
\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
If an input path is not a regular file or a directory, "action" specifies how
diff --git a/pcregrep.c b/pcregrep.c
index d595caf..d7db4a5 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -71,6 +71,7 @@ POSSIBILITY OF SUCH DAMAGE.
typedef int BOOL;
#define MAX_PATTERN_COUNT 100
+#define OFFSET_SIZE 99
#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
@@ -821,6 +822,60 @@ if (after_context > 0 && lastmatchnumber > 0)
/*************************************************
+* Apply patterns to subject till one matches *
+*************************************************/
+
+/* This function is called to run through all patterns, looking for a match. It
+is used multiple times for the same subject when colouring is enabled, in order
+to find all possible matches.
+
+Arguments:
+ matchptr the start of the subject
+ length the length of the subject to match
+ offsets the offsets vector to fill in
+ mrc address of where to put the result of pcre_exec()
+
+Returns: TRUE if there was a match
+ FALSE if there was no match
+ invert if there was a non-fatal error
+*/
+
+static BOOL
+match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
+{
+int i;
+for (i = 0; i < pattern_count; i++)
+ {
+ *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
+ offsets, OFFSET_SIZE);
+ if (*mrc >= 0) return TRUE;
+ if (*mrc == PCRE_ERROR_NOMATCH) continue;
+ fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
+ if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
+ fprintf(stderr, "this text:\n");
+ fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
+ fprintf(stderr, "\n");
+ if (error_count == 0 &&
+ (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
+ {
+ fprintf(stderr, "pcregrep: error %d means that a resource limit "
+ "was exceeded\n", *mrc);
+ fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
+ }
+ if (error_count++ > 20)
+ {
+ fprintf(stderr, "pcregrep: too many errors - abandoned\n");
+ exit(2);
+ }
+ return invert; /* No more matching; don't show the line again */
+ }
+
+return FALSE; /* No match, no errors */
+}
+
+
+
+/*************************************************
* Grep an individual file *
*************************************************/
@@ -853,7 +908,7 @@ int linenumber = 1;
int lastmatchnumber = 0;
int count = 0;
int filepos = 0;
-int offsets[99];
+int offsets[OFFSET_SIZE];
char *lastmatchrestart = NULL;
char buffer[3*MBUFTHIRD];
char *ptr = buffer;
@@ -909,9 +964,9 @@ way, the buffer is shifted left and re-filled. */
while (ptr < endptr)
{
- int i, endlinelength;
+ int endlinelength;
int mrc = 0;
- BOOL match = FALSE;
+ BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
@@ -919,9 +974,10 @@ while (ptr < endptr)
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre_exec(). In multiline mode, it is the
length remainder of the data in the buffer. Otherwise, it is the length of
- the next line. After matching, we always advance by the length of the next
- line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
- that any match is constrained to be in the first line. */
+ the next line, excluding the terminating newline. After matching, we always
+ advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
+ option is used for compiling, so that any match is constrained to be in the
+ first line. */
t = end_of_line(t, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
@@ -936,6 +992,7 @@ while (ptr < endptr)
#include <time.h>
struct timeval start_time, end_time;
struct timezone dummy;
+ int i;
if (jfriedl_XT)
{
@@ -961,7 +1018,7 @@ while (ptr < endptr)
for (i = 0; i < jfriedl_XR; i++)
- match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
+ match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
if (gettimeofday(&end_time, &dummy) != 0)
perror("bad gettimeofday");
@@ -980,37 +1037,11 @@ while (ptr < endptr)
ONLY_MATCHING_RESTART:
- /* Run through all the patterns until one matches. Note that we don't include
- the final newline in the subject string. */
-
- for (i = 0; i < pattern_count; i++)
- {
- mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
- offsets, 99);
- if (mrc >= 0) { match = TRUE; break; }
- if (mrc != PCRE_ERROR_NOMATCH)
- {
- fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
- if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
- fprintf(stderr, "this line:\n");
- fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
- fprintf(stderr, "\n");
- if (error_count == 0 &&
- (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
- {
- fprintf(stderr, "pcregrep: error %d means that a resource limit "
- "was exceeded\n", mrc);
- fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
- }
- if (error_count++ > 20)
- {
- fprintf(stderr, "pcregrep: too many errors - abandoned\n");
- exit(2);
- }
- match = invert; /* No more matching; don't show the line again */
- break;
- }
- }
+ /* Run through all the patterns until one matches or there is an error other
+ than NOMATCH. This code is in a subroutine so that it can be re-used for
+ finding subsequent matches when colouring matched lines. */
+
+ match = match_patterns(matchptr, length, offsets, &mrc);
/* If it's a match or a not-match (as required), do what's wanted. */
@@ -1201,17 +1232,33 @@ while (ptr < endptr)
else
#endif
- /* We have to split the line(s) up if colouring. */
+ /* We have to split the line(s) up if colouring, and search for further
+ matches. */
if (do_colour)
{
+ int last_offset = 0;
fwrite(ptr, 1, offsets[0], stdout);
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
fprintf(stdout, "%c[00m", 0x1b);
- fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
+ for (;;)
+ {
+ last_offset += offsets[1];
+ matchptr += offsets[1];
+ length -= offsets[1];
+ if (!match_patterns(matchptr, length, offsets, &mrc)) break;
+ fwrite(matchptr, 1, offsets[0], stdout);
+ fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+ fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
+ fprintf(stdout, "%c[00m", 0x1b);
+ }
+ fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
stdout);
}
+
+ /* Not colouring; no need to search for further matches */
+
else fwrite(ptr, 1, linelength + endlinelength, stdout);
}
diff --git a/testdata/grepinputv b/testdata/grepinputv
index 528153d..d33d326 100644
--- a/testdata/grepinputv
+++ b/testdata/grepinputv
@@ -1,3 +1,4 @@
The quick brown
fox jumps
over the lazy dog.
+This time it jumps and jumps and jumps.
diff --git a/testdata/grepinputx b/testdata/grepinputx
index aebba02..730cc8a 100644
--- a/testdata/grepinputx
+++ b/testdata/grepinputx
@@ -39,4 +39,5 @@ eighteen
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 3241984..882344e 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -18,6 +18,7 @@ PATTERN at the start of a line.
./testdata/grepinput:608:Check up on PATTERN near the end.
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
---------------------------- Test 6 ------------------------------
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
@@ -25,6 +26,7 @@ PATTERN at the start of a line.
608:Check up on PATTERN near the end.
3:Here is the pattern again.
5:Pattern
+42:This line contains pattern not on a line by itself.
---------------------------- Test 7 ------------------------------
./testdata/grepinput
./testdata/grepinputx
@@ -75,12 +77,13 @@ RC=1
39:nineteen
40:twenty
41:
-42:This is the last line of this file.
+43:This is the last line of this file.
---------------------------- Test 12 -----------------------------
Pattern
---------------------------- Test 13 -----------------------------
Here is the pattern again.
That time it was on a line by itself.
+This line contains pattern not on a line by itself.
---------------------------- Test 14 -----------------------------
./testdata/grepinputx:To pat or not to pat, that is the question.
---------------------------- Test 15 -----------------------------
@@ -157,6 +160,7 @@ eighteen
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 25 -----------------------------
15-
@@ -207,6 +211,7 @@ eighteen
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 27 -----------------------------
four
@@ -227,6 +232,7 @@ eighteen
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 28 -----------------------------
14-of lines all by themselves.
@@ -279,6 +285,7 @@ eighteen
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 30 -----------------------------
./testdata/grepinput-4-features should be added at the end, because some of the tests involve the
@@ -299,6 +306,11 @@ This is the last line of this file.
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4-
./testdata/grepinputx:5:Pattern
+--
+./testdata/grepinputx-39-nineteen
+./testdata/grepinputx-40-twenty
+./testdata/grepinputx-41-
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
---------------------------- Test 31 -----------------------------
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
@@ -317,6 +329,9 @@ This is the last line of this file.
./testdata/grepinputx-6-That time it was on a line by itself.
./testdata/grepinputx-7-
./testdata/grepinputx-8-To pat or not to pat, that is the question.
+--
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
+./testdata/grepinputx-43-This is the last line of this file.
---------------------------- Test 32 -----------------------------
./testdata/grepinputx
---------------------------- Test 33 -----------------------------
@@ -336,11 +351,11 @@ aaaaa0
aaaaa2
RC=0
======== STDERR ========
-pcregrep: pcre_exec() error -8 while matching this line:
+pcregrep: pcre_exec() error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcregrep: error -8 means that a resource limit was exceeded
pcregrep: check your regex for nested unlimited loops
-pcregrep: pcre_exec() error -8 while matching this line:
+pcregrep: pcre_exec() error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >
@@ -388,8 +403,10 @@ PUT NEW DATA ABOVE THIS LINE.
---------------------------- Test 49 ------------------------------
---------------------------- Test 50 ------------------------------
over the lazy dog.
+This time it jumps and jumps and jumps.
---------------------------- Test 51 ------------------------------
fox jumps
+This time it jumps and jumps and jumps.
---------------------------- Test 52 ------------------------------
36972,6
36990,4
@@ -402,3 +419,7 @@ fox jumps
596:28,4
597:15,5
597:32,4
+---------------------------- Test 54 -----------------------------
+Here is the pattern again.
+That time it was on a line by itself.
+This line contains pattern not on a line by itself.