diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-03-01 14:13:34 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-03-01 14:13:34 +0000 |
commit | 3893b93c01b7deb23b0d566224c595de0cc27188 (patch) | |
tree | eaa5197f11b069f18dd532bbedab4f9abbce8351 | |
parent | f9a90bcfbc382a5fd60767e213650f49dc9bcdcf (diff) | |
download | pcre-3893b93c01b7deb23b0d566224c595de0cc27188.tar.gz |
Make pcregrep with --colour show all matches in a line in colour.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@378 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 10 | ||||
-rwxr-xr-x | RunGrepTest | 3 | ||||
-rw-r--r-- | doc/pcregrep.1 | 16 | ||||
-rw-r--r-- | pcregrep.c | 127 | ||||
-rw-r--r-- | testdata/grepinputv | 1 | ||||
-rw-r--r-- | testdata/grepinputx | 1 | ||||
-rw-r--r-- | testdata/grepoutput | 27 |
7 files changed, 133 insertions, 52 deletions
@@ -24,9 +24,13 @@ Version 7.9 xx-xxx-09 lines. This is not true; no spaces are inserted. I have also clarified the wording for the --colour (or --color) option. -5. When --colour was used with -o, the list of matching strings was not - coloured; this is different to GNU grep, so I have changed it to be the - same. +5. In pcregrep, when --colour was used with -o, the list of matching strings + was not coloured; this is different to GNU grep, so I have changed it to be + the same. + +6. When --colo(u)r was used in pcregrep, only the first matching substring in + each matching line was coloured. Now it goes on to look for further matches + of any of the test patterns, which is the same behaviour as GNU grep. Version 7.8 05-Sep-08 diff --git a/RunGrepTest b/RunGrepTest index 2d87541..ed28ee3 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -214,6 +214,9 @@ echo "---------------------------- Test 52 ------------------------------" >>tes echo "---------------------------- Test 53 ------------------------------" >>testtry (cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry +echo "---------------------------- Test 54 -----------------------------" >>testtry +(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry + # Now compare the results. $cf $srcdir/testdata/grepoutput testtry diff --git a/doc/pcregrep.1 b/doc/pcregrep.1 index 3ba8a05..ed24df2 100644 --- a/doc/pcregrep.1 +++ b/doc/pcregrep.1 @@ -119,12 +119,16 @@ This option specifies under what circumstances the parts of a line that matched a pattern should be coloured in the output. By default, the output is not coloured. The value (which is optional, see above) may be "never", "always", or "auto". In the latter case, colouring happens only if the standard output is -connected to a terminal. The colour that is used can be specified by setting -the environment variable PCREGREP_COLOUR or PCREGREP_COLOR. 
The value of this -variable should be a string of two numbers, separated by a semicolon. They are -copied directly into the control string for setting colour on a terminal, so it -is your responsibility to ensure that they make sense. If neither of the -environment variables is set, the default is "1;31", which gives red. +connected to a terminal. More resources are used when colouring is enabled, +because \fBpcregrep\fP has to search for all possible matches in a line, not +just one, in order to colour them all. + +The colour that is used can be specified by setting the environment variable +PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a +string of two numbers, separated by a semicolon. They are copied directly into +the control string for setting colour on a terminal, so it is your +responsibility to ensure that they make sense. If neither of the environment +variables is set, the default is "1;31", which gives red. .TP \fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP If an input path is not a regular file or a directory, "action" specifies how @@ -71,6 +71,7 @@ POSSIBILITY OF SUCH DAMAGE. typedef int BOOL; #define MAX_PATTERN_COUNT 100 +#define OFFSET_SIZE 99 #if BUFSIZ > 8192 #define MBUFTHIRD BUFSIZ @@ -821,6 +822,60 @@ if (after_context > 0 && lastmatchnumber > 0) /************************************************* +* Apply patterns to subject till one matches * +*************************************************/ + +/* This function is called to run through all patterns, looking for a match. It +is used multiple times for the same subject when colouring is enabled, in order +to find all possible matches. 
+ +Arguments: + matchptr the start of the subject + length the length of the subject to match + offsets the offsets vector to fill in + mrc address of where to put the result of pcre_exec() + +Returns: TRUE if there was a match + FALSE if there was no match + invert if there was a non-fatal error +*/ + +static BOOL +match_patterns(char *matchptr, size_t length, int *offsets, int *mrc) +{ +int i; +for (i = 0; i < pattern_count; i++) + { + *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0, + offsets, OFFSET_SIZE); + if (*mrc >= 0) return TRUE; + if (*mrc == PCRE_ERROR_NOMATCH) continue; + fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc); + if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); + fprintf(stderr, "this text:\n"); + fwrite(matchptr, 1, length, stderr); /* In case binary zero included */ + fprintf(stderr, "\n"); + if (error_count == 0 && + (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)) + { + fprintf(stderr, "pcregrep: error %d means that a resource limit " + "was exceeded\n", *mrc); + fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); + } + if (error_count++ > 20) + { + fprintf(stderr, "pcregrep: too many errors - abandoned\n"); + exit(2); + } + return invert; /* No more matching; don't show the line again */ + } + +return FALSE; /* No match, no errors */ +} + + + +/************************************************* * Grep an individual file * *************************************************/ @@ -853,7 +908,7 @@ int linenumber = 1; int lastmatchnumber = 0; int count = 0; int filepos = 0; -int offsets[99]; +int offsets[OFFSET_SIZE]; char *lastmatchrestart = NULL; char buffer[3*MBUFTHIRD]; char *ptr = buffer; @@ -909,9 +964,9 @@ way, the buffer is shifted left and re-filled. 
*/ while (ptr < endptr) { - int i, endlinelength; + int endlinelength; int mrc = 0; - BOOL match = FALSE; + BOOL match; char *matchptr = ptr; char *t = ptr; size_t length, linelength; @@ -919,9 +974,10 @@ while (ptr < endptr) /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of - the next line. After matching, we always advance by the length of the next - line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so - that any match is constrained to be in the first line. */ + the next line, excluding the terminating newline. After matching, we always + advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE + option is used for compiling, so that any match is constrained to be in the + first line. */ t = end_of_line(t, endptr, &endlinelength); linelength = t - ptr - endlinelength; @@ -936,6 +992,7 @@ while (ptr < endptr) #include <time.h> struct timeval start_time, end_time; struct timezone dummy; + int i; if (jfriedl_XT) { @@ -961,7 +1018,7 @@ while (ptr < endptr) for (i = 0; i < jfriedl_XR; i++) - match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0); + match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0); if (gettimeofday(&end_time, &dummy) != 0) perror("bad gettimeofday"); @@ -980,37 +1037,11 @@ while (ptr < endptr) ONLY_MATCHING_RESTART: - /* Run through all the patterns until one matches. Note that we don't include - the final newline in the subject string. 
*/ - - for (i = 0; i < pattern_count; i++) - { - mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0, - offsets, 99); - if (mrc >= 0) { match = TRUE; break; } - if (mrc != PCRE_ERROR_NOMATCH) - { - fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc); - if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); - fprintf(stderr, "this line:\n"); - fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */ - fprintf(stderr, "\n"); - if (error_count == 0 && - (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT)) - { - fprintf(stderr, "pcregrep: error %d means that a resource limit " - "was exceeded\n", mrc); - fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); - } - if (error_count++ > 20) - { - fprintf(stderr, "pcregrep: too many errors - abandoned\n"); - exit(2); - } - match = invert; /* No more matching; don't show the line again */ - break; - } - } + /* Run through all the patterns until one matches or there is an error other + than NOMATCH. This code is in a subroutine so that it can be re-used for + finding subsequent matches when colouring matched lines. */ + + match = match_patterns(matchptr, length, offsets, &mrc); /* If it's a match or a not-match (as required), do what's wanted. */ @@ -1201,17 +1232,33 @@ while (ptr < endptr) else #endif - /* We have to split the line(s) up if colouring. */ + /* We have to split the line(s) up if colouring, and search for further + matches. 
*/ if (do_colour) { + int last_offset = 0; fwrite(ptr, 1, offsets[0], stdout); fprintf(stdout, "%c[%sm", 0x1b, colour_string); fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); fprintf(stdout, "%c[00m", 0x1b); - fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1], + for (;;) + { + last_offset += offsets[1]; + matchptr += offsets[1]; + length -= offsets[1]; + if (!match_patterns(matchptr, length, offsets, &mrc)) break; + fwrite(matchptr, 1, offsets[0], stdout); + fprintf(stdout, "%c[%sm", 0x1b, colour_string); + fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); + fprintf(stdout, "%c[00m", 0x1b); + } + fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset, stdout); } + + /* Not colouring; no need to search for further matches */ + else fwrite(ptr, 1, linelength + endlinelength, stdout); } diff --git a/testdata/grepinputv b/testdata/grepinputv index 528153d..d33d326 100644 --- a/testdata/grepinputv +++ b/testdata/grepinputv @@ -1,3 +1,4 @@ The quick brown fox jumps over the lazy dog. +This time it jumps and jumps and jumps. diff --git a/testdata/grepinputx b/testdata/grepinputx index aebba02..730cc8a 100644 --- a/testdata/grepinputx +++ b/testdata/grepinputx @@ -39,4 +39,5 @@ eighteen nineteen twenty +This line contains pattern not on a line by itself. This is the last line of this file. diff --git a/testdata/grepoutput b/testdata/grepoutput index 3241984..882344e 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -18,6 +18,7 @@ PATTERN at the start of a line. ./testdata/grepinput:608:Check up on PATTERN near the end. ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:5:Pattern +./testdata/grepinputx:42:This line contains pattern not on a line by itself. ---------------------------- Test 6 ------------------------------ 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. @@ -25,6 +26,7 @@ PATTERN at the start of a line. 
608:Check up on PATTERN near the end. 3:Here is the pattern again. 5:Pattern +42:This line contains pattern not on a line by itself. ---------------------------- Test 7 ------------------------------ ./testdata/grepinput ./testdata/grepinputx @@ -75,12 +77,13 @@ RC=1 39:nineteen 40:twenty 41: -42:This is the last line of this file. +43:This is the last line of this file. ---------------------------- Test 12 ----------------------------- Pattern ---------------------------- Test 13 ----------------------------- Here is the pattern again. That time it was on a line by itself. +This line contains pattern not on a line by itself. ---------------------------- Test 14 ----------------------------- ./testdata/grepinputx:To pat or not to pat, that is the question. ---------------------------- Test 15 ----------------------------- @@ -157,6 +160,7 @@ eighteen nineteen twenty +This line contains pattern not on a line by itself. This is the last line of this file. ---------------------------- Test 25 ----------------------------- 15- @@ -207,6 +211,7 @@ eighteen nineteen twenty +This line contains pattern not on a line by itself. This is the last line of this file. ---------------------------- Test 27 ----------------------------- four @@ -227,6 +232,7 @@ eighteen nineteen twenty +This line contains pattern not on a line by itself. This is the last line of this file. ---------------------------- Test 28 ----------------------------- 14-of lines all by themselves. @@ -279,6 +285,7 @@ eighteen nineteen twenty +This line contains pattern not on a line by itself. This is the last line of this file. ---------------------------- Test 30 ----------------------------- ./testdata/grepinput-4-features should be added at the end, because some of the tests involve the @@ -299,6 +306,11 @@ This is the last line of this file. ./testdata/grepinputx:3:Here is the pattern again. 
./testdata/grepinputx-4- ./testdata/grepinputx:5:Pattern +-- +./testdata/grepinputx-39-nineteen +./testdata/grepinputx-40-twenty +./testdata/grepinputx-41- +./testdata/grepinputx:42:This line contains pattern not on a line by itself. ---------------------------- Test 31 ----------------------------- ./testdata/grepinput:7:PATTERN at the start of a line. ./testdata/grepinput:8:In the middle of a line, PATTERN appears. @@ -317,6 +329,9 @@ This is the last line of this file. ./testdata/grepinputx-6-That time it was on a line by itself. ./testdata/grepinputx-7- ./testdata/grepinputx-8-To pat or not to pat, that is the question. +-- +./testdata/grepinputx:42:This line contains pattern not on a line by itself. +./testdata/grepinputx-43-This is the last line of this file. ---------------------------- Test 32 ----------------------------- ./testdata/grepinputx ---------------------------- Test 33 ----------------------------- @@ -336,11 +351,11 @@ aaaaa0 aaaaa2 RC=0 ======== STDERR ======== -pcregrep: pcre_exec() error -8 while matching this line: +pcregrep: pcre_exec() error -8 while matching this text: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa pcregrep: error -8 means that a resource limit was exceeded pcregrep: check your regex for nested unlimited loops -pcregrep: pcre_exec() error -8 while matching this line: +pcregrep: pcre_exec() error -8 while matching this text: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ---------------------------- Test 38 ------------------------------ This line contains a binary zero here > @@ -388,8 +403,10 @@ PUT NEW DATA ABOVE THIS LINE. ---------------------------- Test 49 ------------------------------ ---------------------------- Test 50 ------------------------------ over the lazy dog. +This time it jumps and jumps and jumps. 
---------------------------- Test 51 ------------------------------ fox [1;31mjumps[00m +This time it [1;31mjumps[00m and [1;31mjumps[00m and [1;31mjumps[00m. ---------------------------- Test 52 ------------------------------ 36972,6 36990,4 @@ -402,3 +419,7 @@ fox [1;31mjumps[00m 596:28,4 597:15,5 597:32,4 +---------------------------- Test 54 ----------------------------- +Here is the [1;31mpattern[00m again. +That time it was on a [1;31mline by itself[00m. +This line contains [1;31mpattern[00m not on a [1;31mline by itself[00m. |