diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-01-14 19:01:25 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-01-14 19:01:25 +0000 |
commit | 2f6cf8f8728feed8f809ad4544ff9b5e06ef5dab (patch) | |
tree | 493efc00a2c452083a645fb657b9f7851afed8e9 | |
parent | 765df2c46b9dd8b9d2cd949fea547d8089ba588f (diff) | |
download | pcre-2f6cf8f8728feed8f809ad4544ff9b5e06ef5dab.tar.gz |
Fix -M bugs in pcregrep
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@587 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 11 | ||||
-rwxr-xr-x | RunGrepTest | 4 | ||||
-rw-r--r-- | doc/pcregrep.1 | 11 | ||||
-rw-r--r-- | pcregrep.c | 35 | ||||
-rw-r--r-- | testdata/grepinput3 | 15 | ||||
-rw-r--r-- | testdata/grepoutput | 14 |
6 files changed, 67 insertions, 23 deletions
@@ -17,11 +17,18 @@ Version 8.12 12-Jan-2011 of course, ignore a request for colour when reporting lines that do not match. -4. If pcregrep was compiled under Windows, there was a reference to the +4. Another pcregrep bug caused similar problems if --colour was specified with + -M (multiline) and the pattern match finished with a line ending. + +5. In pcregrep, when a pattern that ended with a literal newline sequence was + matched in multiline mode, the following line was shown as part of the + match. This seems wrong, so I have changed it. + +6. If pcregrep was compiled under Windows, there was a reference to the function pcregrep_exit() before it was defined. I am assuming this was the cause of the "error C2371: 'pcregrep_exit' : redefinition;" that was reported by a user. I've moved the definition above the reference. - + Version 8.11 10-Dec-2010 ------------------------ diff --git a/RunGrepTest b/RunGrepTest index 26ad6a1..013b99b 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -336,6 +336,10 @@ echo "---------------------------- Test 69 -----------------------------" >>test (cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry echo "RC=$?" >>testtry +echo "---------------------------- Test 70 -----------------------------" >>testtry +(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry +echo "RC=$?" >>testtry + # Now compare the results. $cf $srcdir/testdata/grepoutput testtry diff --git a/doc/pcregrep.1 b/doc/pcregrep.1 index 995ce05..bd3b381 100644 --- a/doc/pcregrep.1 +++ b/doc/pcregrep.1 @@ -336,8 +336,11 @@ when the PCRE library is compiled, with the default default being 10 million. \fB-M\fP, \fB--multiline\fP Allow patterns to match more than one line. When this option is given, patterns may usefully contain literal newline characters and internal occurrences of ^ -and $ characters. The output for any one match may consist of more than one -line. When this option is set, the PCRE library is called in "multiline" mode. +and $ characters. The output for a successful match may consist of more than +one line, the last of which is the one in which the match ended. If the matched +string ends with a newline sequence the output ends at the end of that line. +.sp +When this option is set, the PCRE library is called in "multiline" mode. There is a limit to the number of lines that can be matched, imposed by the way that \fBpcregrep\fP buffers the input file as it scans it. However, \fBpcregrep\fP ensures that at least 8K characters or the rest of the document @@ -553,6 +556,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 16 November 2010 -Copyright (c) 1997-2010 University of Cambridge. +Last updated: 14 January 2011 +Copyright (c) 1997-2011 University of Cambridge. .fi @@ -634,7 +634,7 @@ Arguments: endptr end of available data lenptr where to put the length of the eol sequence -Returns: pointer to the last byte of the line +Returns: pointer to the last byte of the line, including the newline byte(s) */ static char * @@ -1316,22 +1316,16 @@ while (ptr < endptr) (invert not set). Because the PCRE_FIRSTLINE option is set, the start of the match will always be before the first newline sequence. */ - if (multiline) + if (multiline & !invert) { - int ellength; - char *endmatch = ptr; - if (!invert) + char *endmatch = ptr + offsets[1]; + t = ptr; + while (t < endmatch) { - endmatch += offsets[1]; - t = ptr; - while (t < endmatch) - { - t = end_of_line(t, endptr, &ellength); - if (t <= endmatch) linenumber++; else break; - } + t = end_of_line(t, endptr, &endlinelength); + if (t < endmatch) linenumber++; else break; } - endmatch = end_of_line(endmatch, endptr, &ellength); - linelength = endmatch - ptr - ellength; + linelength = t - ptr - endlinelength; } /*** NOTE: Use only fwrite() to output the data line, so that binary @@ -1355,9 +1349,10 @@ while (ptr < endptr) /* We have to split the line(s) up if colouring, and search for further matches, but not of course if the line is a non-match. */ - + if (do_colour && !invert) { + int plength; int last_offset = 0; FWRITE(ptr, 1, offsets[0], stdout); fprintf(stdout, "%c[%sm", 0x1b, colour_string); @@ -1374,8 +1369,14 @@ while (ptr < endptr) FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); fprintf(stdout, "%c[00m", 0x1b); } - FWRITE(ptr + last_offset, 1, - (linelength + endlinelength) - last_offset, stdout); + + /* In multiline mode, we may have already printed the complete line + and its line-ending characters (if they matched the pattern), so there + may be no more to print. */ + + plength = (linelength + endlinelength) - last_offset; + if (plength > 0) + FWRITE(ptr + last_offset, 1, plength, stdout); } /* Not colouring; no need to search for further matches */ diff --git a/testdata/grepinput3 b/testdata/grepinput3 new file mode 100644 index 0000000..7409cfc --- /dev/null +++ b/testdata/grepinput3 @@ -0,0 +1,15 @@ +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt diff --git a/testdata/grepoutput b/testdata/grepoutput index e6bb45c..43a0f09 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -373,6 +373,7 @@ RC=2 ./testdata/grepinputx RC=0 ---------------------------- Test 36 ----------------------------- +./testdata/grepinput3 ./testdata/grepinput8 ./testdata/grepinputx RC=0 @@ -477,6 +478,7 @@ This line contains [1;31mpattern[00m not on a [1;31mline by itself[00m. RC=0 ---------------------------- Test 56 ----------------------------- ./testdata/grepinput:456 +./testdata/grepinput3:0 ./testdata/grepinput8:0 ./testdata/grepinputv:1 ./testdata/grepinputx:0 @@ -581,3 +583,15 @@ RC=0 41: 43:This is the last line of this file. RC=0 +---------------------------- Test 70 ----------------------------- +[1;31mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +[00m[1;31mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +[00m[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +[00m[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +[00m[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +[00mRC=0 |