summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-01-14 19:01:25 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-01-14 19:01:25 +0000
commit: 2f6cf8f8728feed8f809ad4544ff9b5e06ef5dab (patch)
tree: 493efc00a2c452083a645fb657b9f7851afed8e9
parent: 765df2c46b9dd8b9d2cd949fea547d8089ba588f (diff)
download: pcre-2f6cf8f8728feed8f809ad4544ff9b5e06ef5dab.tar.gz
Fix -M bugs in pcregrep
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@587 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--  ChangeLog            11
-rwxr-xr-x  RunGrepTest           4
-rw-r--r--  doc/pcregrep.1       11
-rw-r--r--  pcregrep.c           35
-rw-r--r--  testdata/grepinput3  15
-rw-r--r--  testdata/grepoutput  14
6 files changed, 67 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index ea15821..6cf5266 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -17,11 +17,18 @@ Version 8.12 12-Jan-2011
of course, ignore a request for colour when reporting lines that do not
match.
-4. If pcregrep was compiled under Windows, there was a reference to the
+4. Another pcregrep bug caused similar problems if --colour was specified with
+ -M (multiline) and the pattern match finished with a line ending.
+
+5. In pcregrep, when a pattern that ended with a literal newline sequence was
+ matched in multiline mode, the following line was shown as part of the
+ match. This seems wrong, so I have changed it.
+
+6. If pcregrep was compiled under Windows, there was a reference to the
function pcregrep_exit() before it was defined. I am assuming this was
the cause of the "error C2371: 'pcregrep_exit' : redefinition;" that was
reported by a user. I've moved the definition above the reference.
-
+
Version 8.11 10-Dec-2010
------------------------
diff --git a/RunGrepTest b/RunGrepTest
index 26ad6a1..013b99b 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -336,6 +336,10 @@ echo "---------------------------- Test 69 -----------------------------" >>test
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
+echo "---------------------------- Test 70 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
+echo "RC=$?" >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index 995ce05..bd3b381 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -336,8 +336,11 @@ when the PCRE library is compiled, with the default default being 10 million.
\fB-M\fP, \fB--multiline\fP
Allow patterns to match more than one line. When this option is given, patterns
may usefully contain literal newline characters and internal occurrences of ^
-and $ characters. The output for any one match may consist of more than one
-line. When this option is set, the PCRE library is called in "multiline" mode.
+and $ characters. The output for a successful match may consist of more than
+one line, the last of which is the one in which the match ended. If the matched
+string ends with a newline sequence the output ends at the end of that line.
+.sp
+When this option is set, the PCRE library is called in "multiline" mode.
There is a limit to the number of lines that can be matched, imposed by the way
that \fBpcregrep\fP buffers the input file as it scans it. However,
\fBpcregrep\fP ensures that at least 8K characters or the rest of the document
@@ -553,6 +556,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 16 November 2010
-Copyright (c) 1997-2010 University of Cambridge.
+Last updated: 14 January 2011
+Copyright (c) 1997-2011 University of Cambridge.
.fi
diff --git a/pcregrep.c b/pcregrep.c
index 0f3080d..1159774 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -634,7 +634,7 @@ Arguments:
endptr end of available data
lenptr where to put the length of the eol sequence
-Returns: pointer to the last byte of the line
+Returns: pointer to the last byte of the line, including the newline byte(s)
*/
static char *
@@ -1316,22 +1316,16 @@ while (ptr < endptr)
(invert not set). Because the PCRE_FIRSTLINE option is set, the start of
the match will always be before the first newline sequence. */
- if (multiline)
+ if (multiline & !invert)
{
- int ellength;
- char *endmatch = ptr;
- if (!invert)
+ char *endmatch = ptr + offsets[1];
+ t = ptr;
+ while (t < endmatch)
{
- endmatch += offsets[1];
- t = ptr;
- while (t < endmatch)
- {
- t = end_of_line(t, endptr, &ellength);
- if (t <= endmatch) linenumber++; else break;
- }
+ t = end_of_line(t, endptr, &endlinelength);
+ if (t < endmatch) linenumber++; else break;
}
- endmatch = end_of_line(endmatch, endptr, &ellength);
- linelength = endmatch - ptr - ellength;
+ linelength = t - ptr - endlinelength;
}
/*** NOTE: Use only fwrite() to output the data line, so that binary
@@ -1355,9 +1349,10 @@ while (ptr < endptr)
/* We have to split the line(s) up if colouring, and search for further
matches, but not of course if the line is a non-match. */
-
+
if (do_colour && !invert)
{
+ int plength;
int last_offset = 0;
FWRITE(ptr, 1, offsets[0], stdout);
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
@@ -1374,8 +1369,14 @@ while (ptr < endptr)
FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
fprintf(stdout, "%c[00m", 0x1b);
}
- FWRITE(ptr + last_offset, 1,
- (linelength + endlinelength) - last_offset, stdout);
+
+ /* In multiline mode, we may have already printed the complete line
+ and its line-ending characters (if they matched the pattern), so there
+ may be no more to print. */
+
+ plength = (linelength + endlinelength) - last_offset;
+ if (plength > 0)
+ FWRITE(ptr + last_offset, 1, plength, stdout);
}
/* Not colouring; no need to search for further matches */
diff --git a/testdata/grepinput3 b/testdata/grepinput3
new file mode 100644
index 0000000..7409cfc
--- /dev/null
+++ b/testdata/grepinput3
@@ -0,0 +1,15 @@
+triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t2_txt s1_tag s_txt p_tag p_txt o_tag
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t5_txt s1_tag s_txt p_tag p_txt o_tag
+o_txt
+
+triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt
diff --git a/testdata/grepoutput b/testdata/grepoutput
index e6bb45c..43a0f09 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -373,6 +373,7 @@ RC=2
./testdata/grepinputx
RC=0
---------------------------- Test 36 -----------------------------
+./testdata/grepinput3
./testdata/grepinput8
./testdata/grepinputx
RC=0
@@ -477,6 +478,7 @@ This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 56 -----------------------------
./testdata/grepinput:456
+./testdata/grepinput3:0
./testdata/grepinput8:0
./testdata/grepinputv:1
./testdata/grepinputx:0
@@ -581,3 +583,15 @@ RC=0
41:
43:This is the last line of this file.
RC=0
+---------------------------- Test 70 -----------------------------
+triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+RC=0