summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-03-30 15:46:27 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-03-30 15:46:27 +0000
commit28ed4f58e289c711ec3494e9f512da278506ece9 (patch)
tree06a6f1e644a86f80dd401ba66951259bc112c1b7
parent44b15daf3a13326a8e41a73b882fb3cbad686c66 (diff)
downloadpcre-28ed4f58e289c711ec3494e9f512da278506ece9.tar.gz
Fixed obscure bug when matching a null string in multiline mode with
newline=any. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@141 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog8
-rw-r--r--pcregrep.c10
-rw-r--r--pcretest.c23
-rw-r--r--testdata/testinput23
-rw-r--r--testdata/testinput73
-rw-r--r--testdata/testoutput25
-rw-r--r--testdata/testoutput75
7 files changed, 51 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 639dba5..da3397e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -126,6 +126,14 @@ Version 7.1 12-Mar-07
16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub"
in an attempt to make files that differ only in their line terminators
compare equal. This works on Linux.
+
+17. Under certain error circumstances pcregrep might try to free random memory
+ as it exited. This is now fixed, thanks to valgrind.
+
+18. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
+ "abc\r\n\r\n", it found an unwanted second match after the second \r. This
+ was because its rules for how to advance for /g after matching an empty
+ string did not allow for this case. They now check for it specially.
Version 7.0 19-Dec-06
diff --git a/pcregrep.c b/pcregrep.c
index 1750f20..79596f5 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -1408,7 +1408,11 @@ sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
suffix[process_options]);
pattern_list[pattern_count] =
pcre_compile(buffer, options, &error, &errptr, pcretables);
-if (pattern_list[pattern_count++] != NULL) return TRUE;
+if (pattern_list[pattern_count] != NULL)
+ {
+ pattern_count++;
+ return TRUE;
+ }
/* Handle compile errors */
@@ -1490,6 +1494,7 @@ int i, j;
int rc = 1;
int pcre_options = 0;
int cmd_pattern_count = 0;
+int hint_count = 0;
int errptr;
BOOL only_one_at_top;
char *patterns[MAX_PATTERN_COUNT];
@@ -1942,6 +1947,7 @@ for (j = 0; j < pattern_count; j++)
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
goto EXIT2;
}
+ hint_count++;
}
/* If there are include or exclude patterns, compile them. */
@@ -2001,7 +2007,7 @@ if (pattern_list != NULL)
}
if (hints_list != NULL)
{
- for (i = 0; i < pattern_count; i++) free(hints_list[i]);
+ for (i = 0; i < hint_count; i++) free(hints_list[i]);
free(hints_list);
}
return rc;
diff --git a/pcretest.c b/pcretest.c
index 54f02f1..405e9cf 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1972,6 +1972,7 @@ while (!done)
for (;; gmatched++) /* Loop for /g or /G */
{
+ int gany_fudge;
if (timeitm > 0)
{
register int i;
@@ -2255,25 +2256,39 @@ while (!done)
what Perl's /g options does. This turns out to be rather cunning. First
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
same point. If this fails (picked up above) we advance to the next
- character. */
+ character.
+
+ Yet more complication arises in the case when the newline option is
+ "any" and a pattern in multiline mode has to match at the start of a
+ line. If a previous match was at the end of a line, an advance of one
+ character just passes the \r, whereas we should prefer the longer newline
+ sequence, as does the code in pcre_exec(). So we fudge it. */
g_notempty = 0;
+ gany_fudge = 0;
+
if (use_offsets[0] == use_offsets[1])
{
if (use_offsets[0] == len) break;
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
+ if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&
+ (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
+ use_offsets[0] < len - 1 &&
+ bptr[use_offsets[0]] == '\r' &&
+ bptr[use_offsets[0]+1] == '\n')
+ gany_fudge = 1;
}
/* For /g, update the start offset, leaving the rest alone */
- if (do_g) start_offset = use_offsets[1];
+ if (do_g) start_offset = use_offsets[1] + gany_fudge;
/* For /G, update the pointer and length */
else
{
- bptr += use_offsets[1];
- len -= use_offsets[1];
+ bptr += use_offsets[1] + gany_fudge;
+ len -= use_offsets[1] + gany_fudge;
}
} /* End of loop for /g and /G */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7e39ee7..1de01cd 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2136,4 +2136,7 @@ a random value. /Ix
** Failers
abc\r\nxyz
+/(?m)^$/<any>g+
+ abc\r\n\r\n
+
/ End of testinput2 /
diff --git a/testdata/testinput7 b/testdata/testinput7
index 801fb95..4f1dbc5 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4240,4 +4240,7 @@
** Failers
XABC\B
+/(?m)^$/<any>g+
+ abc\r\n\r\n
+
/ End of testinput7 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 0357244..56806b6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -8136,4 +8136,9 @@ No match
abc\r\nxyz
No match
+/(?m)^$/<any>g+
+ abc\r\n\r\n
+ 0:
+ 0+ \x0d\x0a
+
/ End of testinput2 /
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 6fd6955..f4abe04 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -6970,4 +6970,9 @@ No match
XABC\B
No match
+/(?m)^$/<any>g+
+ abc\r\n\r\n
+ 0:
+ 0+ \x0d\x0a
+
/ End of testinput7 /