diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-30 15:46:27 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-30 15:46:27 +0000 |
commit | 28ed4f58e289c711ec3494e9f512da278506ece9 (patch) | |
tree | 06a6f1e644a86f80dd401ba66951259bc112c1b7 | |
parent | 44b15daf3a13326a8e41a73b882fb3cbad686c66 (diff) | |
download | pcre-28ed4f58e289c711ec3494e9f512da278506ece9.tar.gz |
Fixed obscure bug when matching a null string in multiline mode with
newline=any.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@141 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | pcregrep.c | 10 | ||||
-rw-r--r-- | pcretest.c | 23 | ||||
-rw-r--r-- | testdata/testinput2 | 3 | ||||
-rw-r--r-- | testdata/testinput7 | 3 | ||||
-rw-r--r-- | testdata/testoutput2 | 5 | ||||
-rw-r--r-- | testdata/testoutput7 | 5 |
7 files changed, 51 insertions, 6 deletions
@@ -126,6 +126,14 @@ Version 7.1 12-Mar-07 16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub" in an attempt to make files that differ only in their line terminators compare equal. This works on Linux. + +17. Under certain error circumstances pcregrep might try to free random memory + as it exited. This is now fixed, thanks to valgrind. + +19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string + "abc\r\n\r\n", it found an unwanted second match after the second \r. This + was because its rules for how to advance for /g after matching an empty + string did not allow for this case. They now check for it specially. Version 7.0 19-Dec-06 @@ -1408,7 +1408,11 @@ sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, suffix[process_options]); pattern_list[pattern_count] = pcre_compile(buffer, options, &error, &errptr, pcretables); -if (pattern_list[pattern_count++] != NULL) return TRUE; +if (pattern_list[pattern_count] != NULL) + { + pattern_count++; + return TRUE; + } /* Handle compile errors */ @@ -1490,6 +1494,7 @@ int i, j; int rc = 1; int pcre_options = 0; int cmd_pattern_count = 0; +int hint_count = 0; int errptr; BOOL only_one_at_top; char *patterns[MAX_PATTERN_COUNT]; @@ -1942,6 +1947,7 @@ for (j = 0; j < pattern_count; j++) fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); goto EXIT2; } + hint_count++; } /* If there are include or exclude patterns, compile them. */ @@ -2001,7 +2007,7 @@ if (pattern_list != NULL) } if (hints_list != NULL) { - for (i = 0; i < pattern_count; i++) free(hints_list[i]); + for (i = 0; i < hint_count; i++) free(hints_list[i]); free(hints_list); } return rc; @@ -1972,6 +1972,7 @@ while (!done) for (;; gmatched++) /* Loop for /g or /G */ { + int gany_fudge; if (timeitm > 0) { register int i; @@ -2255,25 +2256,39 @@ while (!done) what Perl's /g options does. This turns out to be rather cunning. 
First we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the same point. If this fails (picked up above) we advance to the next - character. */ + character. + + Yet more complication arises in the case when the newline option is + "any" and a pattern in multiline mode has to match at the start of a + line. If a previous match was at the end of a line, an advance of one + character just passes the \r, whereas we should prefer the longer newline + sequence, as does the code in pcre_exec(). So we fudge it. */ g_notempty = 0; + gany_fudge = 0; + if (use_offsets[0] == use_offsets[1]) { if (use_offsets[0] == len) break; g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; + if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 && + (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY && + use_offsets[0] < len - 1 && + bptr[use_offsets[0]] == '\r' && + bptr[use_offsets[0]+1] == '\n') + gany_fudge = 1; } /* For /g, update the start offset, leaving the rest alone */ - if (do_g) start_offset = use_offsets[1]; + if (do_g) start_offset = use_offsets[1] + gany_fudge; /* For /G, update the pointer and length */ else { - bptr += use_offsets[1]; - len -= use_offsets[1]; + bptr += use_offsets[1] + gany_fudge; + len -= use_offsets[1] + gany_fudge; } } /* End of loop for /g and /G */ diff --git a/testdata/testinput2 b/testdata/testinput2 index 7e39ee7..1de01cd 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2136,4 +2136,7 @@ a random value. 
/Ix ** Failers abc\r\nxyz +/(?m)^$/<any>g+ + abc\r\n\r\n + / End of testinput2 / diff --git a/testdata/testinput7 b/testdata/testinput7 index 801fb95..4f1dbc5 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -4240,4 +4240,7 @@ ** Failers XABC\B +/(?m)^$/<any>g+ + abc\r\n\r\n + / End of testinput7 / diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 0357244..56806b6 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -8136,4 +8136,9 @@ No match abc\r\nxyz No match +/(?m)^$/<any>g+ + abc\r\n\r\n + 0: + 0+ \x0d\x0a + / End of testinput2 / diff --git a/testdata/testoutput7 b/testdata/testoutput7 index 6fd6955..f4abe04 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -6970,4 +6970,9 @@ No match XABC\B No match +/(?m)^$/<any>g+ + abc\r\n\r\n + 0: + 0+ \x0d\x0a + / End of testinput7 / |