summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-10-24 13:27:29 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-10-24 13:35:19 -0700
commit: 16f6616acaa4fcf44edbc3b56ca10fc06b07cf25 (patch)
tree: 7c29ee4edb115141168c73fa7a8eccdc2ff6e9af
parent: b2490802defe3c3bf7ef0036a4515d006a08a769 (diff)
download: grep-16f6616acaa4fcf44edbc3b56ca10fc06b07cf25.tar.gz
grep: fix grep -P crash
Reported by Shlomi Fish in: http://bugs.gnu.org/18806
Commit 9fa500407137f49f6edc3c6b4ee6c7096f0190c5 (2014-09-16) is a hack that I put in to speed up 'grep -P'. Unfortunately, not only is it a violation of modularity, it's also a bug magnet, as we have found out with Bug#18738 and Bug#18806. Remove the optimization instead of applying more bandaids. Perhaps we can think of a better way of doing the optimization, or perhaps we can just live with a slower grep -P (as -P is inherently slower anyway...).
* src/grep.c, src/grep.h (validated_boundary): Remove. All uses removed.
* src/pcresearch.c (Pexecute): Do not worry about validated_boundary.
-rw-r--r-- src/grep.c 3
-rw-r--r-- src/grep.h 4
-rw-r--r-- src/pcresearch.c 37
3 files changed, 14 insertions, 30 deletions
diff --git a/src/grep.c b/src/grep.c
index a0f2620f..0a4ac277 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -352,7 +352,6 @@ bool match_words;
bool match_lines;
char eolbyte;
enum textbin input_textbin;
-char const *validated_boundary;
static char const *matcher;
@@ -1226,7 +1225,6 @@ grepbuf (char const *beg, char const *lim)
intmax_t outleft0 = outleft;
char const *p;
char const *endp;
- validated_boundary = beg;
for (p = beg; p < lim; p = endp)
{
@@ -2516,7 +2514,6 @@ main (int argc, char **argv)
/* We need one byte prior and one after. */
char eolbytes[3] = { 0, eolbyte, 0 };
size_t match_size;
- validated_boundary = eolbytes + 1;
skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
== out_invert);
diff --git a/src/grep.h b/src/grep.h
index 86259fbe..02052b48 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -47,8 +47,4 @@ enum textbin
/* Input file type. */
extern enum textbin input_textbin;
-/* Validation boundary. Earlier bytes have already been validated by
- the PCRE matcher, which cares about this sort of thing. */
-extern char const *validated_boundary;
-
#endif
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fd5bdec..5451029e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -156,7 +156,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
char const *line_start = buf;
int e = PCRE_ERROR_NOMATCH;
char const *line_end;
- char const *validated = validated_boundary;
/* If the input type is unknown, the caller is still testing the
input, which means the current buffer cannot contain encoding
@@ -210,34 +209,28 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
int options = 0;
if (!bol)
options |= PCRE_NOTBOL;
- if (multiline || p + search_bytes <= validated)
+ if (multiline)
options |= PCRE_NO_UTF8_CHECK;
- int valid_bytes = validated - p;
- if (valid_bytes <= 0)
+ e = pcre_exec (cre, extra, p, search_bytes, 0,
+ options, sub, NSUB);
+ if (e != PCRE_ERROR_BADUTF8)
{
- e = pcre_exec (cre, extra, p, search_bytes, 0,
- options, sub, NSUB);
- if (e != PCRE_ERROR_BADUTF8)
+ if (0 < e && multiline && sub[1] - sub[0] != 0)
{
- validated = p + search_bytes;
- if (0 < e && multiline && sub[1] - sub[0] != 0)
+ char const *nl = memchr (p + sub[0], eolbyte,
+ sub[1] - sub[0]);
+ if (nl)
{
- char const *nl = memchr (p + sub[0], eolbyte,
- sub[1] - sub[0]);
- if (nl)
- {
- /* This match crosses a line boundary; reject it. */
- p += sub[0];
- line_end = nl;
- continue;
- }
+ /* This match crosses a line boundary; reject it. */
+ p += sub[0];
+ line_end = nl;
+ continue;
}
- break;
}
- valid_bytes = sub[0];
- validated = p + valid_bytes;
+ break;
}
+ int valid_bytes = sub[0];
/* Try to match the string before the encoding error.
Again, handle the empty-match case specially, for speed. */
@@ -263,8 +256,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
bol = true;
}
- validated_boundary = validated;
-
if (e <= 0)
{
switch (e)