summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2014-09-27 20:55:13 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2014-09-27 20:56:42 -0700
commit 054fed46cbaf92349a9691592d8a8d211c0cc1be (patch)
tree e8461b077bc23115f69445adebeb083e2b5e1f4d
parent 67822cff1f9f54b0de5f4645c76e0777359f0d44 (diff)
download grep-054fed46cbaf92349a9691592d8a8d211c0cc1be.tar.gz
dfa: minor tweaks, mostly to remove __attribute__ ((noinline))
That attribute isn't portable, and I found a way to get similar
performance with standard C features.
* NEWS: Document the recently-installed performance improvement.
* src/dfa.c (struct dfa): New member dfaexec.
(dfaexec_main): Remove unnecessary 'const'.
(dfaexec_mb, dfaexec_sb): Remove __attribute__ ((noinline));
no longer needed.
(dfaexec): Use new dfaexec member.
(dfainit, dfaoptimize, dfassbuild): Initialize it.
-rw-r--r-- NEWS 3
-rw-r--r-- src/dfa.c 32
2 files changed, 26 insertions, 9 deletions
diff --git a/NEWS b/NEWS
index 5bc89c0c..a146e9e8 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,9 @@ GNU grep NEWS -*- outline -*-
Performance has been greatly improved for searching files containing
holes, on platforms where lseek's SEEK_DATA flag works efficiently.
+ Performance has improved for rejecting data that cannot match even
+ the first part of a nontrivial pattern.
+
Performance has improved for very long strings in patterns.
If a file contains data improperly encoded for the current locale,
diff --git a/src/dfa.c b/src/dfa.c
index ff23c07c..4f45ffff 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -342,6 +342,9 @@ struct dfa
token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
mbstate_t mbs; /* Multibyte conversion state. */
+ /* dfaexec implementation. */
+ char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *);
+
/* The following are valid only if MB_CUR_MAX > 1. */
/* The value of multibyte_prop[i] is defined by following rule.
@@ -3266,10 +3269,14 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
If COUNT is non-NULL, increment *COUNT once for each newline processed.
Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
encountered a back-reference (1) or not (0). The caller may use this
- to decide whether to fall back on a backtracking matcher. */
+ to decide whether to fall back on a backtracking matcher.
+
+ If MULTIBYTE, the input consists of multibyte characters and/or
+ encoding-error bytes. Otherwise, the input consists of single-byte
+ characters. */
static inline char *
dfaexec_main (struct dfa *d, char const *begin, char *end,
- int allow_nl, size_t *count, int *backref, bool const multibyte)
+ int allow_nl, size_t *count, int *backref, bool multibyte)
{
state_num s, s1; /* Current state. */
unsigned char const *p, *mbp; /* Current input character. */
@@ -3432,27 +3439,31 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
return (char *) p;
}
-static char *__attribute__((noinline))
+/* Specialized versions of dfaexec_main for multibyte and single-byte
+ cases. This is for performance. */
+
+static char *
dfaexec_mb (struct dfa *d, char const *begin, char *end,
- int allow_nl, size_t *count, int *backref)
+ int allow_nl, size_t *count, int *backref)
{
return dfaexec_main (d, begin, end, allow_nl, count, backref, true);
}
-static char *__attribute__((noinline))
+static char *
dfaexec_sb (struct dfa *d, char const *begin, char *end,
- int allow_nl, size_t *count, int *backref)
+ int allow_nl, size_t *count, int *backref)
{
return dfaexec_main (d, begin, end, allow_nl, count, backref, false);
}
+/* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, BACKREF, D->multibyte),
+ but faster. */
+
char *
dfaexec (struct dfa *d, char const *begin, char *end,
int allow_nl, size_t *count, int *backref)
{
- return (d->multibyte
- ? dfaexec_mb (d, begin, end, allow_nl, count, backref)
- : dfaexec_sb (d, begin, end, allow_nl, count, backref));
+ return d->dfaexec (d, begin, end, allow_nl, count, backref);
}
struct dfa *
@@ -3504,6 +3515,7 @@ dfainit (struct dfa *d)
{
memset (d, 0, sizeof *d);
d->multibyte = MB_CUR_MAX > 1;
+ d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
d->fast = !d->multibyte;
}
@@ -3544,6 +3556,7 @@ dfaoptimize (struct dfa *d)
free_mbdata (d);
d->multibyte = false;
+ d->dfaexec = dfaexec_sb;
}
static void
@@ -3557,6 +3570,7 @@ dfassbuild (struct dfa *d)
*sup = *d;
sup->multibyte = false;
+ sup->dfaexec = dfaexec_sb;
sup->multibyte_prop = NULL;
sup->mbcsets = NULL;
sup->superset = NULL;