diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-27 20:55:13 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-27 20:56:42 -0700 |
commit | 054fed46cbaf92349a9691592d8a8d211c0cc1be (patch) | |
tree | e8461b077bc23115f69445adebeb083e2b5e1f4d | |
parent | 67822cff1f9f54b0de5f4645c76e0777359f0d44 (diff) | |
download | grep-054fed46cbaf92349a9691592d8a8d211c0cc1be.tar.gz |
dfa: minor tweaks, mostly to remove __attribute__ ((noinline))
That attribute isn't portable, and I found a way to get similar
performance with standard C features.
* NEWS: Document the recently-installed performance improvement.
* src/dfa.c (struct dfa): New member dfaexec.
(dfaexec_main): Remove unnecessary 'const'.
(dfaexec_mb, dfaexec_sb): Remove __attribute__ ((noinline));
no longer needed.
(dfaexec): Use new dfaexec member.
(dfainit, dfaoptimize, dfassbuild): Initialize it.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | src/dfa.c | 32 |
2 files changed, 26 insertions, 9 deletions
```diff
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,9 @@ GNU grep NEWS -*- outline -*-
   Performance has been greatly improved for searching files containing
   holes, on platforms where lseek's SEEK_DATA flag works efficiently.
 
+  Performance has improved for rejecting data that cannot match even
+  the first part of a nontrivial pattern.
+
   Performance has improved for very long strings in patterns.
 
   If a file contains data improperly encoded for the current locale,
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -342,6 +342,9 @@ struct dfa
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
   mbstate_t mbs;                 /* Multibyte conversion state. */
 
+  /* dfaexec implementation. */
+  char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *);
+
   /* The following are valid only if MB_CUR_MAX > 1. */
 
   /* The value of multibyte_prop[i] is defined by following rule.
@@ -3266,10 +3269,14 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
    If COUNT is non-NULL, increment *COUNT once for each newline processed.
    Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
    encountered a back-reference (1) or not (0). The caller may use this
-   to decide whether to fall back on a backtracking matcher. */
+   to decide whether to fall back on a backtracking matcher.
+
+   If MULTIBYTE, the input consists of multibyte characters and/or
+   encoding-error bytes. Otherwise, the input consists of single-byte
+   characters. */
 static inline char *
 dfaexec_main (struct dfa *d, char const *begin, char *end,
-              int allow_nl, size_t *count, int *backref, bool const multibyte)
+              int allow_nl, size_t *count, int *backref, bool multibyte)
 {
   state_num s, s1; /* Current state. */
   unsigned char const *p, *mbp; /* Current input character. */
@@ -3432,27 +3439,31 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
   return (char *) p;
 }
 
-static char *__attribute__((noinline))
+/* Specialized versions of dfaexec_main for multibyte and single-byte
+   cases. This is for performance. */
+
+static char *
 dfaexec_mb (struct dfa *d, char const *begin, char *end,
-              int allow_nl, size_t *count, int *backref)
+            int allow_nl, size_t *count, int *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, backref, true);
 }
 
-static char *__attribute__((noinline))
+static char *
 dfaexec_sb (struct dfa *d, char const *begin, char *end,
-              int allow_nl, size_t *count, int *backref)
+            int allow_nl, size_t *count, int *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, backref, false);
 }
 
+/* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, BACKREF, D->multibyte),
+   but faster. */
+
 char *
 dfaexec (struct dfa *d, char const *begin, char *end,
          int allow_nl, size_t *count, int *backref)
 {
-  return (d->multibyte
-          ? dfaexec_mb (d, begin, end, allow_nl, count, backref)
-          : dfaexec_sb (d, begin, end, allow_nl, count, backref));
+  return d->dfaexec (d, begin, end, allow_nl, count, backref);
 }
 
 struct dfa *
@@ -3504,6 +3515,7 @@ dfainit (struct dfa *d)
 {
   memset (d, 0, sizeof *d);
   d->multibyte = MB_CUR_MAX > 1;
+  d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
   d->fast = !d->multibyte;
 }
 
@@ -3544,6 +3556,7 @@ dfaoptimize (struct dfa *d)
 
   free_mbdata (d);
   d->multibyte = false;
+  d->dfaexec = dfaexec_sb;
 }
 
 static void
@@ -3557,6 +3570,7 @@ dfassbuild (struct dfa *d)
 
   *sup = *d;
   sup->multibyte = false;
+  sup->dfaexec = dfaexec_sb;
   sup->multibyte_prop = NULL;
   sup->mbcsets = NULL;
   sup->superset = NULL;
```