summary refs log tree commit diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2017-01-19 21:08:19 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2017-01-19 21:08:19 +0200
commit 5a619e1986724cf8e27b637509925a8da36837e8 (patch)
tree ffe527c6bc2bd170ca225d459b74d70cb74dfa70
parent baadccc7297fa9a0cd1bcc276385872fa0ca8b6e (diff)
download gawk-5a619e1986724cf8e27b637509925a8da36837e8.tar.gz
Speed up programs that toggle IGNORECASE a lot.
-rw-r--r-- ChangeLog 19
-rw-r--r-- NEWS 2
-rw-r--r-- awk.h 9
-rw-r--r-- awkgram.c 10
-rw-r--r-- awkgram.y 10
-rw-r--r-- io.c 17
-rw-r--r-- main.c 2
-rw-r--r-- re.c 60
-rw-r--r-- symbol.c 6
9 files changed, 83 insertions, 52 deletions
diff --git a/ChangeLog b/ChangeLog
index 62b9e858..44aa7304 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2017-01-04 Arnold Robbins <arnold@skeeve.com>
+
+ Trade space for time for programs that toggle IGNORECASE a lot.
+ Brings 25% to 39% speedup. NODE does not actually grow in size.
+
+ * awk.h (NODE::preg): Now an array of size two.
+ [CASE]: Flag no longer needed, so removed.
+ (IGNORECASE): Change type from int to bool.
+ * awkgram.y (make_regnode): Build two copies of the compiled regexp,
+ one without ignorecase, and one with.
+ * io.c (RS_re): Array replacing RS_re_yes_case and RS_re_no_case.
+ (set_RS): Use RS_re[IGNORECASE] as appropriate. Free and recompute
+ as needed.
+ * main.c (IGNORECASE): Change type from int to bool.
+ * re.c (re_update): Simplify the code. No need to check CASE flag
+ any longer. Recompute only if text of regexp changed.
+ * symbol.c (free_bc_internal): Adjust to free both elements of
+ m->re_reg.
+
2017-01-18 Andrew J. Schorr <aschorr@telemetry-investments.com>
* interpret.h (r_interpret): Increase robustness of the optimization
diff --git a/NEWS b/NEWS
index 40bf9cdc..71867cc0 100644
--- a/NEWS
+++ b/NEWS
@@ -98,6 +98,8 @@ Changes from 4.1.x to 4.2.0
recommend that you do so. Fortunately, the changes are fairly minor
and straightforward.
+24. Programs that toggle IGNORECASE a lot should now be noticeably faster.
+
Changes from 4.1.3 to 4.1.4
---------------------------
diff --git a/awk.h b/awk.h
index 278f54c5..d5c88fd9 100644
--- a/awk.h
+++ b/awk.h
@@ -343,7 +343,7 @@ typedef struct exp_node {
} l;
union {
struct exp_node *rptr;
- Regexp *preg;
+ Regexp *preg[2];
struct exp_node **av;
BUCKET **bv;
void *aq;
@@ -361,9 +361,8 @@ typedef struct exp_node {
struct exp_node *rn;
unsigned long cnt;
unsigned long reflags;
-# define CASE 1
-# define CONSTANT 2
-# define FS_DFLT 4
+# define CONSTANT 1
+# define FS_DFLT 2
} nodep;
struct {
@@ -1083,7 +1082,7 @@ extern long NF;
extern long NR;
extern long FNR;
extern int BINMODE;
-extern int IGNORECASE;
+extern bool IGNORECASE;
extern bool RS_is_null;
extern char *OFS;
extern int OFSlen;
diff --git a/awkgram.c b/awkgram.c
index b80caa11..c6f47dbd 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -7422,8 +7422,14 @@ make_regnode(int type, NODE *exp)
n->re_cnt = 1;
if (type == Node_regex) {
- n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
- if (n->re_reg == NULL) {
+ n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
+ if (n->re_reg[0] == NULL) {
+ freenode(n);
+ return NULL;
+ }
+ n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false);
+ if (n->re_reg[1] == NULL) {
+ refree(n->re_reg[0]);
freenode(n);
return NULL;
}
diff --git a/awkgram.y b/awkgram.y
index 7f957bc6..80278810 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -5002,8 +5002,14 @@ make_regnode(int type, NODE *exp)
n->re_cnt = 1;
if (type == Node_regex) {
- n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
- if (n->re_reg == NULL) {
+ n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
+ if (n->re_reg[0] == NULL) {
+ freenode(n);
+ return NULL;
+ }
+ n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false);
+ if (n->re_reg[1] == NULL) {
+ refree(n->re_reg[0]);
freenode(n);
return NULL;
}
diff --git a/io.c b/io.c
index 688723fd..d65f2aaa 100644
--- a/io.c
+++ b/io.c
@@ -318,8 +318,7 @@ static long read_default_timeout;
static struct redirect *red_head = NULL;
static NODE *RS = NULL;
-static Regexp *RS_re_yes_case; /* regexp for RS when ignoring case */
-static Regexp *RS_re_no_case; /* regexp for RS when not ignoring case */
+static Regexp *RS_re[2]; /* index 0 - don't ignore case, index 1, do */
static Regexp *RS_regexp;
static const char nonfatal[] = "NONFATAL";
@@ -3870,7 +3869,7 @@ set_RS()
* set_IGNORECASE() relies on this routine to call
* set_FS().
*/
- RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case);
+ RS_regexp = RS_re[IGNORECASE];
goto set_FS;
}
unref(save_rs);
@@ -3882,9 +3881,9 @@ set_RS()
* Please do not remerge the if condition; hinders memory deallocation
* in case of fatal error in make_regexp.
*/
- refree(RS_re_yes_case); /* NULL argument is ok */
- refree(RS_re_no_case);
- RS_re_yes_case = RS_re_no_case = RS_regexp = NULL;
+ refree(RS_re[0]); /* NULL argument is ok */
+ refree(RS_re[1]);
+ RS_re[0] = RS_re[1] = RS_regexp = NULL;
if (RS->stlen == 0) {
RS_is_null = true;
@@ -3892,9 +3891,9 @@ set_RS()
} else if (RS->stlen > 1 && ! do_traditional) {
static bool warned = false;
- RS_re_yes_case = make_regexp(RS->stptr, RS->stlen, false, true, true);
- RS_re_no_case = make_regexp(RS->stptr, RS->stlen, true, true, true);
- RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case);
+ RS_re[0] = make_regexp(RS->stptr, RS->stlen, false, true, true);
+ RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true);
+ RS_regexp = RS_re[IGNORECASE];
matchrec = rsrescan;
diff --git a/main.c b/main.c
index 5c814b96..56482f51 100644
--- a/main.c
+++ b/main.c
@@ -85,7 +85,7 @@ long NF;
long NR;
long FNR;
int BINMODE;
-int IGNORECASE;
+bool IGNORECASE;
char *OFS;
char *ORS;
char *OFMT;
diff --git a/re.c b/re.c
index 5be3d178..73e75cbb 100644
--- a/re.c
+++ b/re.c
@@ -349,50 +349,48 @@ re_update(NODE *t)
NODE *t1;
if (t->type == Node_val && (t->flags & REGEX) != 0)
- return t->typed_re->re_reg;
-
- if ((t->re_flags & CASE) == IGNORECASE) {
- /* regex was compiled with settings matching IGNORECASE */
- if ((t->re_flags & CONSTANT) != 0) {
- /* it's a constant, so just return it as is */
- assert(t->type == Node_regex);
- return t->re_reg;
- }
- t1 = t->re_exp;
- if (t->re_text != NULL) {
- /* if contents haven't changed, just return it */
- if (cmp_nodes(t->re_text, t1, true) == 0)
- return t->re_reg;
- /* things changed, fall through to recompile */
- unref(t->re_text);
- }
- /* get fresh copy of the text of the regexp */
- t->re_text = dupnode(t1);
+ return t->typed_re->re_reg[IGNORECASE];
+
+ if ((t->re_flags & CONSTANT) != 0) {
+ /* it's a constant, so just return it as is */
+ assert(t->type == Node_regex);
+ return t->re_reg[IGNORECASE];
}
- /* was compiled with different IGNORECASE or text changed */
+ t1 = t->re_exp;
+ if (t->re_text != NULL) {
+ /* if contents haven't changed, just return it */
+ if (cmp_nodes(t->re_text, t1, true) == 0)
+ return t->re_reg[IGNORECASE];
+ /* things changed, fall through to recompile */
+ unref(t->re_text);
+ }
+ /* get fresh copy of the text of the regexp */
+ t->re_text = dupnode(t1);
+
+ /* text changed */
/* free old */
- if (t->re_reg != NULL)
- refree(t->re_reg);
+ if (t->re_reg[0] != NULL)
+ refree(t->re_reg[0]);
+ if (t->re_reg[1] != NULL)
+ refree(t->re_reg[1]);
if (t->re_cnt > 0)
t->re_cnt++;
if (t->re_cnt > 10)
t->re_cnt = 0;
- if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) {
+ if (t->re_text == NULL) {
/* reset regexp text if needed */
t1 = t->re_exp;
unref(t->re_text);
t->re_text = dupnode(t1);
}
/* compile it */
- t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
- IGNORECASE, t->re_cnt, true);
-
- /* clear case flag */
- t->re_flags &= ~CASE;
- /* set current value of case flag */
- t->re_flags |= IGNORECASE;
- return t->re_reg;
+ t->re_reg[0] = make_regexp(t->re_text->stptr, t->re_text->stlen,
+ false, t->re_cnt, true);
+ t->re_reg[1] = make_regexp(t->re_text->stptr, t->re_text->stlen,
+ true, t->re_cnt, true);
+
+ return t->re_reg[IGNORECASE];
}
/* resetup --- choose what kind of regexps we match */
diff --git a/symbol.c b/symbol.c
index e1504300..65ed4d90 100644
--- a/symbol.c
+++ b/symbol.c
@@ -881,8 +881,10 @@ free_bc_internal(INSTRUCTION *cp)
case Op_match:
case Op_nomatch:
m = cp->memory;
- if (m->re_reg != NULL)
- refree(m->re_reg);
+ if (m->re_reg[0] != NULL)
+ refree(m->re_reg[0]);
+ if (m->re_reg[1] != NULL)
+ refree(m->re_reg[1]);
if (m->re_exp != NULL)
unref(m->re_exp);
if (m->re_text != NULL)