summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Blake <eblake@redhat.com>2021-04-17 14:08:53 -0500
committerEric Blake <eblake@redhat.com>2021-04-21 08:52:04 -0500
commit16a0c347aa6ac4792c5965064b3ff15f247b9275 (patch)
tree8737a7d057aa5293bb795e84c98ddf2b18cf136c
parentd3691f12b70adbc34634d4e4516180bd218fd6fc (diff)
downloadm4-16a0c347aa6ac4792c5965064b3ff15f247b9275.tar.gz
symtab: sort by hash before name
It is faster to do an integer compare than a string compare when managing hash table collisions (reserving a string compare for ties). Testing with CFLAGS=-DDEBUG_SYM=1 and 'time M4=src/m4 autoconf -f', the results are noticeable; on my machine, execution speeds up from 2.3s to 2.2s, and the debug trace that used to report: m4: lookup mode 0 called 1243301 times, 7859589 compares, 6734330 misses, 23941043 bytes now reports m4: lookup mode 0 called 1243301 times, 1125259 compares, 0 misses, 12433237 bytes * src/m4.h (struct symbol): Add hash member. * src/symtab.c (lookup_symbol): Sort by hash first, then name. (symtab_print_list): Add hash debug.
-rw-r--r--src/m4.h1
-rw-r--r--src/symtab.c15
2 files changed, 11 insertions, 5 deletions
diff --git a/src/m4.h b/src/m4.h
index a156376e..efb21c04 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -367,6 +367,7 @@ struct symbol
bool_bitfield deleted : 1;
int pending_expansions;
+ size_t hash;
char *name;
token_data data;
};
diff --git a/src/symtab.c b/src/symtab.c
index 4266d54f..45f9da31 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -23,7 +23,7 @@
symbol table is a simple chained hash table. Each symbol is described
by a struct symbol, which is placed in the hash table based upon the
symbol name. Symbols that hash to the same entry in the table are
- kept on a list, sorted by name. As a special case, to facilitate the
+ kept on a list, sorted by hash. As a special case, to facilitate the
"pushdef" and "popdef" builtins, a symbol can be several times in the
symbol table, one for each definition. Since the name is the same,
all the entries for the symbol will be on the same list, and will
@@ -182,7 +182,9 @@ lookup_symbol (const char *name, symbol_lookup mode)
for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
{
- cmp = strcmp (SYMBOL_NAME (sym), name);
+ cmp = (h > sym->hash) - (h < sym->hash);
+ if (cmp == 0)
+ cmp = strcmp (SYMBOL_NAME (sym), name);
if (cmp >= 0)
break;
}
@@ -216,6 +218,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
sym = (symbol *) xmalloc (sizeof (symbol));
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = SYMBOL_TRACED (old);
+ sym->hash = h;
SYMBOL_NAME (sym) = xstrdup (name);
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -240,6 +243,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
sym = (symbol *) xmalloc (sizeof (symbol));
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = false;
+ sym->hash = h;
SYMBOL_NAME (sym) = xstrdup (name);
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -298,6 +302,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
sym = (symbol *) xmalloc (sizeof (symbol));
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = true;
+ sym->hash = h;
SYMBOL_NAME (sym) = xstrdup (name);
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -395,9 +400,9 @@ symtab_print_list (int i)
for (h = 0; h < hash_table_size; h++)
for (bucket = symtab[h]; bucket != NULL; bucket = bucket->next)
for (sym = bucket; sym; sym = sym->stack)
- xprintf ("\tname %s, bucket %lu, addr %p, stack %p, next %p, "
- "flags%s%s, pending %d\n",
- SYMBOL_NAME (sym),
+ xprintf ("\tname %s, hash %lu, bucket %lu, addr %p, stack %p, "
+ "next %p, flags%s%s, pending %d\n",
+ SYMBOL_NAME (sym), (unsigned long int) sym->hash,
(unsigned long int) h, sym, SYMBOL_STACK (sym),
SYMBOL_NEXT (sym),
SYMBOL_TRACED (sym) ? " traced" : "",