diff options
author | Eric Blake <eblake@redhat.com> | 2021-04-17 14:08:53 -0500 |
---|---|---|
committer | Eric Blake <eblake@redhat.com> | 2021-04-21 08:52:04 -0500 |
commit | 16a0c347aa6ac4792c5965064b3ff15f247b9275 (patch) | |
tree | 8737a7d057aa5293bb795e84c98ddf2b18cf136c | |
parent | d3691f12b70adbc34634d4e4516180bd218fd6fc (diff) | |
download | m4-16a0c347aa6ac4792c5965064b3ff15f247b9275.tar.gz |
symtab: sort by hash before name
It is faster to do an integer compare than a string compare when
managing hash table collisions (reserving a string compare for ties).
Testing with CFLAGS=-DDEBUG_SYM=1 and 'time M4=src/m4 autoconf -f',
the results are noticeable; on my machine, execution speeds up from
2.3s to 2.2s, and the debug trace that used to report:
m4: lookup mode 0 called 1243301 times, 7859589 compares, 6734330 misses, 23941043 bytes
now reports
m4: lookup mode 0 called 1243301 times, 1125259 compares, 0 misses, 12433237 bytes
* src/m4.h (struct symbol): Add hash member.
* src/symtab.c (lookup_symbol): Sort by hash first, then name.
(symtab_print_list): Add hash debug.
-rw-r--r-- | src/m4.h | 1 | ||||
-rw-r--r-- | src/symtab.c | 15 |
2 files changed, 11 insertions, 5 deletions
@@ -367,6 +367,7 @@ struct symbol bool_bitfield deleted : 1; int pending_expansions; + size_t hash; char *name; token_data data; }; diff --git a/src/symtab.c b/src/symtab.c index 4266d54f..45f9da31 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -23,7 +23,7 @@ symbol table is a simple chained hash table. Each symbol is described by a struct symbol, which is placed in the hash table based upon the symbol name. Symbols that hash to the same entry in the table are - kept on a list, sorted by name. As a special case, to facilitate the + kept on a list, sorted by hash. As a special case, to facilitate the "pushdef" and "popdef" builtins, a symbol can be several times in the symbol table, one for each definition. Since the name is the same, all the entries for the symbol will be on the same list, and will @@ -182,7 +182,9 @@ lookup_symbol (const char *name, symbol_lookup mode) for (prev = NULL; sym != NULL; prev = sym, sym = sym->next) { - cmp = strcmp (SYMBOL_NAME (sym), name); + cmp = (h > sym->hash) - (h < sym->hash); + if (cmp == 0) + cmp = strcmp (SYMBOL_NAME (sym), name); if (cmp >= 0) break; } @@ -216,6 +218,7 @@ lookup_symbol (const char *name, symbol_lookup mode) sym = (symbol *) xmalloc (sizeof (symbol)); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = SYMBOL_TRACED (old); + sym->hash = h; SYMBOL_NAME (sym) = xstrdup (name); SYMBOL_MACRO_ARGS (sym) = false; SYMBOL_BLIND_NO_ARGS (sym) = false; @@ -240,6 +243,7 @@ lookup_symbol (const char *name, symbol_lookup mode) sym = (symbol *) xmalloc (sizeof (symbol)); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = false; + sym->hash = h; SYMBOL_NAME (sym) = xstrdup (name); SYMBOL_MACRO_ARGS (sym) = false; SYMBOL_BLIND_NO_ARGS (sym) = false; @@ -298,6 +302,7 @@ lookup_symbol (const char *name, symbol_lookup mode) sym = (symbol *) xmalloc (sizeof (symbol)); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = true; + sym->hash = h; SYMBOL_NAME (sym) = xstrdup (name); SYMBOL_MACRO_ARGS (sym) = false; SYMBOL_BLIND_NO_ARGS (sym) = false; @@ -395,9 +400,9 @@ symtab_print_list (int i) for (h = 0; h < hash_table_size; h++) for (bucket = symtab[h]; bucket != NULL; bucket = bucket->next) for (sym = bucket; sym; sym = sym->stack) - xprintf ("\tname %s, bucket %lu, addr %p, stack %p, next %p, " - "flags%s%s, pending %d\n", - SYMBOL_NAME (sym), + xprintf ("\tname %s, hash %lu, bucket %lu, addr %p, stack %p, " + "next %p, flags%s%s, pending %d\n", + SYMBOL_NAME (sym), (unsigned long int) sym->hash, (unsigned long int) h, sym, SYMBOL_STACK (sym), SYMBOL_NEXT (sym), SYMBOL_TRACED (sym) ? " traced" : "", |