/* GNU m4 -- A simple macro processor Copyright (C) 2001, 2006-2010, 2013-2014 Free Software Foundation, Inc. Written by Gary V. Vaughan This file is part of GNU M4. GNU M4 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. GNU M4 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* TODO: - Use an obstack to manage the node memory. - Implement the macroized magic values with the API. */ #include #include "hash.h" #include "m4private.h" #include "bitrotate.h" #include typedef struct hash_node hash_node; struct m4_hash { size_t size; /* number of buckets allocated */ size_t length; /* number of elements inserted */ m4_hash_hash_func *hash_func; m4_hash_cmp_func *cmp_func; hash_node **buckets; #ifndef NDEBUG m4_hash_iterator *iter; /* current iterator */ #endif }; struct hash_node { hash_node *next; const void *key; void *value; }; struct m4_hash_iterator { const m4_hash *hash; /* contains the buckets */ hash_node * place; /* the node we are about to return */ hash_node * next; /* the next node, incase PLACE is removed */ size_t next_bucket; /* the next bucket index following NEXT */ #ifndef NDEBUG m4_hash_iterator *chain; /* multiple iterators visiting one hash */ #endif }; #define HASH_SIZE(hash) ((hash)->size) #define HASH_LENGTH(hash) ((hash)->length) #define HASH_BUCKETS(hash) ((hash)->buckets) #define HASH_HASH_FUNC(hash) ((hash)->hash_func) #define HASH_CMP_FUNC(hash) ((hash)->cmp_func) #define NODE_NEXT(node) ((node)->next) #define NODE_KEY(node) ((node)->key) #define NODE_VALUE(node) ((node)->value) #define ITERATOR_HASH(i) ((i)->hash) #define ITERATOR_PLACE(i) ((i)->place) #define ITERATOR_NEXT(i) ((i)->next) #define ITERATOR_NEXT_BUCKET(i) ((i)->next_bucket) /*#define ITERATOR_NEXT_NEXT(i) NODE_NEXT (ITERATOR_PLACE (i))*/ /* Helper macros. */ #define BUCKET_NTH(hash, n) (HASH_BUCKETS (hash)[n]) #define BUCKET_COUNT(hash, key) \ ((*HASH_HASH_FUNC (hash))(key) % HASH_SIZE (hash)) #define BUCKET_KEY(hash, key) \ (BUCKET_NTH ((hash), BUCKET_COUNT ((hash), (key)))) /* Debugging macros. */ #ifdef NDEBUG # define HASH_ITER(hash) 0 # define ITER_CHAIN(iter) 0 #else # define HASH_ITER(hash) (((m4_hash *) hash)->iter) # define ITER_CHAIN(iter) ((iter)->chain) #endif static void bucket_insert (m4_hash *hash, hash_node *bucket); static void bucket_delete (m4_hash *hash, size_t i); static hash_node * node_new (const void *key, void *value); static void node_insert (m4_hash *hash, hash_node *node); static hash_node * node_lookup (m4_hash *hash, const void *key); static void node_delete (m4_hash *hash, hash_node *node); static void maybe_grow (m4_hash *hash); static hash_node *free_list = NULL; /* Allocate and return a new, unpopulated but initialised m4_hash with SIZE buckets, where HASH_FUNC will be used to generate bucket numbers and CMP_FUNC will be called to compare keys. */ m4_hash * m4_hash_new (size_t size, m4_hash_hash_func *hash_func, m4_hash_cmp_func *cmp_func) { m4_hash *hash; assert (hash_func); assert (cmp_func); if (size == 0) size = M4_HASH_DEFAULT_SIZE; hash = (m4_hash *) xmalloc (sizeof *hash); HASH_SIZE (hash) = size; HASH_LENGTH (hash) = 0; HASH_BUCKETS (hash) = (hash_node **) xcalloc (size, sizeof *HASH_BUCKETS (hash)); HASH_HASH_FUNC (hash) = hash_func; HASH_CMP_FUNC (hash) = cmp_func; #ifndef NDEBUG HASH_ITER (hash) = NULL; #endif return hash; } m4_hash * m4_hash_dup (m4_hash *src, m4_hash_copy_func *copy) { m4_hash *dest; assert (src); assert (copy); dest = m4_hash_new (HASH_SIZE (src), HASH_HASH_FUNC (src), HASH_CMP_FUNC (src)); m4_hash_apply (src, (m4_hash_apply_func *) copy, dest); return dest; } /* Recycle each of the nodes in HASH onto the free list, and release the rest of the memory used by the table. Memory addressed by the recycled nodes is _NOT_ freed: this needs to be done manually to prevent memory leaks. This is not safe to call while HASH is being iterated. */ void m4_hash_delete (m4_hash *hash) { size_t i; assert (hash); assert (!HASH_ITER (hash)); for (i = 0; i < HASH_SIZE (hash); ++i) if (BUCKET_NTH (hash, i)) bucket_delete (hash, i); free (HASH_BUCKETS (hash)); free (hash); } /* Check that the nodes in bucket I have been cleared, and recycle each of the nodes in the bucket to the free list. Bucket I must not be empty when this function is called. */ static void bucket_delete (m4_hash *hash, size_t i) { hash_node *node; assert (hash); assert (BUCKET_NTH (hash, i)); assert (i < HASH_SIZE (hash)); for (node = BUCKET_NTH (hash, i); node->next; node = NODE_NEXT (node)) { assert (NODE_KEY (node) == NULL); --HASH_LENGTH (hash); } assert (NODE_KEY (node) == NULL); --HASH_LENGTH (hash); NODE_NEXT (node) = free_list; free_list = BUCKET_NTH (hash, i); BUCKET_NTH (hash, i) = NULL; } /* Create and initialise a new node with KEY and VALUE, by reusing a node from the free list if possible. */ static hash_node * node_new (const void *key, void *value) { hash_node *node = NULL; if (free_list) { node = free_list; free_list = NODE_NEXT (free_list); } else node = (hash_node *) xmalloc (sizeof *node); assert (node); NODE_NEXT (node) = NULL; NODE_KEY (node) = key; NODE_VALUE (node) = value; return node; } /* Check that NODE has been cleared, and recycle it to the free list. */ static void node_delete (m4_hash *hash, hash_node *node) { assert (node); assert (NODE_KEY (node) == NULL); NODE_NEXT (node) = free_list; free_list = node; --HASH_LENGTH (hash); } /* Create a new entry in HASH with KEY and VALUE, making use of nodes in the free list if possible, and potentially growing the size of the table if node density is too high. This is not safe to call while HASH is being iterated. Currently, it is not safe to call this if another entry already matches KEY. */ const void * m4_hash_insert (m4_hash *hash, const void *key, void *value) { hash_node *node; assert (hash); assert (!HASH_ITER (hash)); node = node_new (key, value); node_insert (hash, node); maybe_grow (hash); return key; } /* Push the unconnected NODE on to the front of the appropriate bucket, effectively preventing retrieval of other nodes with the same key (where "sameness" is determined by HASH's cmp_func). */ static void node_insert (m4_hash *hash, hash_node *node) { size_t n; assert (hash); assert (node); assert (NODE_NEXT (node) == NULL); n = BUCKET_COUNT (hash, NODE_KEY (node)); NODE_NEXT (node) = BUCKET_NTH (hash, n); BUCKET_NTH (hash, n) = node; ++HASH_LENGTH (hash); } /* Remove from HASH, the first node with key KEY; comparing keys with HASH's cmp_func. Any nodes with the same KEY previously hidden by the removed node will become visible again. The key field of the removed node is returned, or NULL if there was no match. This is unsafe if multiple iterators are visiting HASH, or when a lone iterator is visiting on a different key. */ void * m4_hash_remove (m4_hash *hash, const void *key) { size_t n; hash_node *node = NULL; #ifndef NDEBUG m4_hash_iterator *iter = HASH_ITER (hash); assert (hash); if (HASH_ITER (hash)) { assert (!ITER_CHAIN (iter)); assert (ITERATOR_PLACE (iter)); } #endif n = BUCKET_COUNT (hash, key); do { hash_node *next = node ? NODE_NEXT (node) : BUCKET_NTH (hash, n); if (next && ((*HASH_CMP_FUNC (hash)) (NODE_KEY (next), key) == 0)) { if (node) NODE_NEXT (node) = NODE_NEXT (next); else BUCKET_NTH (hash, n) = NODE_NEXT (next); key = NODE_KEY (next); #ifndef NDEBUG if (iter) assert (ITERATOR_PLACE (iter) == next); NODE_KEY (next) = NULL; #endif node_delete (hash, next); return (void *) key; /* Cast away const. */ } node = next; } while (node); return NULL; } /* Return the address of the value field of the first node in HASH that has a matching KEY. The address is returned so that an explicit NULL value can be distinguished from a failed lookup (also NULL). Fortuitously for M4, this also means that the value field can be changed `in situ' to implement a value stack. Safe to call even when an iterator is in force. */ void ** m4_hash_lookup (m4_hash *hash, const void *key) { hash_node *node; assert (hash); node = node_lookup (hash, key); return node ? &NODE_VALUE (node) : NULL; } /* Return the first node in HASH that has a matching KEY. */ static hash_node * node_lookup (m4_hash *hash, const void *key) { hash_node *node; assert (hash); node = BUCKET_KEY (hash, key); while (node && (*HASH_CMP_FUNC (hash)) (NODE_KEY (node), key)) node = NODE_NEXT (node); return node; } /* How many entries are currently contained by HASH. Safe to call even during an interation. */ size_t M4_GNUC_PURE m4_get_hash_length (m4_hash *hash) { assert (hash); return HASH_LENGTH (hash); } #if 0 /* Force the number of buckets to be the given value. You probably ought not to be using this function once the table has been in use, since the maximum density algorithm will grow the number of buckets back to what was there before if you try to shrink the table. It is useful to set a smaller or larger initial size if you know in advance what order of magnitude of entries will be in the table. Be aware that the efficiency of the lookup and grow features require that the size always be 1 less than a power of 2. Unsafe if HASH is being visited by an iterator. */ void m4_hash_resize (m4_hash *hash, size_t size) { hash_node **original_buckets; size_t original_size; assert (hash); assert (!HASH_ITER (hash)); original_size = HASH_SIZE (hash); original_buckets = HASH_BUCKETS (hash); HASH_SIZE (hash) = size; HASH_BUCKETS (hash) = (hash_node **) xcalloc (size, sizeof *HASH_BUCKETS (hash)); { size_t i; for (i = 0; i < original_size; ++i) if (original_buckets[i]) bucket_insert (hash, original_buckets[i]); } free (original_buckets); } #endif /* If the node density breaks the threshold, increase the size of HASH and repopulate with the original nodes. */ static void maybe_grow (m4_hash *hash) { float nodes_per_bucket; assert (hash); nodes_per_bucket = (float) HASH_LENGTH (hash) / (float) HASH_SIZE (hash); if (nodes_per_bucket > (float) M4_HASH_MAXIMUM_DENSITY) { size_t original_size = HASH_SIZE (hash); hash_node **original_buckets = HASH_BUCKETS (hash); /* HASH sizes are always 1 less than a power of 2. */ HASH_SIZE (hash) = (2 * (1 + original_size)) -1; HASH_BUCKETS (hash) = (hash_node **) xcalloc (HASH_SIZE (hash), sizeof *HASH_BUCKETS (hash)); { size_t i; for (i = 0; i < original_size; ++i) if (original_buckets[i]) bucket_insert (hash, original_buckets[i]); } free (original_buckets); } } /* Insert each node in BUCKET into HASH. Relative ordering of nodes is not preserved. This would need to change if we were to guarantee relative ordering within a bucket for identical keys. */ static void bucket_insert (m4_hash *hash, hash_node *bucket) { assert (hash); assert (bucket); do { hash_node *next = NODE_NEXT (bucket); /* Break link to rest of the bucket before reinserting. */ NODE_NEXT (bucket) = NULL; node_insert (hash, bucket); bucket = next; } while (bucket); } /* Reclaim all memory used by free nodes. Safe to call at any time, although only worth calling at program shutdown to verify no leaks. */ void m4_hash_exit (void) { while (free_list) { hash_node *stale = free_list; free_list = NODE_NEXT (stale); free (stale); } } /* Iterate over a given HASH. Start with PLACE being NULL, then repeat with PLACE being the previous return value. The return value is the current location of the iterator, or NULL when the walk is complete. Call m4_free_hash_iterator to abort iteration. During the iteration, it is safe to search the list, and if no other iterator is active, it is safe to remove the key pointed to by this iterator. All other actions that modify HASH are unsafe. */ m4_hash_iterator * m4_get_hash_iterator_next (const m4_hash *hash, m4_hash_iterator *place) { assert (hash); assert (!place || (ITERATOR_HASH (place) == hash)); /* On the first iteration, allocate an iterator. */ if (!place) { place = (m4_hash_iterator *) xzalloc (sizeof *place); ITERATOR_HASH (place) = hash; #ifndef NDEBUG ITER_CHAIN (place) = HASH_ITER (hash); HASH_ITER (hash) = place; #endif } next: ITERATOR_PLACE (place) = ITERATOR_NEXT (place); /* If there is another node in the current bucket, select it. */ if (ITERATOR_NEXT (place) && NODE_NEXT (ITERATOR_NEXT (place))) { ITERATOR_NEXT (place) = NODE_NEXT (ITERATOR_NEXT (place)); } else { /* Find the next non-empty bucket. */ while ((ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash)) && (BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place)) == NULL)) { ++ITERATOR_NEXT_BUCKET (place); } /* Select the first node in the new bucket. */ if (ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash)) { ITERATOR_NEXT (place) = BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place)); } else ITERATOR_NEXT (place) = NULL; /* Advance the `next' reference. */ ++ITERATOR_NEXT_BUCKET (place); } /* If there are no more nodes to return, recycle the iterator memory. */ if (! (ITERATOR_PLACE (place) || ITERATOR_NEXT (place))) { m4_free_hash_iterator (hash, place); return NULL; } /* On the first call we need to put the 1st node in PLACE and the 2nd node in NEXT. */ if (ITERATOR_NEXT (place) && !ITERATOR_PLACE (place)) goto next; assert (place && ITERATOR_PLACE (place)); return place; } /* Clean up the iterator PLACE within HASH when aborting an iteration early. */ void m4_free_hash_iterator (const m4_hash *hash, m4_hash_iterator *place) { #ifndef NDEBUG m4_hash_iterator *iter = NULL; m4_hash_iterator *next; assert (hash); assert (place && (ITERATOR_HASH (place) == hash)); do { next = iter ? ITER_CHAIN (iter) : HASH_ITER (hash); if (place == next) { if (iter) ITER_CHAIN (iter) = ITER_CHAIN (next); else HASH_ITER (hash) = ITER_CHAIN (next); break; } iter = next; } while (iter); assert (next); #endif free (place); } /* Return the key being visited by the iterator PLACE. */ const void * M4_GNUC_PURE m4_get_hash_iterator_key (m4_hash_iterator *place) { assert (place); return NODE_KEY (ITERATOR_PLACE (place)); } /* Return the value being visited by the iterator PLACE. */ void * M4_GNUC_PURE m4_get_hash_iterator_value (m4_hash_iterator *place) { assert (place); return NODE_VALUE (ITERATOR_PLACE (place)); } /* The following function is used for the cases where we want to do something to each and every entry in HASH. This function traverses the hash table, and calls a specified function FUNC for each entry in the table. FUNC is called with a pointer to the entry key, value, and the passed DATA argument. If FUNC returns non-NULL, abort the iteration and return that value; a return of NULL implies success on all entries. */ void * m4_hash_apply (m4_hash *hash, m4_hash_apply_func *func, void *userdata) { m4_hash_iterator *place = NULL; void * result = NULL; assert (hash); assert (func); while ((place = m4_get_hash_iterator_next (hash, place))) { result = (*func) (hash, m4_get_hash_iterator_key (place), m4_get_hash_iterator_value (place), userdata); if (result != NULL) { m4_free_hash_iterator (hash, place); break; } } return result; } /* Using a string (char * and size_t pair) as the hash key is common enough that we provide implementations here for use in client hash table routines. */ /* Return a hash value for a string, similar to gnulib's hash module, but with length factored in. */ size_t M4_GNUC_PURE m4_hash_string_hash (const void *ptr) { const m4_string *key = (const m4_string *) ptr; const char *s = key->str; size_t len = key->len; size_t val = len; while (len--) val = rotl_sz (val, 7) + to_uchar (*s++); return val; } /* Comparison function for hash keys -- used by the underlying hash table ADT when searching for a key match during name lookup. */ int M4_GNUC_PURE m4_hash_string_cmp (const void *key, const void *try) { const m4_string *a = (const m4_string *) key; const m4_string *b = (const m4_string *) try; if (a->len < b->len) return -1; if (b->len < a->len) return 1; return memcmp (a->str, b->str, a->len); }