m4/hash.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671

/* GNU m4 -- A simple macro processor
   Copyright (C) 2001, 2006-2010, 2013-2014 Free Software Foundation,
   Inc.
   Written by Gary V. Vaughan <gary@gnu.org>

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* TODO:
   - Use an obstack to manage the node memory.
   - Implement the macroized magic values with the API.
 */

#include <config.h>

#include "hash.h"
#include "m4private.h"

#include "bitrotate.h"
#include <limits.h>

/* Forward declaration, so that struct m4_hash can refer to bucket
   nodes before struct hash_node is defined below.  */
typedef struct hash_node hash_node;

/* A hash table: an array of SIZE bucket heads, each of which is
   either NULL or the first node of a singly linked chain.  */
struct m4_hash
{
  size_t size;                  /* number of buckets allocated */
  size_t length;                /* number of elements inserted */
  m4_hash_hash_func *hash_func; /* maps a key to its hash value */
  m4_hash_cmp_func *cmp_func;   /* returns 0 when two keys match */
  hash_node **buckets;          /* array of SIZE chain heads */
#ifndef NDEBUG
  m4_hash_iterator *iter;       /* most recently started iterator; head
                                   of the chain of live iterators */
#endif
};

/* One entry of a bucket chain.  The same structure is threaded
   through NEXT when the node sits on the free list.  */
struct hash_node
{
  hash_node *next;              /* next node in the chain, or NULL */
  const void *key;              /* key, as passed to m4_hash_insert */
  void *value;                  /* value associated with KEY */
};


/* State of one walk over a table.  The iterator always looks one
   node ahead, so that the node in PLACE may be removed from the table
   without losing track of where to continue.  */
struct m4_hash_iterator
{
  const m4_hash *hash;          /* contains the buckets */
  hash_node *   place;          /* the node we are about to return */
  hash_node *   next;           /* the next node, in case PLACE is removed */
  size_t        next_bucket;    /* the next bucket index following NEXT */
#ifndef NDEBUG
  m4_hash_iterator *chain;      /* multiple iterators visiting one hash */
#endif
};


/* Field accessors for struct m4_hash.  Each expands to an lvalue.  */
#define HASH_SIZE(hash)         ((hash)->size)
#define HASH_LENGTH(hash)       ((hash)->length)
#define HASH_BUCKETS(hash)      ((hash)->buckets)
#define HASH_HASH_FUNC(hash)    ((hash)->hash_func)
#define HASH_CMP_FUNC(hash)     ((hash)->cmp_func)

/* Field accessors for struct hash_node.  */
#define NODE_NEXT(node)         ((node)->next)
#define NODE_KEY(node)          ((node)->key)
#define NODE_VALUE(node)        ((node)->value)

/* Field accessors for struct m4_hash_iterator.  */
#define ITERATOR_HASH(i)        ((i)->hash)
#define ITERATOR_PLACE(i)       ((i)->place)
#define ITERATOR_NEXT(i)        ((i)->next)
#define ITERATOR_NEXT_BUCKET(i) ((i)->next_bucket)

/*#define ITERATOR_NEXT_NEXT(i)   NODE_NEXT (ITERATOR_PLACE (i))*/

/* Helper macros.
   BUCKET_NTH is the head of bucket number N.
   BUCKET_COUNT is, despite its name, the bucket *index* selected for
   KEY: the hash of KEY reduced modulo the number of buckets.
   BUCKET_KEY is the head of the bucket that KEY hashes to.  */
#define BUCKET_NTH(hash, n)     (HASH_BUCKETS (hash)[n])
#define BUCKET_COUNT(hash, key)                                 \
        ((*HASH_HASH_FUNC (hash))(key) % HASH_SIZE (hash))
#define BUCKET_KEY(hash, key)                                   \
        (BUCKET_NTH ((hash), BUCKET_COUNT ((hash), (key))))

/* Debugging macros.  The iterator bookkeeping fields only exist when
   assertions are enabled; with NDEBUG both macros collapse to the
   constant 0, so they may then only be used as rvalues.  HASH_ITER
   casts away the const qualifier of HASH so that the bookkeeping can
   be updated through a const table pointer.  */
#ifdef NDEBUG
# define HASH_ITER(hash)        0
# define ITER_CHAIN(iter)       0
#else
# define HASH_ITER(hash)        (((m4_hash *) hash)->iter)
# define ITER_CHAIN(iter)       ((iter)->chain)
#endif


/* Forward declarations of the file-local helpers.  */
static void             bucket_insert   (m4_hash *hash, hash_node *bucket);
static void             bucket_delete   (m4_hash *hash, size_t i);
static hash_node *      node_new        (const void *key, void *value);
static void             node_insert     (m4_hash *hash, hash_node *node);
static hash_node *      node_lookup     (m4_hash *hash, const void *key);
static void             node_delete     (m4_hash *hash, hash_node *node);
static void             maybe_grow      (m4_hash *hash);


/* Singly linked list (through the NEXT field) of recycled nodes.
   node_delete and bucket_delete push nodes here, node_new pops them,
   and m4_hash_exit releases them.  Being file-scope, the list is
   shared by every table in the program.  */
static hash_node *free_list = NULL;


/* Create an empty table.  SIZE is the initial number of buckets; a
   SIZE of zero selects M4_HASH_DEFAULT_SIZE.  HASH_FUNC turns a key
   into a hash value and CMP_FUNC compares two keys (returning zero
   for a match); neither may be NULL.  The caller owns the returned
   table and releases it with m4_hash_delete.  */
m4_hash *
m4_hash_new (size_t size, m4_hash_hash_func *hash_func,
             m4_hash_cmp_func *cmp_func)
{
  m4_hash *table;
  size_t nbuckets;

  assert (hash_func);
  assert (cmp_func);

  nbuckets = size ? size : M4_HASH_DEFAULT_SIZE;

  table = (m4_hash *) xmalloc (sizeof *table);

  /* Every bucket starts out as an empty (NULL) chain.  */
  HASH_BUCKETS (table)
    = (hash_node **) xcalloc (nbuckets, sizeof *HASH_BUCKETS (table));
  HASH_SIZE (table) = nbuckets;
  HASH_LENGTH (table) = 0;

  HASH_HASH_FUNC (table) = hash_func;
  HASH_CMP_FUNC (table) = cmp_func;

#ifndef NDEBUG
  /* No iterator is visiting a brand new table.  */
  HASH_ITER (table) = NULL;
#endif

  return table;
}

/* Return a new table with the same bucket count, hash function and
   comparison function as SRC.  The new table is populated by calling
   COPY once for every entry of SRC, with the new table passed as the
   user data argument; COPY is responsible for inserting whatever it
   wants duplicated.  The result of the traversal is not examined.  */
m4_hash *
m4_hash_dup (m4_hash *src, m4_hash_copy_func *copy)
{
  m4_hash *clone;
  m4_hash_apply_func *visitor;

  assert (src);
  assert (copy);

  clone = m4_hash_new (HASH_SIZE (src), HASH_HASH_FUNC (src),
                       HASH_CMP_FUNC (src));

  visitor = (m4_hash_apply_func *) copy;
  m4_hash_apply (src, visitor, clone);

  return clone;
}

/* Dispose of HASH: hand every remaining node to the free list, then
   release the bucket array and the table itself.  Whatever the nodes'
   keys and values point to is _NOT_ freed here; the caller must have
   dealt with that already to avoid leaks.  Must not be called while
   HASH is being iterated.  */
void
m4_hash_delete (m4_hash *hash)
{
  size_t bucket;

  assert (hash);
  assert (!HASH_ITER (hash));

  for (bucket = 0; bucket < HASH_SIZE (hash); bucket++)
    {
      /* bucket_delete insists on a non-empty bucket.  */
      if (BUCKET_NTH (hash, bucket) == NULL)
        continue;

      bucket_delete (hash, bucket);
    }

  free (HASH_BUCKETS (hash));
  free (hash);
}

/* Recycle every node of bucket I of HASH onto the free list, leaving
   the bucket empty and reducing the element count of HASH by the
   number of nodes recycled.  I must be a valid bucket index and the
   bucket must not be empty.  Each node is expected to have had its
   key cleared already; this is checked by assertion.  */
static void
bucket_delete (m4_hash *hash, size_t i)
{
  hash_node *node;

  assert (hash);
  /* Validate the index before using it to read the bucket array.  */
  assert (i < HASH_SIZE (hash));
  assert (BUCKET_NTH (hash, i));

  /* Walk to the last node of the chain, accounting for each node
     passed on the way.  */
  for (node = BUCKET_NTH (hash, i);
       NODE_NEXT (node);
       node = NODE_NEXT (node))
    {
      assert (NODE_KEY (node) == NULL);
      --HASH_LENGTH (hash);
    }

  /* NODE is now the tail of the chain; account for it too.  */
  assert (NODE_KEY (node) == NULL);
  --HASH_LENGTH (hash);

  /* Splice the whole chain onto the front of the free list in one
     step: the tail links to the old free list, and the head of the
     chain becomes the new head of the free list.  */
  NODE_NEXT (node)      = free_list;
  free_list             = BUCKET_NTH (hash, i);
  BUCKET_NTH (hash, i)  = NULL;
}

/* Return an unlinked node holding KEY and VALUE.  A node is taken
   from the free list when one is available; otherwise a fresh one is
   allocated with xmalloc.  */
static hash_node *
node_new (const void *key, void *value)
{
  hash_node *fresh = free_list;

  if (fresh)
    free_list = NODE_NEXT (fresh);      /* pop the recycled node */
  else
    fresh = (hash_node *) xmalloc (sizeof *fresh);

  assert (fresh);

  NODE_KEY   (fresh) = key;
  NODE_VALUE (fresh) = value;
  NODE_NEXT  (fresh) = NULL;

  return fresh;
}

/* Push NODE, which must already be unlinked from its bucket, onto the
   free list and decrement the element count of HASH.  NODE is
   expected to have had its key cleared; this is checked by
   assertion.  */
static void
node_delete (m4_hash *hash, hash_node *node)
{
  assert (node);
  assert (NODE_KEY (node) == NULL);

  --HASH_LENGTH (hash);

  NODE_NEXT (node) = free_list;
  free_list = node;
}

/* Add an entry mapping KEY to VALUE to HASH and return KEY.  The node
   comes from the free list when possible, and the table is enlarged
   afterwards if it has become too dense.  Not safe to call while HASH
   is being iterated.  Currently, it is also not safe to call this if
   an entry matching KEY is already present.  */
const void *
m4_hash_insert (m4_hash *hash, const void *key, void *value)
{
  assert (hash);
  assert (!HASH_ITER (hash));

  node_insert (hash, node_new (key, value));
  maybe_grow (hash);

  return key;
}

/* Link the unconnected NODE in at the head of the bucket selected by
   its key, and increment the element count of HASH.  Because lookups
   scan a bucket from the head, NODE hides any older node in the
   bucket whose key compares equal under HASH's cmp_func.  */
static void
node_insert (m4_hash *hash, hash_node *node)
{
  hash_node **head;

  assert (hash);
  assert (node);
  assert (NODE_NEXT (node) == NULL);

  head = &BUCKET_KEY (hash, NODE_KEY (node));

  NODE_NEXT (node) = *head;
  *head = node;

  ++HASH_LENGTH (hash);
}

/* Remove from HASH the first node whose key matches KEY, comparing
   keys with HASH's cmp_func.  Any node with the same KEY previously
   hidden by the removed node becomes visible again.  The removed node
   is recycled onto the free list.

   Returns the key pointer that was stored in the removed node (which
   may differ from KEY even though the two compare equal), or NULL if
   no node matched.

   This is unsafe if multiple iterators are visiting HASH, or when a
   lone iterator is positioned on a different entry; both conditions
   are checked by assertion.  */
void *
m4_hash_remove (m4_hash *hash, const void *key)
{
  size_t n;
  hash_node *node = NULL;       /* node preceding NEXT, or NULL at head */
#ifndef NDEBUG
  m4_hash_iterator *iter;
#endif

  /* Validate HASH before anything dereferences it.  */
  assert (hash);

#ifndef NDEBUG
  iter = HASH_ITER (hash);
  if (iter)
    {
      /* At most one iterator may be live, and it must be positioned
         on an entry.  */
      assert (!ITER_CHAIN (iter));
      assert (ITERATOR_PLACE (iter));
    }
#endif

  n = BUCKET_COUNT (hash, key);
  do
    {
      /* First time round NODE is NULL, so start from the bucket head;
         afterwards examine the successor of NODE.  */
      hash_node *next = node ? NODE_NEXT (node) : BUCKET_NTH (hash, n);

      if (next && ((*HASH_CMP_FUNC (hash)) (NODE_KEY (next), key) == 0))
        {
          /* Unlink NEXT from the chain.  */
          if (node)
            NODE_NEXT (node) = NODE_NEXT (next);
          else
            BUCKET_NTH (hash, n) = NODE_NEXT (next);

          /* Hand the stored key back to the caller.  */
          key = NODE_KEY (next);
#ifndef NDEBUG
          if (iter)
            assert (ITERATOR_PLACE (iter) == next);
          /* node_delete asserts that the key has been cleared.  */
          NODE_KEY (next) = NULL;
#endif
          node_delete (hash, next);
          return (void *) key; /* Cast away const.  */
        }
      node = next;
    }
  while (node);

  return NULL;
}

/* Find the first node in HASH whose key matches KEY and return the
   address of its value field, or NULL when there is no such node.
   Returning the address rather than the value lets callers tell a
   stored NULL value apart from a failed lookup, and lets them replace
   the value `in situ' (which M4 uses to implement a value stack).
   Safe to call even when an iterator is in force.  */
void **
m4_hash_lookup (m4_hash *hash, const void *key)
{
  hash_node *match;

  assert (hash);

  match = node_lookup (hash, key);
  if (!match)
    return NULL;

  return &NODE_VALUE (match);
}

/* Scan the bucket that KEY hashes to, and return the first node whose
   key compares equal to KEY under HASH's cmp_func, or NULL if the
   bucket holds no such node.  */
static hash_node *
node_lookup (m4_hash *hash, const void *key)
{
  hash_node *candidate;

  assert (hash);

  for (candidate = BUCKET_KEY (hash, key);
       candidate != NULL;
       candidate = NODE_NEXT (candidate))
    {
      /* cmp_func returns zero on a match.  */
      if ((*HASH_CMP_FUNC (hash)) (NODE_KEY (candidate), key) == 0)
        break;
    }

  return candidate;
}

/* Return the number of entries currently recorded in HASH.  Safe to
   call even during an iteration.  */
size_t M4_GNUC_PURE
m4_get_hash_length (m4_hash *hash)
{
  size_t count;

  assert (hash);

  count = HASH_LENGTH (hash);
  return count;
}

#if 0
/* This function is currently compiled out.

   Force the number of buckets to be the given value.  You probably ought
   not to be using this function once the table has been in use, since
   the maximum density algorithm will grow the number of buckets back to
   what was there before if you try to shrink the table.  It is useful
   to set a smaller or larger initial size if you know in advance what
   order of magnitude of entries will be in the table.  Be aware that
   the efficiency of the lookup and grow features require that the size
   always be 1 less than a power of 2.  Unsafe if HASH is being visited
   by an iterator.

   NOTE(review): the nodes are re-linked with bucket_insert, which goes
   through node_insert and therefore increments the element count once
   per node; as written, the count would be inflated by the number of
   entries present.  Fix before re-enabling.  */
void
m4_hash_resize (m4_hash *hash, size_t size)
{
  hash_node **original_buckets;
  size_t original_size;

  assert (hash);
  assert (!HASH_ITER (hash));

  /* Remember the old bucket array so its chains can be re-linked.  */
  original_size         = HASH_SIZE (hash);
  original_buckets      = HASH_BUCKETS (hash);

  /* Install a new, empty bucket array of the requested size.  */
  HASH_SIZE (hash)      = size;
  HASH_BUCKETS (hash)   = (hash_node **) xcalloc (size,
                                                  sizeof *HASH_BUCKETS (hash));

  /* Redistribute every old chain over the new buckets.  */
  {
    size_t i;
    for (i = 0; i < original_size; ++i)
      if (original_buckets[i])
        bucket_insert (hash, original_buckets[i]);
  }

  free (original_buckets);
}
#endif

/* If the average number of nodes per bucket in HASH exceeds
   M4_HASH_MAXIMUM_DENSITY, replace the bucket array with one of
   roughly twice the size and redistribute the existing nodes over it.
   The number of entries recorded for HASH is unchanged by this
   operation.  */
static void
maybe_grow (m4_hash *hash)
{
  float nodes_per_bucket;

  assert (hash);

  nodes_per_bucket = (float) HASH_LENGTH (hash) / (float) HASH_SIZE (hash);

  if (nodes_per_bucket > (float) M4_HASH_MAXIMUM_DENSITY)
    {
      size_t i;
      size_t original_size = HASH_SIZE (hash);
      size_t original_length = HASH_LENGTH (hash);
      hash_node **original_buckets = HASH_BUCKETS (hash);

      /* HASH sizes are always 1 less than a power of 2.  */
      HASH_SIZE (hash)    = (2 * (1 + original_size)) - 1;
      HASH_BUCKETS (hash) =
        (hash_node **) xcalloc (HASH_SIZE (hash), sizeof *HASH_BUCKETS (hash));

      /* Move every old chain into the new bucket array.  */
      for (i = 0; i < original_size; ++i)
        if (original_buckets[i])
          bucket_insert (hash, original_buckets[i]);

      /* bucket_insert relinks nodes through node_insert, which
         increments the element count for every node it handles.
         Rehashing adds no entries, so put the true count back;
         otherwise each growth would double the recorded length and
         distort later density checks.  */
      HASH_LENGTH (hash) = original_length;

      free (original_buckets);
    }
}

/* Take the chain of nodes starting at BUCKET apart, and insert each
   node into HASH individually.  BUCKET must not be NULL.  The relative
   order of the nodes is not preserved, which would need to change if
   we were ever to guarantee relative ordering within a bucket for
   identical keys.  */
static void
bucket_insert (m4_hash *hash, hash_node *bucket)
{
  hash_node *node = bucket;

  assert (hash);
  assert (bucket);

  for (;;)
    {
      hash_node *rest = NODE_NEXT (node);

      /* node_insert requires an unconnected node, so detach NODE from
         the remainder of the chain first.  */
      NODE_NEXT (node) = NULL;
      node_insert (hash, node);

      if (!rest)
        break;
      node = rest;
    }
}

/* Release every node held on the free list, leaving the list empty.
   Safe to call at any time, although only worth calling at program
   shutdown to verify that nothing has leaked.  */
void
m4_hash_exit (void)
{
  hash_node *stale;

  for (stale = free_list; stale != NULL; stale = free_list)
    {
      /* Unhook the head before releasing it.  */
      free_list = NODE_NEXT (stale);
      free (stale);
    }
}


/* Iterate over a given HASH.  Start with PLACE being NULL, then
   repeat with PLACE being the previous return value.  The return
   value is the current location of the iterator, or NULL when the
   walk is complete; in the latter case the iterator has already been
   freed and must not be used again.  Call m4_free_hash_iterator to
   abort iteration early.  During the iteration, it is safe to search
   the list, and if no other iterator is active, it is safe to remove
   the key pointed to by this iterator.  All other actions that modify
   HASH are unsafe.

   The iterator keeps two cursors: PLACE, the node reported to the
   caller, and NEXT, the node that will be reported on the following
   call.  NEXT is worked out in advance so that the caller may remove
   PLACE from the table without breaking the walk.  */
m4_hash_iterator *
m4_get_hash_iterator_next (const m4_hash *hash, m4_hash_iterator *place)
{
  assert (hash);
  assert (!place || (ITERATOR_HASH (place) == hash));

  /* On the first iteration, allocate an iterator.  xzalloc leaves
     PLACE and NEXT as NULL and the next bucket index as zero.  */
  if (!place)
    {
      place = (m4_hash_iterator *) xzalloc (sizeof *place);
      ITERATOR_HASH (place) = hash;
#ifndef NDEBUG
      /* Register the new iterator at the head of HASH's chain of live
         iterators, so that unsafe modifications can be detected.  */
      ITER_CHAIN (place) = HASH_ITER (hash);
      HASH_ITER (hash) = place;
#endif
    }

 next:
  /* The node found on the previous step becomes the current one.  */
  ITERATOR_PLACE (place) = ITERATOR_NEXT (place);

  /* If there is another node in the current bucket, select it.  */
  if (ITERATOR_NEXT (place) && NODE_NEXT (ITERATOR_NEXT (place)))
    {
      ITERATOR_NEXT (place) = NODE_NEXT (ITERATOR_NEXT (place));
    }
  else
    {
      /* Find the next non-empty bucket.  */
      while ((ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
         && (BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place)) == NULL))
        {
          ++ITERATOR_NEXT_BUCKET (place);
        }

      /* Select the first node in the new bucket, or NULL when every
         bucket has been visited.  */
      if (ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
        {
          ITERATOR_NEXT (place)
            = BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place));
        }
      else
        ITERATOR_NEXT (place) = NULL;

      /* Advance the `next' reference, so that the following search
         starts after the bucket just selected.  */
      ++ITERATOR_NEXT_BUCKET (place);
    }

  /* If there are no more nodes to return, recycle the iterator memory.  */
  if (! (ITERATOR_PLACE (place) || ITERATOR_NEXT (place)))
    {
      m4_free_hash_iterator (hash, place);
      return NULL;
    }

  /* On the first call we need to put the 1st node in PLACE and
     the 2nd node in NEXT.  After the first pass only NEXT is set, so
     run the step once more to shift it into PLACE.  */
  if (ITERATOR_NEXT (place) && !ITERATOR_PLACE (place))
    goto next;

  assert (place && ITERATOR_PLACE (place));

  return place;
}

/* Clean up the iterator PLACE within HASH when aborting an iteration
   early.  PLACE is freed and must not be used afterwards.  When
   assertions are enabled, PLACE is first unlinked from the chain of
   iterators registered with HASH, and it is an assertion failure for
   PLACE not to be on that chain.  */
void
m4_free_hash_iterator (const m4_hash *hash, m4_hash_iterator *place)
{
#ifndef NDEBUG
  m4_hash_iterator *iter = NULL;        /* predecessor of NEXT, if any */
  m4_hash_iterator *next;               /* chain entry being examined */

  assert (hash);
  assert (place && (ITERATOR_HASH (place) == hash));

  do
    {
      /* Start at the head of the chain, then follow the links.  */
      next = iter ? ITER_CHAIN (iter) : HASH_ITER (hash);
      if (place == next)
        {
          /* Found it: bypass PLACE, either from its predecessor or
             from the head of the chain.  */
          if (iter)
            ITER_CHAIN (iter) = ITER_CHAIN (next);
          else
            HASH_ITER (hash) = ITER_CHAIN (next);
          break;
        }
      iter = next;
    }
  while (iter);
  /* NEXT is only NULL here if the chain was exhausted without
     finding PLACE.  */
  assert (next);
#endif
  free (place);
}

/* Return the key of the entry that the iterator PLACE is currently
   positioned on.  */
const void * M4_GNUC_PURE
m4_get_hash_iterator_key (m4_hash_iterator *place)
{
  hash_node *current;

  assert (place);

  current = ITERATOR_PLACE (place);
  return NODE_KEY (current);
}

/* Return the value of the entry that the iterator PLACE is currently
   positioned on.  */
void * M4_GNUC_PURE
m4_get_hash_iterator_value (m4_hash_iterator *place)
{
  hash_node *current;

  assert (place);

  current = ITERATOR_PLACE (place);
  return NODE_VALUE (current);
}

/* Call FUNC once for each entry in HASH, passing it the table, the
   entry's key, the entry's value, and USERDATA.  The walk stops at
   the first call for which FUNC returns non-NULL, and that value is
   returned to the caller; a NULL return means FUNC returned NULL for
   every entry (or that HASH was empty).  */
void *
m4_hash_apply (m4_hash *hash, m4_hash_apply_func *func, void *userdata)
{
  m4_hash_iterator *cursor;

  assert (hash);
  assert (func);

  for (cursor = m4_get_hash_iterator_next (hash, NULL);
       cursor != NULL;
       cursor = m4_get_hash_iterator_next (hash, cursor))
    {
      void *verdict = (*func) (hash, m4_get_hash_iterator_key (cursor),
                               m4_get_hash_iterator_value (cursor),
                               userdata);

      if (verdict != NULL)
        {
          /* Stopping early: the iterator has not been released by
             running off the end of the table, so release it here.  */
          m4_free_hash_iterator (hash, cursor);
          return verdict;
        }
    }

  return NULL;
}


/* Using a string (char * and size_t pair) as the hash key is common
   enough that we provide implementations here for use in client hash
   table routines.  */

/* Hash function for keys that are m4_string objects.  The hash is
   seeded with the string's length, then for each byte in turn the
   running value is rotated left by 7 bits and the byte (as an
   unsigned char) is added in.  Similar to gnulib's hash module, but
   with the length factored in.  */
size_t M4_GNUC_PURE
m4_hash_string_hash (const void *ptr)
{
  const m4_string *key = (const m4_string *) ptr;
  size_t hashed = key->len;
  size_t i;

  for (i = 0; i < key->len; i++)
    hashed = rotl_sz (hashed, 7) + to_uchar (key->str[i]);

  return hashed;
}

/* Comparison function for keys that are m4_string objects -- used by
   the underlying hash table ADT when searching for a key match during
   name lookup.  Strings of different lengths order by length (the
   shorter one first); strings of equal length order by memcmp of
   their contents.  Returns zero only when KEY and TRY have identical
   length and bytes.  */
int M4_GNUC_PURE
m4_hash_string_cmp (const void *key, const void *try)
{
  const m4_string *lhs = (const m4_string *) key;
  const m4_string *rhs = (const m4_string *) try;

  if (lhs->len != rhs->len)
    return lhs->len < rhs->len ? -1 : 1;

  return memcmp (lhs->str, rhs->str, lhs->len);
}