summaryrefslogtreecommitdiff
path: root/module/ice-9/vlist.scm
blob: 0c0906c3c7211e2a6fd52fc082b2de26803f4d2c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
;;; -*- mode: scheme; coding: utf-8; -*-
;;;
;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 3 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

(define-module (ice-9 vlist)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-9 gnu)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 format)

  #:export (vlist? vlist-cons vlist-head vlist-tail vlist-null?
            vlist-null list->vlist vlist-ref vlist-drop vlist-take
            vlist-length vlist-fold vlist-fold-right vlist-map
            vlist-unfold vlist-unfold-right vlist-append
            vlist-reverse vlist-filter vlist-delete vlist->list
            vlist-for-each
            block-growth-factor

            vhash? vhash-cons vhash-consq vhash-consv
            vhash-assoc vhash-assq vhash-assv
            vhash-delete vhash-delq vhash-delv
            vhash-fold vhash-fold-right
            vhash-fold* vhash-foldq* vhash-foldv*
            alist->vhash))

;;; Author: Ludovic Courtès <ludo@gnu.org>
;;;
;;; Commentary:
;;;
;;; This module provides an implementations of vlists, a functional list-like
;;; data structure described by Phil Bagwell in "Fast Functional Lists,
;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
;;; 2002.
;;;
;;; The idea is to store vlist elements in increasingly large contiguous blocks
;;; (implemented as vectors here).  These blocks are linked to one another using
;;; a pointer to the next block (called `block-base' here) and an offset within
;;; that block (`block-offset' here).  The size of these blocks form a geometric
;;; series with ratio `block-growth-factor'.
;;;
;;; In the best case (e.g., using a vlist returned by `list->vlist'),
;;; elements from the first half of an N-element vlist are accessed in O(1)
;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
;;; O(ln(N)).  Furthermore, the data structure improves data locality since
;;; vlist elements are adjacent, which plays well with caches.
;;;
;;; Code:


;;;
;;; VList Blocks and Block Descriptors.
;;;

(define block-growth-factor
  (make-fluid 2))

(define-inlinable (make-block base offset size hash-tab?)
  ;; Return a block (and block descriptor) of SIZE elements pointing to
  ;; BASE at OFFSET.  If HASH-TAB? is true, we also reserve space for a
  ;; "hash table".  Note: We use `next-free' instead of `last-used' as
  ;; suggested by Bagwell.
  (if hash-tab?
      (vector (make-vector (* size 3) #f)
              base offset size 0)
      (vector (make-vector size)
              base offset size 0)))

(define-syntax-rule (define-block-accessor name index)
  (define-inlinable (name block)
    (vector-ref block index)))

(define-block-accessor block-content 0)
(define-block-accessor block-base 1)
(define-block-accessor block-offset 2)
(define-block-accessor block-size 3)
(define-block-accessor block-next-free 4)

(define-inlinable (block-hash-table? block)
  (< (block-size block) (vector-length (block-content block))))

(define-inlinable (set-block-next-free! block next-free)
  (vector-set! block 4 next-free))

(define-inlinable (block-append! block value offset)
  ;; This is not thread-safe.  To fix it, see Section 2.8 of the paper.
  (and (< offset (block-size block))
       (= offset (block-next-free block))
       (begin
         (set-block-next-free! block (1+ offset))
         (vector-set! (block-content block) offset value)
         #t)))

;; Return the item at slot OFFSET.
(define-inlinable (block-ref content offset)
  (vector-ref content offset))

;; Return the offset of the next item in the hash bucket, after the one
;; at OFFSET.
(define-inlinable (block-hash-table-next-offset content size offset)
  (vector-ref content (+ size size offset)))

;; Save the offset of the next item in the hash bucket, after the one
;; at OFFSET.
(define-inlinable (block-hash-table-set-next-offset! content size offset
                                                     next-offset)
  (vector-set! content (+ size size offset) next-offset))

;; Returns the index of the last entry stored in CONTENT with
;; SIZE-modulo hash value KHASH.
(define-inlinable (block-hash-table-ref content size khash)
  (vector-ref content (+ size khash)))

(define-inlinable (block-hash-table-set! content size khash offset)
  (vector-set! content (+ size khash) offset))

;; Add hash table information for the item recently added at OFFSET,
;; with SIZE-modulo hash KHASH.
(define-inlinable (block-hash-table-add! content size khash offset)
  (block-hash-table-set-next-offset! content size offset
                                     (block-hash-table-ref content size khash))
  (block-hash-table-set! content size khash offset))

(define block-null
  ;; The null block.
  (make-block #f 0 0 #f))


;;;
;;; VLists.
;;;

(define-record-type <vlist>
  ;; A vlist is just a base+offset pair pointing to a block.

  ;; XXX: Allocating a <vlist> record in addition to the block at each
  ;; `vlist-cons' call is inefficient.  However, Bagwell's hack to avoid it
  ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
  ;; performance hit for everyone.
  (make-vlist base offset)
  vlist?
  (base    vlist-base)
  (offset  vlist-offset))

(set-record-type-printer! <vlist>
                          (lambda (vl port)
                            (cond ((vlist-null? vl)
                                   (format port "#<vlist ()>"))
                                  ((vhash? vl)
                                   (format port "#<vhash ~x ~a pairs>"
                                           (object-address vl)
                                           (vlist-length vl)))
                                  (else
                                   (format port "#<vlist ~a>"
                                           (vlist->list vl))))))


(define vlist-null
  ;; The empty vlist.
  (make-vlist block-null 0))

;; Asserting that something is a vlist is actually a win if your next
;; step is to call record accessors, because that causes CSE to
;; eliminate the type checks in those accessors.
;;
(define-inlinable (assert-vlist val)
  (unless (vlist? val)
    (throw 'wrong-type-arg
           #f
           "Not a vlist: ~S"
           (list val)
           (list val))))

(define-inlinable (block-cons item vlist hash-tab?)
  (let ((base (vlist-base vlist))
        (offset (1+ (vlist-offset vlist))))
    (cond
     ((block-append! base item offset)
      ;; Fast path: We added the item directly to the block.
      (make-vlist base offset))
     (else
      ;; Slow path: Allocate a new block.
      (let* ((size (block-size base))
             (base (make-block
                    base
                    (1- offset)
                    (cond
                     ((zero? size) 1)
                     ((< offset size) 1) ;; new vlist head
                     (else (* (fluid-ref block-growth-factor) size)))
                    hash-tab?)))
        (set-block-next-free! base 1)
        (vector-set! (block-content base) 0 item)
        (make-vlist base 0))))))

(define (vlist-cons item vlist)
  "Return a new vlist with ITEM as its head and VLIST as its
tail."
  ;; Note: Although the result of `vlist-cons' on a vhash is a valid
  ;; vlist, it is not a valid vhash.  The new item does not get a hash
  ;; table entry.  If we allocate a new block, the new block will not
  ;; have a hash table.  Perhaps we can do something more sensible here,
  ;; but this is a hot function, so there are performance impacts.
  (assert-vlist vlist)
  (block-cons item vlist #f))

(define (vlist-head vlist)
  "Return the head of VLIST."
  (assert-vlist vlist)
  (let ((base   (vlist-base vlist))
        (offset (vlist-offset vlist)))
    (block-ref (block-content base) offset)))

(define (vlist-tail vlist)
  "Return the tail of VLIST."
  (assert-vlist vlist)
  (let ((base   (vlist-base vlist))
        (offset (vlist-offset vlist)))
    (if (> offset 0)
        (make-vlist base (- offset 1))
        (make-vlist (block-base base)
                    (block-offset base)))))

(define (vlist-null? vlist)
  "Return true if VLIST is empty."
  (assert-vlist vlist)
  (let ((base (vlist-base vlist)))
    (and (not (block-base base))
         (= 0 (block-size base)))))


;;;
;;; VList Utilities.
;;;

(define (list->vlist lst)
  "Return a new vlist whose contents correspond to LST."
  (vlist-reverse (fold vlist-cons vlist-null lst)))

(define (vlist-fold proc init vlist)
  "Fold over VLIST, calling PROC for each element."
  ;; FIXME: Handle multiple lists.
  (assert-vlist vlist)
  (let loop ((base   (vlist-base vlist))
             (offset (vlist-offset vlist))
             (result init))
    (if (eq? base block-null)
        result
        (let* ((next  (- offset 1))
               (done? (< next 0)))
          (loop (if done? (block-base base) base)
                (if done? (block-offset base) next)
                (proc (block-ref (block-content base) offset) result))))))

(define (vlist-fold-right proc init vlist)
  "Fold over VLIST, calling PROC for each element, starting from
the last element."
  (assert-vlist vlist)
  (let loop ((index  (1- (vlist-length vlist)))
             (result init))
    (if (< index 0)
        result
        (loop (1- index)
          (proc (vlist-ref vlist index) result)))))

(define (vlist-reverse vlist)
  "Return a new VLIST whose content are those of VLIST in reverse
order."
  (vlist-fold vlist-cons vlist-null vlist))

(define (vlist-map proc vlist)
  "Map PROC over the elements of VLIST and return a new vlist."
  (vlist-fold (lambda (item result)
                (vlist-cons (proc item) result))
              vlist-null
              (vlist-reverse vlist)))

(define (vlist->list vlist)
  "Return a new list whose contents match those of VLIST."
  (vlist-fold-right cons '() vlist))

(define (vlist-ref vlist index)
  "Return the element at index INDEX in VLIST."
  (assert-vlist vlist)
  (let loop ((index   index)
             (base    (vlist-base vlist))
             (offset  (vlist-offset vlist)))
    (if (<= index offset)
        (block-ref (block-content base) (- offset index))
        (loop (- index offset 1)
              (block-base base)
              (block-offset base)))))

(define (vlist-drop vlist count)
  "Return a new vlist that does not contain the COUNT first elements of
VLIST."
  (assert-vlist vlist)
  (let loop ((count  count)
             (base   (vlist-base vlist))
             (offset (vlist-offset vlist)))
    (if (<= count offset)
        (make-vlist base (- offset count))
        (loop (- count offset 1)
              (block-base base)
              (block-offset base)))))

(define (vlist-take vlist count)
  "Return a new vlist that contains only the COUNT first elements of
VLIST."
  (let loop ((count  count)
             (vlist  vlist)
             (result vlist-null))
    (if (= 0 count)
        (vlist-reverse result)
        (loop (- count 1)
              (vlist-tail vlist)
              (vlist-cons (vlist-head vlist) result)))))

(define (vlist-filter pred vlist)
  "Return a new vlist containing all the elements from VLIST that
satisfy PRED."
  (vlist-fold-right (lambda (e v)
                      (if (pred e)
                          (vlist-cons e v)
                          v))
                    vlist-null
                    vlist))

(define* (vlist-delete x vlist #:optional (equal? equal?))
  "Return a new vlist corresponding to VLIST without the elements
EQUAL? to X."
  (vlist-filter (lambda (e)
                  (not (equal? e x)))
                vlist))

(define (vlist-length vlist)
  "Return the length of VLIST."
  (assert-vlist vlist)
  (let loop ((base (vlist-base vlist))
             (len  (vlist-offset vlist)))
    (if (eq? base block-null)
        len
        (loop (block-base base)
              (+ len 1 (block-offset base))))))

(define* (vlist-unfold p f g seed
                       #:optional (tail-gen (lambda (x) vlist-null)))
  "Return a new vlist.  See the description of SRFI-1 `unfold' for details."
  (let uf ((seed seed))
    (if (p seed)
        (tail-gen seed)
        (vlist-cons (f seed)
                    (uf (g seed))))))

(define* (vlist-unfold-right p f g seed #:optional (tail vlist-null))
  "Return a new vlist.  See the description of SRFI-1 `unfold-right' for
details."
  (let uf ((seed seed) (lis tail))
    (if (p seed)
        lis
        (uf (g seed) (vlist-cons (f seed) lis)))))

(define (vlist-append . vlists)
  "Append the given lists."
  (if (null? vlists)
      vlist-null
      (fold-right (lambda (vlist result)
                    (vlist-fold-right (lambda (e v)
                                        (vlist-cons e v))
                                      result
                                      vlist))
                  vlist-null
                  vlists)))

(define (vlist-for-each proc vlist)
  "Call PROC on each element of VLIST.  The result is unspecified."
  (vlist-fold (lambda (item x)
                (proc item))
              (if #f #f)
              vlist))


;;;
;;; Hash Lists, aka. `VHash'.
;;;

;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
;; associated with K1 and K2, respectively.  The resulting layout is a
;; follows:
;;
;;             ,--------------------.
;;            0| ,-> (K1 . V1)      | Vlist array
;;            1| |                  |
;;            2| |   (K2 . V2)      |
;;            3| |                  |
;;        size +-|------------------+
;;            0| |                  | Hash table
;;            1| |                  |
;;            2| +-- O <------------- H
;;            3| |                  |
;;    size * 2 +-|------------------+
;;            0| `-> 2              | Chain links
;;            1|                    |
;;            2|    #f              |
;;            3|                    |
;;    size * 3 `--------------------'
;;
;; The backing store for the vhash is partitioned into three areas: the
;; vlist part, the hash table part, and the chain links part.  In this
;; example we have a hash H which, when indexed into the hash table
;; part, indicates that a value with this hash can be found at offset 0
;; in the vlist part.  The corresponding index (in this case, 0) of the
;; chain links array holds the index of the next element in this block
;; with this hash value, or #f if we reached the end of the chain.
;;
;; This API potentially requires users to repeat which hash function and
;; which equality predicate to use.  This can lead to unpredictable
;; results if they are used in consistenly, e.g., between `vhash-cons'
;; and `vhash-assoc', which is undesirable, as argued in
;; http://savannah.gnu.org/bugs/?22159 .  OTOH, two arguments can be
;; made in favor of this API:
;;
;;  - It's consistent with how alists are handled in SRFI-1.
;;
;;  - In practice, users will probably consistenly use either the `q',
;;    the `v', or the plain variant (`vlist-cons' and `vlist-assoc'
;;    without any optional argument), i.e., they will rarely explicitly
;;    pass a hash function or equality predicate.

(define (vhash? obj)
  "Return true if OBJ is a hash list."
  (and (vlist? obj)
       (block-hash-table? (vlist-base obj))))

(define* (vhash-cons key value vhash #:optional (hash hash))
  "Return a new hash list based on VHASH where KEY is associated
with VALUE.  Use HASH to compute KEY's hash."
  (assert-vlist vhash)
  ;; We should also assert that it is a hash table.  Need to check the
  ;; performance impacts of that.  Also, vlist-null is a valid hash
  ;; table, which does not pass vhash?.  A bug, perhaps.
  (let* ((vhash     (block-cons (cons key value) vhash #t))
         (base      (vlist-base vhash))
         (offset    (vlist-offset vhash))
         (size      (block-size base))
         (khash     (hash key size))
         (content   (block-content base)))
    (block-hash-table-add! content size khash offset)
    vhash))

(define vhash-consq (cut vhash-cons <> <> <> hashq))
(define vhash-consv (cut vhash-cons <> <> <> hashv))

(define-inlinable (%vhash-fold* proc init key vhash equal? hash)
  ;; Fold over all the values associated with KEY in VHASH.
  (define (visit-block base max-offset result)
    (let* ((size (block-size base))
           (content (block-content base))
           (khash (hash key size)))
      (let loop ((offset (block-hash-table-ref content size khash))
                 (result result))
        (if offset
            (loop (block-hash-table-next-offset content size offset)
                  (if (and (<= offset max-offset)
                           (equal? key (car (block-ref content offset))))
                      (proc (cdr (block-ref content offset)) result)
                      result))
            (let ((next-block (block-base base)))
              (if (> (block-size next-block) 0)
                  (visit-block next-block (block-offset base) result)
                  result))))))

  (assert-vlist vhash)
  (if (> (block-size (vlist-base vhash)) 0)
      (visit-block (vlist-base vhash)
                   (vlist-offset vhash)
                   init)
      init))

(define* (vhash-fold* proc init key vhash
                      #:optional (equal? equal?) (hash hash))
  "Fold over all the values associated with KEY in VHASH, with each
call to PROC having the form ‘(proc value result)’, where
RESULT is the result of the previous call to PROC and INIT the
value of RESULT for the first call to PROC."
  (%vhash-fold* proc init key vhash equal? hash))

(define (vhash-foldq* proc init key vhash)
  "Same as ‘vhash-fold*’, but using ‘hashq’ and ‘eq?’."
  (%vhash-fold* proc init key vhash eq? hashq))

(define (vhash-foldv* proc init key vhash)
  "Same as ‘vhash-fold*’, but using ‘hashv’ and ‘eqv?’."
  (%vhash-fold* proc init key vhash eqv? hashv))

(define-inlinable (%vhash-assoc key vhash equal? hash)
  ;; A specialization of `vhash-fold*' that stops when the first value
  ;; associated with KEY is found or when the end-of-list is reached.  Inline to
  ;; make sure `vhash-assq' gets to use the `eq?' instruction instead of calling
  ;; the `eq?'  subr.
  (define (visit-block base max-offset)
    (let* ((size (block-size base))
           (content (block-content base))
           (khash (hash key size)))
      (let loop ((offset (block-hash-table-ref content size khash)))
        (if offset
            (if (and (<= offset max-offset)
                     (equal? key (car (block-ref content offset))))
                (block-ref content offset)
                (loop (block-hash-table-next-offset content size offset)))
            (let ((next-block (block-base base)))
              (and (> (block-size next-block) 0)
                   (visit-block next-block (block-offset base))))))))

  (assert-vlist vhash)
  (and (> (block-size (vlist-base vhash)) 0)
       (visit-block (vlist-base vhash)
                    (vlist-offset vhash))))

(define* (vhash-assoc key vhash #:optional (equal? equal?) (hash hash))
  "Return the first key/value pair from VHASH whose key is equal to
KEY according to the EQUAL? equality predicate."
  (%vhash-assoc key vhash equal? hash))

(define (vhash-assq key vhash)
  "Return the first key/value pair from VHASH whose key is ‘eq?’ to
KEY."
  (%vhash-assoc key vhash eq? hashq))

(define (vhash-assv key vhash)
  "Return the first key/value pair from VHASH whose key is ‘eqv?’ to
KEY."
  (%vhash-assoc key vhash eqv? hashv))

(define* (vhash-delete key vhash #:optional (equal? equal?) (hash hash))
  "Remove all associations from VHASH with KEY, comparing keys
with EQUAL?."
  (if (vhash-assoc key vhash equal? hash)
      (vlist-fold (lambda (k+v result)
                    (let ((k (car k+v))
                          (v (cdr k+v)))
                      (if (equal? k key)
                          result
                          (vhash-cons k v result hash))))
                  vlist-null
                  vhash)
      vhash))

(define vhash-delq (cut vhash-delete <> <> eq? hashq))
(define vhash-delv (cut vhash-delete <> <> eqv? hashv))

(define (vhash-fold proc init vhash)
  "Fold over the key/pair elements of VHASH from left to right, with
each call to PROC having the form ‘(PROC key value result)’,
where RESULT is the result of the previous call to PROC and
INIT the value of RESULT for the first call to PROC."
  (vlist-fold (lambda (key+value result)
                (proc (car key+value) (cdr key+value)
                      result))
              init
              vhash))

(define (vhash-fold-right proc init vhash)
  "Fold over the key/pair elements of VHASH from right to left, with
each call to PROC having the form ‘(PROC key value result)’,
where RESULT is the result of the previous call to PROC and
INIT the value of RESULT for the first call to PROC."
  (vlist-fold-right (lambda (key+value result)
                      (proc (car key+value) (cdr key+value)
                            result))
                    init
                    vhash))

(define* (alist->vhash alist #:optional (hash hash))
  "Return the vhash corresponding to ALIST, an association list."
  (fold-right (lambda (pair result)
                (vhash-cons (car pair) (cdr pair) result hash))
              vlist-null
              alist))

;;; vlist.scm ends here