summaryrefslogtreecommitdiff
path: root/module/language/cps/slot-allocation.scm
blob: 247d648696af45985933319aade66fdf37264412 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
;; Continuation-passing style (CPS) intermediate language (IL)

;; Copyright (C) 2013-2019 Free Software Foundation, Inc.

;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

;;; Commentary:
;;;
;;; A module to assign stack slots to variables in a CPS term.
;;;
;;; Code:

(define-module (language cps slot-allocation)
  #:use-module (ice-9 control)
  #:use-module (ice-9 match)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-11)
  #:use-module (srfi srfi-26)
  #:use-module (language cps)
  #:use-module (language cps utils)
  #:use-module (language cps intmap)
  #:use-module (language cps intset)
  #:export (allocate-slots
            lookup-slot
            lookup-maybe-slot
            lookup-representation
            lookup-nlocals
            lookup-call-proc-slot
            lookup-parallel-moves
            lookup-slot-map))

(define-record-type $allocation
  (make-allocation slots representations call-allocs shuffles frame-size)
  allocation?

  ;; A map of VAR to slot allocation.  A slot allocation is an integer,
  ;; if the variable has been assigned a slot.
  ;;
  (slots allocation-slots)

  ;; A map of VAR to representation.  A representation is 'scm, 'f64,
  ;; 'u64, or 's64.
  ;;
  (representations allocation-representations)

  ;; A map of LABEL to /call allocs/, for expressions that continue to
  ;; $kreceive continuations: non-tail calls and $prompt terms.
  ;;
  ;; A call alloc contains two pieces of information: the call's /proc
  ;; slot/ and a /dead slot map/.  The proc slot indicates the slot of a
  ;; procedure in a procedure call, or where the procedure would be in a
  ;; multiple-value return.
  ;;
  ;; The dead slot map indicates, what slots should be ignored by GC
  ;; when marking the frame.  A dead slot map is a bitfield, as an
  ;; integer.
  ;;
  (call-allocs allocation-call-allocs)

  ;; A map of LABEL to /parallel moves/.  Parallel moves shuffle locals
  ;; into position for a $call, $callk, or $values, or shuffle returned
  ;; values back into place in a $kreceive.
  ;;
  ;; A set of moves is expressed as an ordered list of (SRC . DST)
  ;; moves, where SRC and DST are slots.  This may involve a temporary
  ;; variable.
  ;;
  (shuffles allocation-shuffles)

  ;; The number of local slots needed for this function.  Because we can
  ;; contify common clause tails, we use one frame size for all clauses
  ;; to avoid having to adjust the frame size when continuing to labels
  ;; from other clauses.
  ;;
  (frame-size allocation-frame-size))

(define-record-type $call-alloc
  (make-call-alloc proc-slot slot-map)
  call-alloc?
  (proc-slot call-alloc-proc-slot)
  (slot-map call-alloc-slot-map))

(define (lookup-maybe-slot var allocation)
  (intmap-ref (allocation-slots allocation) var (lambda (_) #f)))

(define (lookup-slot var allocation)
  (intmap-ref (allocation-slots allocation) var))

(define (lookup-representation var allocation)
  (intmap-ref (allocation-representations allocation) var))

(define *absent* (list 'absent))

(define (lookup-call-alloc k allocation)
  (intmap-ref (allocation-call-allocs allocation) k))

(define (lookup-call-proc-slot k allocation)
  (or (call-alloc-proc-slot (lookup-call-alloc k allocation))
      (error "Call has no proc slot" k)))

(define (lookup-parallel-moves k allocation)
  (intmap-ref (allocation-shuffles allocation) k))

(define (lookup-slot-map k allocation)
  (or (call-alloc-slot-map (lookup-call-alloc k allocation))
      (error "Call has no slot map" k)))

(define (lookup-nlocals allocation)
  (allocation-frame-size allocation))

(define-syntax-rule (persistent-intmap2 exp)
  (call-with-values (lambda () exp)
    (lambda (a b)
      (values (persistent-intmap a) (persistent-intmap b)))))

(define (compute-defs-and-uses cps)
  "Return two LABEL->VAR... maps indicating values defined at and used
by a label, respectively."
  (define (vars->intset vars)
    (fold (lambda (var set) (intset-add set var)) empty-intset vars))
  (persistent-intmap2
   (intmap-fold
    (lambda (label cont defs uses)
      (define (get-defs k)
        (match (intmap-ref cps k)
          (($ $kargs names vars) (vars->intset vars))
          (_ empty-intset)))
      (define (return d u)
        (values (intmap-add! defs label d)
                (intmap-add! uses label u)))
      (match cont
        (($ $kfun src meta self)
         (return (if self (intset self) empty-intset) empty-intset))
        (($ $kargs _ _ ($ $continue k src exp))
         (match exp
           ((or ($ $const) ($ $const-fun) ($ $code))
            (return (get-defs k) empty-intset))
           (($ $call proc args)
            (return (get-defs k) (intset-add (vars->intset args) proc)))
           (($ $callk _ proc args)
            (let ((args (vars->intset args)))
              (return (get-defs k) (if proc (intset-add args proc) args))))
           (($ $primcall name param args)
            (return (get-defs k) (vars->intset args)))
           (($ $values args)
            (return (get-defs k) (vars->intset args)))))
        (($ $kargs _ _ ($ $branch kf kt src op param args))
         (return empty-intset (vars->intset args)))
        (($ $kargs _ _ ($ $prompt k kh src escape? tag))
         (return empty-intset (intset tag)))
        (($ $kargs _ _ ($ $throw src op param args))
         (return empty-intset (vars->intset args)))
        (($ $kclause arity body alt)
         (return (get-defs body) empty-intset))
        (($ $kreceive arity kargs)
         (return (get-defs kargs) empty-intset))
        (($ $ktail)
         (return empty-intset empty-intset))))
    cps
    empty-intmap
    empty-intmap)))

(define (compute-reverse-control-flow-order preds)
  "Return a LABEL->ORDER bijection where ORDER is a contiguous set of
integers starting from 0 and incrementing in sort order.  There is a
precondition that labels in PREDS are already renumbered in reverse post
order."
  (define (has-back-edge? preds)
    (let/ec return
      (intmap-fold (lambda (label labels)
                     (intset-fold (lambda (pred)
                                    (if (<= label pred)
                                        (return #t)
                                        (values)))
                                  labels)
                     (values))
                   preds)
      #f))
  (if (has-back-edge? preds)
      ;; This is more involved than forward control flow because not all
      ;; live labels are reachable from the tail.
      (persistent-intmap
       (fold2 (lambda (component order n)
                (intset-fold (lambda (label order n)
                               (values (intmap-add! order label n)
                                       (1+ n)))
                             component order n))
              (reverse (compute-sorted-strongly-connected-components preds))
              empty-intmap 0))
      ;; Just reverse forward control flow.
      (let ((max (intmap-prev preds)))
        (intmap-map (lambda (label labels) (- max label)) preds))))

(define* (add-prompt-control-flow-edges conts succs #:key complete?)
  "For all prompts in DFG in the range [MIN-LABEL, MIN-LABEL +
LABEL-COUNT), invoke F with arguments PROMPT, HANDLER, and BODY for each
body continuation in the prompt."
  (define (intset-filter pred set)
    (intset-fold (lambda (i set)
                   (if (pred i) set (intset-remove set i)))
                 set
                 set))
  (define (intset-any pred set)
    (intset-fold (lambda (i res)
                   (if (or res (pred i)) #t res))
                 set
                 #f))
  (define (compute-prompt-body label)
    (persistent-intset
     (let visit-cont ((label label) (level 1) (labels empty-intset))
       (cond
        ((zero? level) labels)
        ((intset-ref labels label) labels)
        (else
         (let ((labels (intset-add! labels label)))
           (match (intmap-ref conts label)
             (($ $kreceive arity k) (visit-cont k level labels))
             (($ $kargs names syms ($ $continue k src ($ $primcall 'wind)))
              (visit-cont k (1+ level) labels))
             (($ $kargs names syms ($ $continue k src ($ $primcall 'unwind)))
              (visit-cont k (1- level) labels))
             (($ $kargs names syms ($ $continue k src exp))
              (visit-cont k level labels))
             (($ $kargs names syms ($ $branch kf kt))
              (visit-cont kf level (visit-cont kt level labels)))
             (($ $kargs names syms ($ $prompt k kh src escape? tag))
              (visit-cont kh level (visit-cont k (1+ level) labels)))
             (($ $kargs names syms ($ $throw)) labels))))))))
  (define (visit-prompt label handler succs)
    (let ((body (compute-prompt-body label)))
      (define (out-or-back-edge? label)
        ;; Most uses of visit-prompt-control-flow don't need every body
        ;; continuation, and would be happy getting called only for
        ;; continuations that postdominate the rest of the body.  Unless
        ;; you pass #:complete? #t, we only invoke F on continuations
        ;; that can leave the body, or on back-edges in loops.
        (not (intset-any (lambda (succ)
                           (and (intset-ref body succ) (< label succ)))
                         (intmap-ref succs label))))
      (intset-fold (lambda (pred succs)
                     (intmap-replace succs pred handler intset-add))
                   (if complete? body (intset-filter out-or-back-edge? body))
                   succs)))
  (intmap-fold
   (lambda (label cont succs)
     (match cont
       (($ $kargs _ _ ($ $prompt k kh))
        (visit-prompt k kh succs))
       (_ succs)))
   conts
   succs))

(define (rename-keys map old->new)
  (persistent-intmap
   (intmap-fold (lambda (k v out)
                  (intmap-add! out (intmap-ref old->new k) v))
                map
                empty-intmap)))

(define (rename-intset set old->new)
  (intset-fold (lambda (old set) (intset-add set (intmap-ref old->new old)))
               set empty-intset))

(define (rename-graph graph old->new)
  (persistent-intmap
   (intmap-fold (lambda (pred succs out)
                  (intmap-add! out
                               (intmap-ref old->new pred)
                               (rename-intset succs old->new)))
                graph
                empty-intmap)))

(define (compute-live-variables cps defs uses)
  "Compute and return two values mapping LABEL->VAR..., where VAR... are
the definitions that are live before and after LABEL, as intsets."
  (let* ((succs (add-prompt-control-flow-edges cps (compute-successors cps)))
         (preds (invert-graph succs))
         (old->new (compute-reverse-control-flow-order preds))
         (init (persistent-intmap (intmap-fold
                                   (lambda (old new init)
                                     (intmap-add! init new empty-intset))
                                   old->new empty-intmap))))
    (call-with-values
        (lambda ()
          (solve-flow-equations (rename-graph preds old->new)
                                init init
                                (rename-keys defs old->new)
                                (rename-keys uses old->new)
                                intset-subtract intset-union intset-union))
      (lambda (in out)
        ;; As a reverse control-flow problem, the values flowing into a
        ;; node are actually the live values after the node executes.
        ;; Funny, innit?  So we return them in the reverse order.
        (let ((new->old (invert-bijection old->new)))
          (values (rename-keys out new->old)
                  (rename-keys in new->old)))))))

(define (compute-needs-slot cps defs uses)
  (define (get-defs k) (intmap-ref defs k))
  (define (get-uses label) (intmap-ref uses label))
  (intmap-fold
   (lambda (label cont needs-slot)
     (intset-union
      needs-slot
      (match cont
        (($ $kargs)
         (intset-union (get-defs label) (get-uses label)))
        (($ $kreceive arity k)
         ;; Only allocate results of function calls to slots if they are
         ;; used.
         empty-intset)
        (($ $kclause arity body alternate)
         (get-defs label))
        (($ $kfun src meta self)
         (if self (intset self) empty-intset))
        (($ $ktail)
         empty-intset))))
   cps
   empty-intset))

(define (compute-lazy-vars cps live-in live-out defs needs-slot)
  "Compute and return a set of vars whose allocation can be delayed
until their use is seen.  These are \"lazy\" vars.  A var is lazy if its
uses are calls, it is always dead after the calls, and if the uses flow
to the definition.  A flow continues across a node iff the node kills no
values that need slots, and defines only lazy vars.  Calls also kill
flows; there's no sense in trying to juggle a pending frame while there
is an active call."
  (define (list->intset list)
    (persistent-intset
     (fold (lambda (i set) (intset-add! set i)) empty-intset list)))

  (let* ((succs (compute-successors cps))
         (gens (intmap-map
                (lambda (label cont)
                  (match cont
                    (($ $kargs _ _ ($ $continue _ _ ($ $call proc args)))
                     (intset-subtract (intset-add (list->intset args) proc)
                                      (intmap-ref live-out label)))
                    (($ $kargs _ _ ($ $continue _ _ ($ $callk _ proc args)))
                     (let ((args (list->intset args)))
                       (intset-subtract (if proc (intset-add args proc) args)
                                        (intmap-ref live-out label))))
                    (($ $kargs _ _ ($ $continue k _($ $values args)))
                     (match (intmap-ref cps k)
                       (($ $ktail) (list->intset args))
                       (_ #f)))
                    (_ #f)))
                cps))
         (kills (intmap-map
                 (lambda (label in)
                   (let* ((out (intmap-ref live-out label))
                          (killed (intset-subtract in out))
                          (killed-slots (intset-intersect killed needs-slot)))
                     (and (eq? killed-slots empty-intset)
                          ;; Kill output variables that need slots.
                          (intset-intersect (intmap-ref defs label)
                                            needs-slot))))
                 live-in))
         (preds (invert-graph succs))
         (old->new (compute-reverse-control-flow-order preds)))
    (define (subtract lazy kill)
      (cond
       ((eq? lazy empty-intset)
        lazy)
       ((not kill)
        empty-intset)
       ((and lazy (eq? empty-intset (intset-subtract kill lazy)))
        (intset-subtract lazy kill))
       (else
        empty-intset)))
    (define (add live gen) (or gen live))
    (define (meet in out)
      ;; Initial in is #f.
      (if in (intset-intersect in out) out))
    (call-with-values
        (lambda ()
          (let ((succs (rename-graph preds old->new))
                (init (persistent-intmap
                       (intmap-fold
                        (lambda (old new in)
                          (intmap-add! in new #f))
                        old->new empty-intmap)))
                (kills (rename-keys kills old->new))
                (gens (rename-keys gens old->new)))
            (solve-flow-equations succs init init kills gens
                                  subtract add meet)))
      (lambda (in out)
        ;; A variable is lazy if its uses reach its definition.
        (intmap-fold (lambda (label out lazy)
                       (match (intmap-ref cps label)
                         (($ $kargs names vars)
                          (let ((defs (list->intset vars)))
                            (intset-union lazy (intset-intersect out defs))))
                         (_ lazy)))
                     (rename-keys out (invert-bijection old->new))
                     empty-intset)))))

(define (find-first-zero n)
  ;; Naive implementation.
  (let lp ((slot 0))
    (if (logbit? slot n)
        (lp (1+ slot))
        slot)))

(define (find-first-trailing-zero n)
  (let lp ((slot (let lp ((count 2))
                   (if (< n (ash 1 (1- count)))
                       count
                       ;; Grow upper bound slower than factor 2 to avoid
                       ;; needless bignum allocation on 32-bit systems
                       ;; when there are more than 16 locals.
                       (lp (+ count (ash count -1)))))))
    (if (or (zero? slot) (logbit? (1- slot) n))
        slot
        (lp (1- slot)))))

(define (integers from count)
  (if (zero? count)
      '()
      (cons from (integers (1+ from) (1- count)))))

(define (solve-parallel-move src dst tmp)
  "Solve the parallel move problem between src and dst slot lists, which
are comparable with eqv?.  A tmp slot may be used."

  ;; This algorithm is taken from: "Tilting at windmills with Coq:
  ;; formal verification of a compilation algorithm for parallel moves"
  ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy
  ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf>

  (define (split-move moves reg)
    (let loop ((revhead '()) (tail moves))
      (match tail
        (((and s+d (s . d)) . rest)
         (if (eqv? s reg)
             (cons d (append-reverse revhead rest))
             (loop (cons s+d revhead) rest)))
        (_ #f))))

  (define (replace-last-source reg moves)
    (match moves
      ((moves ... (s . d))
       (append moves (list (cons reg d))))))

  (let loop ((to-move (map cons src dst))
             (being-moved '())
             (moved '())
             (last-source #f))
    ;; 'last-source' should always be equivalent to:
    ;; (and (pair? being-moved) (car (last being-moved)))
    (match being-moved
      (() (match to-move
            (() (reverse moved))
            (((and s+d (s . d)) . t1)
             (if (or (eqv? s d) ; idempotent
                     (not s))   ; src is a constant and can be loaded directly
                 (loop t1 '() moved #f)
                 (loop t1 (list s+d) moved s)))))
      (((and s+d (s . d)) . b)
       (match (split-move to-move d)
         ((r . t1) (loop t1 (acons d r being-moved) moved last-source))
         (#f (match b
               (() (loop to-move '() (cons s+d moved) #f))
               (_ (if (eqv? d last-source)
                      (loop to-move
                            (replace-last-source tmp b)
                            (cons s+d (acons d tmp moved))
                            tmp)
                      (loop to-move b (cons s+d moved) last-source))))))))))

(define (compute-shuffles cps slots call-allocs live-in)
  (define (add-live-slot slot live-slots)
    (logior live-slots (ash 1 slot)))

  (define (get-cont label)
    (intmap-ref cps label))

  (define (get-slot var)
    (intmap-ref slots var (lambda (_) #f)))

  (define (get-slots vars)
    (let lp ((vars vars))
      (match vars
        ((var . vars) (cons (get-slot var) (lp vars)))
        (_ '()))))

  (define (get-proc-slot label)
    (call-alloc-proc-slot (intmap-ref call-allocs label)))

  (define (compute-live-slots label)
    (intset-fold (lambda (var live)
                   (match (get-slot var)
                     (#f live)
                     (slot (add-live-slot slot live))))
                 (intmap-ref live-in label)
                 0))

  ;; Although some parallel moves may proceed without a temporary slot,
  ;; in general one is needed.  That temporary slot must not be part of
  ;; the source or destination sets, and that slot should not correspond
  ;; to a live variable.  Usually the source and destination sets are a
  ;; subset of the union of the live sets before and after the move.
  ;; However for stack slots that don't have names -- those slots that
  ;; correspond to function arguments or to function return values -- it
  ;; could be that they are out of the computed live set.  In that case
  ;; they need to be adjoined to the live set, used when choosing a
  ;; temporary slot.
  (define (compute-tmp-slot live stack-slots)
    (find-first-zero (fold add-live-slot live stack-slots)))

  (define (parallel-move src-slots dst-slots tmp-slot)
    (solve-parallel-move src-slots dst-slots tmp-slot))

  (define (compute-receive-shuffles label proc-slot)
    (match (get-cont label)
      (($ $kreceive arity kargs)
       (let* ((results (match (get-cont kargs)
                         (($ $kargs names vars) vars)))
              (value-slots (integers proc-slot (length results)))
              (result-slots (get-slots results))
              ;; Filter out unused results.
              (value-slots (filter-map (lambda (val result) (and result val))
                                       value-slots result-slots))
              (result-slots (filter (lambda (x) x) result-slots))
              (live (compute-live-slots kargs)))
         (parallel-move value-slots
                        result-slots
                        (compute-tmp-slot live value-slots))))))
    
  (define (add-call-shuffles label k args shuffles)
    (match (get-cont k)
      (($ $ktail)
       (let* ((live (compute-live-slots label))
              (tail-slots (integers 0 (length args)))
              (moves (parallel-move (get-slots args)
                                    tail-slots
                                    (compute-tmp-slot live tail-slots))))
         (intmap-add! shuffles label moves)))
      (($ $kreceive)
       (let* ((live (compute-live-slots label))
              (proc-slot (get-proc-slot label))
              (call-slots (integers proc-slot (length args)))
              (arg-moves (parallel-move (get-slots args)
                                        call-slots
                                        (compute-tmp-slot live call-slots))))
         (intmap-add! (intmap-add! shuffles label arg-moves)
                      k (compute-receive-shuffles k proc-slot))))))
    
  (define (add-values-shuffles label k args shuffles)
    (match (get-cont k)
      (($ $ktail)
       (let* ((live (compute-live-slots label))
              (src-slots (get-slots args))
              (dst-slots (integers 0 (length args)))
              (moves (parallel-move src-slots dst-slots
                                    (compute-tmp-slot live dst-slots))))
         (intmap-add! shuffles label moves)))
      (($ $kargs _ dst-vars)
       (let* ((live (logior (compute-live-slots label)
                            (compute-live-slots k)))
              (src-slots (get-slots args))
              (dst-slots (get-slots dst-vars))
              (moves (parallel-move src-slots dst-slots
                                    (compute-tmp-slot live '()))))
         (intmap-add! shuffles label moves)))))

  (define (add-prompt-shuffles label k handler shuffles)
    (intmap-add! shuffles handler
                 (compute-receive-shuffles handler (get-proc-slot label))))

  (define (compute-shuffles label cont shuffles)
    (match cont
      (($ $kargs names vars ($ $continue k src exp))
       (match exp
         (($ $call proc args)
          (add-call-shuffles label k (cons proc args) shuffles))
         (($ $callk _ proc args)
          (add-call-shuffles label k (if proc (cons proc args) args) shuffles))
         (($ $values args)
          (add-values-shuffles label k args shuffles))
         (_ shuffles)))
      (($ $kargs names vars ($ $prompt k kh src escape? tag))
       (add-prompt-shuffles label k kh shuffles))
      (_ shuffles)))

  (persistent-intmap
   (intmap-fold compute-shuffles cps empty-intmap)))

(define (compute-frame-size cps slots call-allocs shuffles)
  ;; Minimum frame has one slot: the closure.
  (define minimum-frame-size 1)
  (define (get-shuffles label)
    (intmap-ref shuffles label))
  (define (get-proc-slot label)
    (match (intmap-ref call-allocs label (lambda (_) #f))
      (#f 0) ;; Tail call.
      (($ $call-alloc proc-slot) proc-slot)))
  (define (max-size var size)
    (match (intmap-ref slots var (lambda (_) #f))
      (#f size)
      (slot (max size (1+ slot)))))
  (define (max-size* vars size)
    (fold max-size size vars))
  (define (shuffle-size moves size)
    (match moves
      (() size)
      (((src . dst) . moves)
       (shuffle-size moves (max size (1+ src) (1+ dst))))))
  (define (call-size label nargs size)
    (shuffle-size (get-shuffles label)
                  (max (+ (get-proc-slot label) nargs) size)))
  (define (measure-cont label cont size)
    (match cont
      (($ $kargs names vars term)
       (let ((size (max-size* vars size)))
         (match term
           (($ $continue _ _ ($ $call proc args))
            (call-size label (1+ (length args)) size))
           (($ $continue _ _ ($ $callk _ proc args))
            (let ((nclosure (if proc 1 0)))
              (call-size label (+ nclosure (length args)) size)))
           (($ $continue _ _ ($ $values args))
            (shuffle-size (get-shuffles label) size))
           (_ size))))
      (($ $kreceive)
       (shuffle-size (get-shuffles label) size))
      (_ size)))

  (intmap-fold measure-cont cps minimum-frame-size))

(define (allocate-args cps)
  (match (intmap-ref cps (intmap-next cps))
    (($ $kfun _ _ has-self?)
     (intmap-fold (lambda (label cont slots)
                    (match cont
                      (($ $kfun src meta self)
                       (if has-self?
                           (intmap-add! slots self 0)
                           slots))
                      (($ $kclause arity body alt)
                       (match (intmap-ref cps body)
                         (($ $kargs names vars)
                          (let lp ((vars vars) (slots slots)
                                   (n (if has-self? 1 0)))
                            (match vars
                              (() slots)
                              ((var . vars)
                               (lp vars
                                   (intmap-add! slots var n)
                                   (1+ n))))))))
                      (_ slots)))
                  cps empty-intmap))))

(define-inlinable (add-live-slot slot live-slots)
  (logior live-slots (ash 1 slot)))

(define-inlinable (kill-dead-slot slot live-slots)
  (logand live-slots (lognot (ash 1 slot))))

(define-inlinable (compute-slot live-slots hint)
  (if (and hint (not (logbit? hint live-slots)))
      hint
      (find-first-zero live-slots)))

(define (allocate-lazy-vars cps slots call-allocs live-in lazy)
  (define (compute-live-slots slots label)
    (intset-fold (lambda (var live)
                   (match (intmap-ref slots var (lambda (_) #f))
                     (#f live)
                     (slot (add-live-slot slot live))))
                 (intmap-ref live-in label)
                 0))

  (define (allocate var hint slots live)
    (match (and hint (intmap-ref slots var (lambda (_) #f)))
      (#f (if (intset-ref lazy var)
              (let ((slot (compute-slot live hint)))
                (values (intmap-add! slots var slot)
                        (add-live-slot slot live)))
              (values slots live)))
      (slot (values slots (add-live-slot slot live)))))

  (define (allocate* vars hints slots live)
    (match (vector vars hints)
      (#(() ()) slots)
      (#((var . vars) (hint . hints))
       (let-values (((slots live) (allocate var hint slots live)))
         (allocate* vars hints slots live)))))

  (define (get-proc-slot label)
    (match (intmap-ref call-allocs label (lambda (_) #f))
      (#f 0)
      (call (call-alloc-proc-slot call))))

  (define (allocate-call label args slots)
    (allocate* args (integers (get-proc-slot label) (length args))
               slots (compute-live-slots slots label)))

  (define (allocate-values label k args slots)
    (match (intmap-ref cps k)
      (($ $ktail)
       (allocate* args (integers 0 (length args))
                  slots (compute-live-slots slots label)))
      (($ $kargs names vars)
       (allocate* args
                  (map (cut intmap-ref slots <> (lambda (_) #f)) vars)
                  slots (compute-live-slots slots label)))))

  (define (allocate-lazy label cont slots)
    (match cont
      (($ $kargs names vars ($ $continue k src exp))
       (match exp
         (($ $call proc args)
          (allocate-call label (cons proc args) slots))
         (($ $callk _ proc args)
          (allocate-call label (if proc (cons proc args) args) slots))
         (($ $values args)
          (allocate-values label k args slots))
         (_ slots)))
      (_
       slots)))

  ;; Sweep right to left to visit uses before definitions.
  (persistent-intmap
   (intmap-fold-right allocate-lazy cps slots)))

(define (compute-var-representations cps)
  (define (get-defs k)
    (match (intmap-ref cps k)
      (($ $kargs names vars) vars)
      (_ '())))
  (intmap-fold
   (lambda (label cont representations)
     (match cont
       (($ $kargs _ _ ($ $continue k _ exp))
        (match (get-defs k)
          (() representations)
          ((var)
           (match exp
             (($ $values (arg))
              (intmap-add representations var
                          (intmap-ref representations arg)))
             (($ $primcall (or 'scm->f64 'load-f64
                               'f32-ref 'f64-ref
                               'fadd 'fsub 'fmul 'fdiv))
              (intmap-add representations var 'f64))
             (($ $primcall (or 'scm->u64 'scm->u64/truncate 'load-u64
                               's64->u64
                               'assume-u64
                               'uadd 'usub 'umul
                               'ulogand 'ulogior 'ulogxor 'ulogsub 'ursh 'ulsh
                               'uadd/immediate 'usub/immediate 'umul/immediate
                               'ursh/immediate 'ulsh/immediate
                               'u8-ref 'u16-ref 'u32-ref 'u64-ref
                               'word-ref 'word-ref/immediate
                               'untag-char))
              (intmap-add representations var 'u64))
             (($ $primcall (or 'untag-fixnum
                               'assume-s64
                               'scm->s64 'load-s64 'u64->s64
                               'srsh 'srsh/immediate
                               's8-ref 's16-ref 's32-ref 's64-ref))
              (intmap-add representations var 's64))
             (($ $primcall (or 'pointer-ref/immediate
                               'tail-pointer-ref/immediate))
              (intmap-add representations var 'ptr))
             (($ $code)
              (intmap-add representations var 'u64))
             (_
              (intmap-add representations var 'scm))))
          (vars
           (match exp
             (($ $values args)
              (fold (lambda (arg var representations)
                      (intmap-add representations var
                                  (intmap-ref representations arg)))
                    representations args vars))))))
       (($ $kargs _ _ (or ($ $branch) ($ $prompt) ($ $throw)))
        representations)
       (($ $kfun src meta self)
        (if self
            (intmap-add representations self 'scm)
            representations))
       (($ $kclause arity body alt)
        (fold1 (lambda (var representations)
                 (intmap-add representations var 'scm))
               (get-defs body) representations))
       (($ $kreceive arity kargs)
        (fold1 (lambda (var representations)
                 (intmap-add representations var 'scm))
               (get-defs kargs) representations))
       (($ $ktail) representations)))
   cps
   empty-intmap))

(define* (allocate-slots cps #:key (precolor-calls? #t))
  (let*-values (((defs uses) (compute-defs-and-uses cps))
                ((representations) (compute-var-representations cps))
                ((live-in live-out) (compute-live-variables cps defs uses))
                ((needs-slot) (compute-needs-slot cps defs uses))
                ((lazy) (if precolor-calls?
                            (compute-lazy-vars cps live-in live-out defs
                                               needs-slot)
                            empty-intset)))

    (define frame-size 3)

    (define (empty-live-slots)
      #b0)

    (define (compute-call-proc-slot live-slots)
      (+ frame-size (find-first-trailing-zero live-slots)))

    (define (compute-prompt-handler-proc-slot live-slots)
      (find-first-trailing-zero live-slots))

    (define (get-cont label)
      (intmap-ref cps label))

    (define (get-slot slots var)
      (intmap-ref slots var (lambda (_) #f)))

    (define (get-slots slots vars)
      (let lp ((vars vars))
        (match vars
          ((var . vars) (cons (get-slot slots var) (lp vars)))
          (_ '()))))

    (define (compute-live-slots* slots label live-vars)
      (intset-fold (lambda (var live)
                     (match (get-slot slots var)
                       (#f live)
                       (slot (add-live-slot slot live))))
                   (intmap-ref live-vars label)
                   0))

    (define (compute-live-in-slots slots label)
      (compute-live-slots* slots label live-in))

    (define (compute-live-out-slots slots label)
      (compute-live-slots* slots label live-out))

    (define slot-desc-dead 0)
    (define slot-desc-live-raw 1)
    (define slot-desc-live-scm 2)
    (define slot-desc-unused 3)

    (define (compute-slot-map slots live-vars nslots)
      (intset-fold
       (lambda (var slot-map)
         (match (get-slot slots var)
           (#f slot-map)
           (slot
            (let ((desc (match (intmap-ref representations var)
                          ((or 'u64 'f64 's64 'ptr) slot-desc-live-raw)
                          ('scm slot-desc-live-scm))))
              (logior slot-map (ash desc (* 2 slot)))))))
       live-vars 0))

    (define (allocate var hint slots live)
      (cond
       ((not (intset-ref needs-slot var))
        (values slots live))
       ((get-slot slots var)
        => (lambda (slot)
             (values slots (add-live-slot slot live))))
       ((and (not hint) (intset-ref lazy var))
        (values slots live))
       (else
        (let ((slot (compute-slot live hint)))
          (values (intmap-add! slots var slot)
                  (add-live-slot slot live))))))

    (define (allocate* vars hints slots live)
      (match (vector vars hints)
        (#(() ()) (values slots live))
        (#((var . vars) (hint . hints))
         (call-with-values (lambda () (allocate var hint slots live))
           (lambda (slots live)
             (allocate* vars hints slots live))))))

    (define (allocate-defs label vars slots)
      (let ((live (compute-live-in-slots slots label))
            (live-vars (intmap-ref live-in label)))
        (let lp ((vars vars) (slots slots) (live live))
          (match vars
            (() (values slots live))
            ((var . vars)
             (call-with-values (lambda () (allocate var #f slots live))
               (lambda (slots live)
                 (lp vars slots
                     (let ((slot (get-slot slots var)))
                       (if (and slot (not (intset-ref live-vars var)))
                           (kill-dead-slot slot live)
                           live))))))))))

    ;; PRE-LIVE are the live slots coming into the term.  POST-LIVE
    ;; is the subset of PRE-LIVE that is still live after the term
    ;; uses its inputs.
    (define (allocate-call label k args slots call-allocs pre-live)
      (match (get-cont k)
        (($ $ktail)
         (let ((tail-slots (integers 0 (length args))))
           (values (allocate* args tail-slots slots pre-live)
                   call-allocs)))
        (($ $kreceive arity kargs)
         (let*-values
             (((post-live) (compute-live-out-slots slots label))
              ((proc-slot) (compute-call-proc-slot post-live))
              ((call-slots) (integers proc-slot (length args)))
              ((slots pre-live) (allocate* args call-slots slots pre-live))
              ;; Allow the first result to be hinted by its use, but
              ;; hint the remaining results to stay in place.  This
              ;; strikes a balance between avoiding shuffling,
              ;; especially for unused extra values, and avoiding frame
              ;; size growth due to sparse locals.
              ((slots result-live)
               (match (get-cont kargs)
                 (($ $kargs () ())
                  (values slots post-live))
                 (($ $kargs (_ . _) (_ . results))
                  (let ((result-slots (integers (+ proc-slot 1)
                                                (length results))))
                    (allocate* results result-slots slots post-live)))))
              ((slot-map) (compute-slot-map slots (intmap-ref live-out label)
                                            (- proc-slot frame-size)))
              ((call) (make-call-alloc proc-slot slot-map)))
           (values slots
                   (intmap-add! call-allocs label call))))))
    
    (define (allocate-values label k args slots call-allocs)
      (match (get-cont k)
        (($ $ktail)
         (values slots call-allocs))
        (($ $kargs (_) (dst))
         ;; When there is only one value in play, we allow the dst to be
         ;; hinted (see compute-lazy-vars).  If the src doesn't have a
         ;; slot, then the actual slot for the dst would end up being
         ;; decided by the call that args it.  Because we don't know the
         ;; slot, we can't really compute the parallel moves in that
         ;; case, so just bail and rely on the bytecode emitter to
         ;; handle the one-value case specially.
         (match args
           ((src)
            (let ((post-live (compute-live-out-slots slots label)))
              (values (allocate dst (get-slot slots src) slots post-live)
                      call-allocs)))))
        (($ $kargs _ dst-vars)
         (let ((src-slots (get-slots slots args))
               (post-live (compute-live-out-slots slots label)))
           (values (allocate* dst-vars src-slots slots post-live)
                   call-allocs)))))

    (define (allocate-prompt label k handler slots call-allocs)
      (match (get-cont handler)
        (($ $kreceive arity kargs)
         (let*-values
             (((handler-live) (compute-live-in-slots slots handler))
              ((proc-slot) (compute-prompt-handler-proc-slot handler-live))
              ((slot-map)  (compute-slot-map slots (intmap-ref live-in handler)
                                             (- proc-slot frame-size)))
              ((result-vars) (match (get-cont kargs)
                               (($ $kargs names vars) vars)))
              ((value-slots) (integers proc-slot (length result-vars)))
              ((slots result-live) (allocate* result-vars value-slots
                                              slots handler-live)))
           (values slots
                   (intmap-add! call-allocs label
                                (make-call-alloc proc-slot slot-map)))))))

    (define (allocate-cont label cont slots call-allocs)
      (match cont
        (($ $kargs names vars term)
         (let-values (((slots live) (allocate-defs label vars slots)))
           (match term
             (($ $continue k src ($ $call proc args))
              (allocate-call label k (cons proc args) slots call-allocs live))
             (($ $continue k src ($ $callk _ proc args))
              (allocate-call label k (if proc (cons proc args) args)
                             slots call-allocs live))
             (($ $continue k src ($ $values args))
              (allocate-values label k args slots call-allocs))
             (($ $prompt k kh src escape? tag)
              (allocate-prompt label k kh slots call-allocs))
             (_
              (values slots call-allocs)))))
        (_
         (values slots call-allocs))))

    (call-with-values (lambda ()
                        (let ((slots (allocate-args cps)))
                          (intmap-fold allocate-cont cps slots empty-intmap)))
      (lambda (slots calls)
        (let* ((slots (allocate-lazy-vars cps slots calls live-in lazy))
               (shuffles (compute-shuffles cps slots calls live-in))
               (frame-size (compute-frame-size cps slots calls shuffles)))
          (make-allocation slots representations calls shuffles frame-size))))))