summaryrefslogtreecommitdiff
path: root/module/statprof.scm
blob: e334c2bebfd8740c791046cbe1b40fb4d3b8317e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
;;;; (statprof) -- a statistical profiler for Guile
;;;; -*-scheme-*-
;;;;
;;;; 	Copyright (C) 2009, 2010, 2011, 2013-2018, 2020  Free Software Foundation, Inc.
;;;;    Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com>
;;;;    Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
;;;; 
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;; 


;;; Commentary:
;;;
;;; @code{(statprof)} is a statistical profiler for Guile.  See the
;;; "Statprof" section in the manual, for more information.
;;;
;;; Code:

(define-module (statprof)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-9 gnu)
  #:use-module (ice-9 format)
  #:use-module (ice-9 match)
  #:use-module (ice-9 vlist)
  #:use-module (system vm vm)
  #:use-module (system vm frame)
  #:use-module (system vm debug)
  #:use-module (system vm program)
  #:export (statprof-active?
            statprof-start
            statprof-stop
            statprof-reset

            statprof-accumulated-time
            statprof-sample-count
            statprof-fold-call-data
            statprof-proc-call-data
            statprof-call-data-name
            statprof-call-data-calls
            statprof-call-data-cum-samples
            statprof-call-data-self-samples
            statprof-call-data->stats
           
            statprof-stats-proc-name
            statprof-stats-proc-source
            statprof-stats-%-time-in-proc
            statprof-stats-cum-secs-in-proc
            statprof-stats-self-secs-in-proc
            statprof-stats-calls
            statprof-stats-self-secs-per-call
            statprof-stats-cum-secs-per-call

            statprof-display
            statprof-display-anomalies

            statprof-fetch-stacks
            statprof-fetch-call-tree

            statprof
            gcprof))


;;; ~ Implementation notes ~
;;;
;;; Statprof can be divided into two pieces: data collection and data
;;; analysis.
;;;
;;; The data collection runs concurrently with the program, and is
;;; designed to be as cheap as possible.  The main data collection
;;; instrument is the stack sampler, driven by SIGPROF signals that are
;;; scheduled with periodic setitimer calls.  The stack sampler simply
;;; looks at every frame on the stack, and writes a representation of
;;; the frame's procedure into a growable buffer.
;;;
;;; For most frames, this representation is the instruction pointer of
;;; that frame, because it's cheap to get and you can map from
;;; instruction pointer to procedure fairly cheaply.  This won't
;;; distinguish between different closures which share the same code,
;;; but that is usually what we want anyway.
;;;
;;; The other part of data collection is the exact call counter, which
;;; uses the VM's "apply" hook to record each procedure call.
;;; Naturally, this is quite expensive, and it is off by default.
;;; Running code at every procedure call effectively penalizes procedure
;;; calls.  Still, it's useful sometimes.  If the profiler state has a
;;; call-counts table, then calls will be counted.  As with the stack
;;; counter, the key in the hash table is the code pointer of the
;;; procedure being called.  The call counter can also see calls of
;;; non-programs, for example in the case of applicable structs.  In
;;; that case the key is the procedure itself.
;;;
;;; After collection is finished, the data can be analyzed.  The first
;;; step is usually to run over the stack traces, tabulating sample
;;; counts by procedure; the stack-samples->procedure-data does that.
;;; The result of stack-samples->procedure-data is a hash table mapping
;;; procedures to "call data" records.  The call data values are exposed
;;; to users via the statprof-fold-call-data procedure.
;;;
;;; Usually all the analysis is triggered by calling statprof-display,
;;; or having the statprof procedure call it for you.
;;;
;;; The other thing we can do is to look at the stacks themselves, for
;;; example via statprof-fetch-call-tree.
;;;

;;; ~ Threads and state ~
;;;
;;; The state of the profiler is contained in a <state> record, which is
;;; bound to a thread-local parameter.  The accurate call counter uses
;;; the VM apply hook, which is also local to the current thread, so all
;;; is good there.
;;;
;;; The problem comes in the statistical stack sampler's use of
;;; `setitimer' and SIGPROF.  The timer manipulated by setitimer is a
;;; whole-process timer, so it decrements as other threads execute,
;;; which is the wrong thing if you want to profile just one thread.  On
;;; the other hand, SIGPROF is delivered to the process as a whole,
;;; which is fine given Guile's signal-handling thread, but then only
;;; delivered to the thread running statprof, which isn't the right
;;; thing if you want to profile the whole system.
;;;
;;; The summary is that statprof works more or less well as a per-thread
;;; profiler if no other threads are running on their own when
;;; profiling.  If the other threads are running on behalf of the thread
;;; being profiled (as via futures or parallel marking) things still
;;; mostly work as expected.  You can run statprof in one thread,
;;; finish, and then run statprof in another thread, and the profile
;;; runs won't affect each other.  But if you want true per-thread
;;; profiles when other things are happening in the process, including
;;; other statprof runs, or whole-process profiles with per-thread
;;; breakdowns, the use of setitimer currently prevents that.
;;;
;;; The solution would be to switch to POSIX.1-2001's timer_create(2),
;;; and to add some more threading-related API to statprof.  Some other
;;; day.
;;;

(define-record-type <state>
  (make-state accumulated-time last-start-time sample-count
              sampling-period remaining-prof-time profile-level
              call-counts gc-time-taken inside-profiler?
              prev-sigprof-handler outer-cut buffer buffer-pos)
  state?
  ;; Total time so far.
  (accumulated-time accumulated-time set-accumulated-time!)
  ;; Start-time when timer is active.
  (last-start-time last-start-time set-last-start-time!)
  ;; Total count of sampler calls.
  (sample-count sample-count set-sample-count!)
  ;; Microseconds.
  (sampling-period sampling-period set-sampling-period!)
  ;; Time remaining when prof suspended.
  (remaining-prof-time remaining-prof-time set-remaining-prof-time!)
  ;; For user start/stop nesting.
  (profile-level profile-level set-profile-level!)
  ;; Hash table mapping ip -> call count, or #f if not counting calls.
  (call-counts call-counts set-call-counts!)
  ;; GC time between statprof-start and statprof-stop.
  (gc-time-taken gc-time-taken set-gc-time-taken!)
  ;; True if we are inside the profiler.
  (inside-profiler? inside-profiler? set-inside-profiler?!)
  ;; Previous sigprof handler.
  (prev-sigprof-handler prev-sigprof-handler set-prev-sigprof-handler!)
  ;; Outer stack cut, or 0.
  (outer-cut outer-cut)
  ;; Stack samples.
  (buffer buffer set-buffer!)
  (buffer-pos buffer-pos set-buffer-pos!))

(define profiler-state (make-parameter #f))

(define (fresh-buffer)
  (make-vector 1024 #f))

(define (expand-buffer buf)
  (let* ((size (vector-length buf))
         (new (make-vector (* size 2) #f)))
    (vector-move-left! buf 0 (vector-length buf) new 0)
    new))

(define* (fresh-profiler-state #:key (count-calls? #f)
                               (sampling-period 10000)
                               (outer-cut 0))
  (make-state 0 #f 0
              sampling-period 0 0
              (and count-calls? (make-hash-table)) 0 #f
              #f outer-cut (fresh-buffer) 0))

(define (ensure-profiler-state)
  (or (profiler-state)
      (let ((state (fresh-profiler-state)))
        (profiler-state state)
        state)))

(define (existing-profiler-state)
  (or (profiler-state)
      (error "expected there to be a profiler state")))

(define (accumulate-time state stop-time)
  (set-accumulated-time! state
                         (+ (accumulated-time state)
                            (- stop-time (last-start-time state)))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SIGPROF handler

(define (sample-stack-procs state stack)
  (set-sample-count! state (+ (sample-count state) 1))

  (let lp ((frame (stack-ref stack 0))
           (len (stack-length stack))
           (buffer (buffer state))
           (pos (buffer-pos state)))
    (define (write-sample sample)
      (vector-set! buffer pos sample))
    (define (continue pos)
      (lp (frame-previous frame) (1- len) buffer pos))
    (define (write-sample-and-continue sample)
      (write-sample sample)
      (continue (1+ pos)))
    (cond
     ((= pos (vector-length buffer))
      (lp frame len (expand-buffer buffer) pos))
     ((or (zero? len) (not frame))
      (write-sample #f)
      (set-buffer! state buffer)
      (set-buffer-pos! state (1+ pos)))
     (else
      (write-sample-and-continue (frame-instruction-pointer frame))))))

(define (reset-sigprof-timer usecs)
  ;; Guile's setitimer binding is terrible.
  (let ((prev (setitimer ITIMER_PROF 0 0 0 usecs)))
    (+ (* (caadr prev) #e1e6) (cdadr prev))))

(define profile-signal-handler
  (let ()
    (define (profile-signal-handler sig)
      (define state (existing-profiler-state))

      (set-inside-profiler?! state #t)

      (when (positive? (profile-level state))
        (let* ((stop-time (get-internal-run-time))
               ;; Cut down to the signal handler.  Note that this will
               ;; only work if statprof.scm is compiled; otherwise we
               ;; get `eval' on the stack instead, because if it's not
               ;; compiled, profile-signal-handler is a thunk that
               ;; tail-calls eval.  For the same reason we define the
               ;; handler in an inner letrec, so that the compiler sees
               ;; the inner reference to profile-signal-handler as the
               ;; same as the procedure, and therefore keeps slot 0
               ;; alive.  Nastiness, that.  Finally we cut one more
               ;; inner frame, corresponding to the handle-interrupts
               ;; trampoline.
               (stack
                (or (make-stack #t profile-signal-handler (outer-cut state) 1)
                    (pk 'what! (make-stack #t)))))

          (sample-stack-procs state stack)
          (accumulate-time state stop-time)
          (set-last-start-time! state (get-internal-run-time))

          (reset-sigprof-timer (sampling-period state))))

      (set-inside-profiler?! state #f))
    profile-signal-handler))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Count total calls.

(define (count-call frame)
  (let ((state (existing-profiler-state)))
    (unless (inside-profiler? state)
      (let* ((key (frame-instruction-pointer frame))
             (handle (hashv-create-handle! (call-counts state) key 0)))
        (set-cdr! handle (1+ (cdr handle)))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (statprof-active?)
  "Returns @code{#t} if @code{statprof-start} has been called more times
than @code{statprof-stop}, @code{#f} otherwise."
  (define state (profiler-state))
  (and state (positive? (profile-level state))))

;; Do not call this from statprof internal functions -- user only.
(define* (statprof-start #:optional (state (ensure-profiler-state)))
  "Start the profiler.@code{}"
  ;; After some head-scratching, I don't *think* I need to mask/unmask
  ;; signals here, but if I'm wrong, please let me know.
  (set-profile-level! state (+ (profile-level state) 1))
  (when (= (profile-level state) 1)
    (let ((rpt (remaining-prof-time state)))
      (set-remaining-prof-time! state 0)
      ;; FIXME: Use per-thread run time.
      (set-last-start-time! state (get-internal-run-time))
      (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken))
      (let ((prev (sigaction SIGPROF profile-signal-handler)))
        (set-prev-sigprof-handler! state (car prev)))
      (reset-sigprof-timer (if (zero? rpt) (sampling-period state) rpt))
      (when (call-counts state)
        (vm-add-apply-hook! count-call)
        (set-vm-trace-level! (1+ (vm-trace-level))))
      #t)))
  
;; Do not call this from statprof internal functions -- user only.
(define* (statprof-stop #:optional (state (ensure-profiler-state)))
  "Stop the profiler.@code{}"
  ;; After some head-scratching, I don't *think* I need to mask/unmask
  ;; signals here, but if I'm wrong, please let me know.
  (set-profile-level! state (- (profile-level state) 1))
  (when (zero? (profile-level state))
    (when (call-counts state)
      (set-vm-trace-level! (1- (vm-trace-level)))
      (vm-remove-apply-hook! count-call))
    (set-gc-time-taken! state
                        (- (assq-ref (gc-stats) 'gc-time-taken)
                           (gc-time-taken state)))
    ;; I believe that we need to do this before getting the time
    ;; (unless we want to make things even more complicated).
    (set-remaining-prof-time! state (reset-sigprof-timer 0))
    (accumulate-time state (get-internal-run-time))
    (sigaction SIGPROF (prev-sigprof-handler state))
    (set-prev-sigprof-handler! state #f)
    (set-last-start-time! state #f)))

(define* (statprof-reset sample-seconds sample-microseconds count-calls?
                         #:optional full-stacks?)
  "Reset the statprof sampler interval to @var{sample-seconds} and
@var{sample-microseconds}. If @var{count-calls?} is true, arrange to
instrument procedure calls as well as collecting statistical profiling
data.  (The optional @var{full-stacks?} argument is deprecated; statprof
always collects full stacks.)"
  (when (statprof-active?)
    (error "Can't reset profiler while profiler is running."))
  (profiler-state
   (fresh-profiler-state #:count-calls? count-calls?
                         #:sampling-period (+ (* sample-seconds #e1e6)
                                              sample-microseconds)))
  (values))

(define-record-type call-data
  (make-call-data name printable source
                  call-count cum-sample-count self-sample-count)
  call-data?
  (name call-data-name)
  (printable call-data-printable)
  (source call-data-source)
  (call-count call-data-call-count set-call-data-call-count!)
  (cum-sample-count call-data-cum-sample-count set-call-data-cum-sample-count!)
  (self-sample-count call-data-self-sample-count set-call-data-self-sample-count!))

(define (source->string source)
  (format #f "~a:~a:~a"
          (or (source-file source) "<current input>")
          (source-line-for-user source)
          (source-column source)))

(define (addr->pdi addr cache)
  (cond
   ((hashv-get-handle cache addr) => cdr)
   (else
    (let ((data (find-program-debug-info addr)))
      (hashv-set! cache addr data)
      data))))

(define (addr->printable addr pdi)
  (or (and=> (and=> pdi program-debug-info-name) symbol->string)
      (and=> (primitive-code-name addr) symbol->string)
      (string-append "anon #x" (number->string addr 16))))

(define (inc-call-data-cum-sample-count! cd)
  (set-call-data-cum-sample-count! cd (1+ (call-data-cum-sample-count cd))))
(define (inc-call-data-self-sample-count! cd)
  (set-call-data-self-sample-count! cd (1+ (call-data-self-sample-count cd))))

(define (skip-count-call buffer start len)
  ;; If we are counting all procedure calls, count-call might be on the
  ;; stack.  If it is, skip that part of the stack.
  (match (program-address-range count-call)
    ((lo . hi)
     (let lp ((pos start))
       (if (< pos len)
           (let ((key (vector-ref buffer pos)))
             (cond
              ((not key)
               ;; End of stack; count-call not on the stack.
               start)
              ((and (number? key) (<= lo key) (< key hi))
               ;; Found count-call.
               (1+ pos))
              (else
               ;; Otherwise keep going.
               (lp (1+ pos)))))
           start)))))

(define (stack-samples->procedure-data state)
  (let ((table (make-hash-table))
        (addr-cache (make-hash-table))
        (call-counts (call-counts state))
        (buffer (buffer state))
        (len (buffer-pos state)))
    (define (addr->call-data addr)
      (let* ((pdi (addr->pdi addr addr-cache))
             (entry (if pdi (program-debug-info-addr pdi) addr)))
        (or (hashv-ref table entry)
            (let ((data (make-call-data (and=> pdi program-debug-info-name)
                                        (addr->printable entry pdi)
                                        (find-source-for-addr entry)
                                        (and call-counts
                                             (hashv-ref call-counts entry))
                                        0
                                        0)))
              (hashv-set! table entry data)
              data))))

    (when call-counts
      (hash-for-each (lambda (callee count)
                       (unless (number? callee)
                         (error "unexpected callee" callee))
                       (addr->call-data callee))
                     call-counts))

    (let visit-stacks ((pos 0))
      (cond
       ((< pos len)
        (let ((pos (if call-counts
                        (skip-count-call buffer pos len)
                        pos)))
          (inc-call-data-self-sample-count!
           (addr->call-data (vector-ref buffer pos)))
          (let visit-stack ((pos pos))
            (cond
             ((vector-ref buffer pos)
              => (lambda (ip)
                   (inc-call-data-cum-sample-count! (addr->call-data ip))
                   (visit-stack (1+ pos))))
             (else
              (visit-stacks (1+ pos)))))))
       (else table)))))

(define (stack-samples->callee-lists state)
  (let ((buffer (buffer state))
        (len (buffer-pos state)))
    (let visit-stacks ((pos 0) (out '()))
      (cond
       ((< pos len)
        (let visit-stack ((pos (if (call-counts state)
                                   (skip-count-call buffer pos len)
                                   pos))
                          (stack '()))
          (cond
           ((vector-ref buffer pos)
            => (lambda (callee)
                 (visit-stack (1+ pos) (cons callee stack))))
           (else
            (visit-stacks (1+ pos) (cons (reverse stack) out))))))
       (else (reverse out))))))

(define* (statprof-fold-call-data proc init #:optional
                                  (state (existing-profiler-state)))
  "Fold @var{proc} over the call-data accumulated by statprof. Cannot be
called while statprof is active. @var{proc} should take two arguments,
@code{(@var{call-data} @var{prior-result})}.

Note that a given proc-name may appear multiple times, but if it does,
it represents different functions with the same name."
  (when (statprof-active?)
    (error "Can't call statprof-fold-call-data while profiler is running."))
  (hash-fold
   (lambda (key value prior-result)
     (proc value prior-result))
   init
   (stack-samples->procedure-data state)))

(define* (statprof-proc-call-data proc #:optional
                                  (state (existing-profiler-state)))
  "Returns the call-data associated with @var{proc}, or @code{#f} if
none is available."
  (when (statprof-active?)
    (error "Can't call statprof-proc-call-data while profiler is running."))
  (unless (program? proc)
    (error "statprof-call-data only works for VM programs"))
  (hashv-ref (stack-samples->procedure-data state) (program-code proc)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Stats

(define-record-type stats
  (make-stats proc-name proc-source
              %-time-in-proc cum-secs-in-proc self-secs-in-proc
              calls)
  stats?
  (proc-name statprof-stats-proc-name)
  (proc-source statprof-stats-proc-source)
  (%-time-in-proc statprof-stats-%-time-in-proc)
  (cum-secs-in-proc statprof-stats-cum-secs-in-proc)
  (self-secs-in-proc statprof-stats-self-secs-in-proc)
  (calls statprof-stats-calls))

(define (statprof-stats-self-secs-per-call stats)
  (let ((calls (statprof-stats-calls stats)))
    (and calls
         (/ (statprof-stats-self-secs-in-proc stats)
            calls))))

(define (statprof-stats-cum-secs-per-call stats)
  (let ((calls (statprof-stats-calls stats)))
    (and calls
         (/ (statprof-stats-cum-secs-in-proc stats)
            ;; `calls' might be 0 if we entered statprof during the
            ;; dynamic extent of the call.
            (max calls 1)))))

(define (statprof-call-data->stats call-data)
  "Returns an object of type @code{statprof-stats}."
  (define state (existing-profiler-state))

  (let* ((proc-name (call-data-name call-data))
         (proc-source (and=> (call-data-source call-data) source->string))
         (self-samples (call-data-self-sample-count call-data))
         (cum-samples (call-data-cum-sample-count call-data))
         (all-samples (statprof-sample-count state))
         (secs-per-sample (/ (statprof-accumulated-time state)
                             (statprof-sample-count state)))
         (num-calls (and (call-counts state)
                         (statprof-call-data-calls call-data))))

    (make-stats (or proc-name
                    ;; If there is no name and no source, fall back to
                    ;; printable.
                    (and (not proc-source) (call-data-printable call-data)))
                proc-source
                (* (/ self-samples all-samples) 100.0)
                (* cum-samples secs-per-sample 1.0)
                (* self-samples secs-per-sample 1.0)
                num-calls)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (stats-sorter x y)
  (let ((diff (- (statprof-stats-self-secs-in-proc x)
                 (statprof-stats-self-secs-in-proc y))))
    (positive?
     (if (= diff 0)
         (- (statprof-stats-cum-secs-in-proc x)
            (statprof-stats-cum-secs-in-proc y))
         diff))))

(define* (statprof-display/flat port state)
  "Displays a gprof-like summary of the statistics collected. Unless an
optional @var{port} argument is passed, uses the current output port."
  (cond
   ((zero? (statprof-sample-count state))
    (format port "No samples recorded.\n"))
   (else
    (let* ((stats-list (statprof-fold-call-data
                        (lambda (data prior-value)
                          (cons (statprof-call-data->stats data)
                                prior-value))
                        '()
                        state))
           (sorted-stats (sort stats-list stats-sorter)))

      (define (display-stats-line stats)
        (format port "~6,2f ~9,2f ~9,2f"
                (statprof-stats-%-time-in-proc stats)
                (statprof-stats-cum-secs-in-proc stats)
                (statprof-stats-self-secs-in-proc stats))
        (if (call-counts state)
            (if (statprof-stats-calls stats)
                (format port " ~7d  "
                        (statprof-stats-calls stats))
                (format port "                            "))
            (display "  " port))
        (let ((source (statprof-stats-proc-source stats))
              (name (statprof-stats-proc-name stats)))
          (when source
            (display source port)
            (when name
              (display ":" port)))
          (when name
            (display name port))
          (newline port)))
    
      (if (call-counts state)
          (begin
            (format  port "~5a ~10a   ~7a  ~8a\n"
                     "%  " "cumulative" "self" "")
            (format  port "~5a  ~9a  ~8a  ~7a ~a\n"
                     "time" "seconds" "seconds" "calls" "procedure"))
          (begin
            (format  port "~5a ~10a   ~7a  ~8a\n"
                     "%" "cumulative" "self" "")
            (format  port "~5a  ~10a  ~7a  ~a\n"
                     "time" "seconds" "seconds" "procedure")))

      (for-each display-stats-line sorted-stats)

      (display "---\n" port)
      (format port "Sample count: ~A\n" (statprof-sample-count state))
      (format port "Total time: ~A seconds (~A seconds in GC)\n"
              (statprof-accumulated-time state)
              (/ (gc-time-taken state)
                 1.0 internal-time-units-per-second))))))

(define* (statprof-display-anomalies #:optional (state
                                                 (existing-profiler-state)))
  "A sanity check that attempts to detect anomalies in statprof's
statistics.@code{}"
  (statprof-fold-call-data
   (lambda (data prior-value)
     (when (and (call-counts state)
                (zero? (call-data-call-count data))
                (positive? (call-data-cum-sample-count data)))
       (format #t
               "==[~A ~A ~A]\n"
               (call-data-name data)
               (call-data-call-count data)
               (call-data-cum-sample-count data))))
   #f
   state)
  (format #t "Total time: ~A\n" (statprof-accumulated-time state))
  (format #t "Sample count: ~A\n" (statprof-sample-count state)))

(define* (statprof-accumulated-time #:optional (state
                                                (existing-profiler-state)))
  "Returns the time accumulated during the last statprof run.@code{}"
  (/ (accumulated-time state) 1.0 internal-time-units-per-second))

(define* (statprof-sample-count #:optional (state (existing-profiler-state)))
  "Returns the number of samples taken during the last statprof run.@code{}"
  (sample-count state))

(define statprof-call-data-name call-data-name)
(define statprof-call-data-calls call-data-call-count)
(define statprof-call-data-cum-samples call-data-cum-sample-count)
(define statprof-call-data-self-samples call-data-self-sample-count)

(define* (statprof-fetch-stacks #:optional (state (existing-profiler-state)))
  "Returns a list of stacks, as they were captured since the last call
to @code{statprof-reset}."
  (stack-samples->callee-lists state))

;; tree ::= (car n . tree*)

(define (lists->trees lists equal?)
  (let lp ((in lists) (n-terminal 0) (tails '()))
    (cond
     ((null? in)
      (let ((trees (map (lambda (tail)
                          (cons (car tail)
                                (lists->trees (cdr tail) equal?)))
                        tails)))
        (cons (apply + n-terminal (map cadr trees))
              (sort trees
                    (lambda (a b) (> (cadr a) (cadr b)))))))
     ((null? (car in))
      (lp (cdr in) (1+ n-terminal) tails))
     ((find (lambda (x) (equal? (car x) (caar in)))
            tails)
      => (lambda (tail)
           (lp (cdr in)
               n-terminal
               (assq-set! tails
                          (car tail)
                          (cons (cdar in) (cdr tail))))))
     (else
      (lp (cdr in)
          n-terminal
          (acons (caar in) (list (cdar in)) tails))))))

(define (collect-cycles items)
  (define (find-cycle item stack)
    (match (vhash-assoc item stack)
      (#f #f)
      ((_ . pos)
       (let ((size (- (vlist-length stack) pos)))
         (and (<= (1- (* size 2)) (vlist-length stack))
              (let lp ((i 0))
                (if (= i (1- size))
                    size
                    (and (equal? (car (vlist-ref stack i))
                                 (car (vlist-ref stack (+ i size))))
                         (lp (1+ i))))))))))
  (define (collect-cycle stack size)
    (vlist-fold-right (lambda (pair cycle)
                        (cons (car pair) cycle))
                      '()
                      (vlist-take stack size)))
  (define (detect-cycle items stack)
    (match items
      (() stack)
      ((item . items)
       (let* ((cycle-size (find-cycle item stack)))
         (if cycle-size
             (chomp-cycles (collect-cycle stack cycle-size)
                           items
                           (vlist-drop stack (1- (* cycle-size 2))))
             (chomp-cycles (list item) items stack))))))
  (define (skip-cycles cycle items)
    (let lp ((a cycle) (b items))
      (match a
        (() (skip-cycles cycle b))
        ((a . a*)
         (match b
           (() items)
           ((b . b*)
            (if (equal? a b)
                (lp a* b*)
                items)))))))
  (define (chomp-cycles cycle items stack)
    (detect-cycle (skip-cycles cycle items)
                  (vhash-cons (match cycle
                                ((item) item)
                                (cycle cycle))
                              (vlist-length stack)
                              stack)))
  (vlist-fold
   (lambda (pair out)
     (cons (car pair) out))
   '()
   (detect-cycle items vlist-null)))

(define* (statprof-fetch-call-tree #:optional (state (existing-profiler-state))
                                   #:key precise?)
  "Return a call tree for the previous statprof run.

The return value is a list of nodes, each of which is of the type:
@code
 node ::= (@var{proc} @var{count} . @var{nodes})
@end code"
  (define-syntax-rule (define-memoized (fn arg) body)
    (define fn
      (let ((table (make-hash-table)))
        (lambda (arg)
          (cond
           ((hash-get-handle table arg) => cdr)
           (else
            (let ((res body))
              (hash-set! table arg res)
              res)))))))
  (define-memoized (callee->printable callee)
    (cond
     ((number? callee)
      (let* ((pdi (find-program-debug-info callee))
             (name (or (and=> (and pdi (program-debug-info-name pdi))
                              symbol->string)
                       (string-append "#x" (number->string callee 16))))
             (loc (and=> (find-source-for-addr
                          (or (and (not precise?)
                                   (and=> pdi program-debug-info-addr))
                              callee))
                         source->string)))
        (if loc
            (string-append name " at " loc)
            name)))
     (else
      (with-output-to-string (lambda () (write callee))))))
  (define (munge-stack stack)
    ;; We collect the sample in newest-to-oldest
    ;; order.  Change to have the oldest first.
    (let ((stack (reverse stack)))
      (define (cycle->printable item)
        (if (string? item)
            item
            (string-join (map cycle->printable item) ", ")))
      (map cycle->printable (collect-cycles (map callee->printable stack)))))
  (let ((stacks (map munge-stack (stack-samples->callee-lists state))))
    (cons #t (lists->trees stacks equal?))))

(define (statprof-display/tree port state)
  (match (statprof-fetch-call-tree state)
    ((#t total-count . trees)
     (define (print-tree tree indent)
       (define (print-subtree tree) (print-tree tree (+ indent 2)))
       (match tree
         ((callee count . trees)
          (format port "~vt~,1f% ~a\n" indent (* 100. (/ count total-count))
                  callee)
          (for-each print-subtree trees))))
     (for-each (lambda (tree) (print-tree tree 0)) trees)))
  (display "---\n" port)
  (format port "Sample count: ~A\n" (statprof-sample-count state))
  (format port "Total time: ~A seconds (~A seconds in GC)\n"
          (statprof-accumulated-time state)
          (/ (gc-time-taken state)
             1.0 internal-time-units-per-second)))

(define* (statprof-display #:optional (port (current-output-port))
                           (state (existing-profiler-state))
                           #:key (style 'flat))
  "Displays a summary of the statistics collected. Unless an optional
@var{port} argument is passed, uses the current output port."
  (case style
    ((flat) (statprof-display/flat port state))
    ((anomalies)
     (with-output-to-port port
       (lambda ()
         (statprof-display-anomalies state))))
    ((tree) (statprof-display/tree port state))
    (else (error "Unknown statprof display style" style))))

(define (call-thunk thunk)
  (call-with-values (lambda () (thunk))
    (lambda results
      (apply values results))))

;; Prevent inlining of 'call-thunk' so that it can be used to determine
;; the stack boundaries passed to 'make-stack'.
(set! call-thunk call-thunk)

(define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f)
                   (port (current-output-port)) full-stacks?
                   (display-style 'flat))
  "Profile the execution of @var{thunk}, and return its return values.

The stack will be sampled @var{hz} times per second, and the thunk
itself will be called @var{loop} times.

If @var{count-calls?} is true, all procedure calls will be recorded. This
operation is somewhat expensive."
  
  (let ((state (fresh-profiler-state #:count-calls? count-calls?
                                     #:sampling-period
                                     (inexact->exact (round (/ 1e6 hz)))
                                     #:outer-cut
                                     (program-address-range call-thunk))))
    (parameterize ((profiler-state state))
      (dynamic-wind
        (lambda ()
          (statprof-start state))
        (lambda ()
          (let lp ((i loop))
            (unless (= i 1)
              (call-thunk thunk)
              (lp (1- i))))
          (call-thunk thunk))
        (lambda ()
          (statprof-stop state)
          (statprof-display port state #:style display-style))))))

(define* (gcprof thunk #:key (loop 1) full-stacks? (port (current-output-port)))
  "Do an allocation profile of the execution of @var{thunk}.

The stack will be sampled soon after every garbage collection, yielding
an approximate idea of what is causing allocation in your program.

Since GC does not occur very frequently, you may need to use the
@var{loop} parameter, to cause @var{thunk} to be called @var{loop}
times."
  
  (let ((state (fresh-profiler-state #:outer-cut
                                     (program-address-range call-thunk))))
    (parameterize ((profiler-state state))
      (define (gc-callback)
        (unless (inside-profiler? state)
          (set-inside-profiler?! state #t)

          (let ((stop-time (get-internal-run-time))
                ;; Cut down to gc-callback, and then two more (the
                ;; after-gc async and the handle-interrupts trampoline).
                ;; See the note in profile-signal-handler also.
                (stack (or (make-stack #t gc-callback (outer-cut state) 2)
                           (pk 'what! (make-stack #t)))))
            (sample-stack-procs state stack)
            (accumulate-time state stop-time)
            (set-last-start-time! state (get-internal-run-time)))

          (set-inside-profiler?! state #f)))

      (dynamic-wind
        (lambda ()
          (set-profile-level! state 1)
          (set-last-start-time! state (get-internal-run-time))
          (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken))
          (add-hook! after-gc-hook gc-callback))
        (lambda ()
          (let lp ((i loop))
            (unless (zero? i)
              (call-thunk thunk)
              (lp (1- i)))))
        (lambda ()
          (remove-hook! after-gc-hook gc-callback)
          (set-gc-time-taken! state
                              (- (assq-ref (gc-stats) 'gc-time-taken)
                                 (gc-time-taken state)))
          (accumulate-time state (get-internal-run-time))
          (set-profile-level! state 0)
          (statprof-display port state))))))