summaryrefslogtreecommitdiff
path: root/module/system/vm/coverage.scm
blob: f1d65cc6c2817b855c11d6e0125b10f6717580fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
;;; -*- mode: scheme; coding: utf-8; -*-
;;;
;;; Copyright (C) 2010, 2013, 2018 Free Software Foundation, Inc.
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 3 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

(define-module (system vm coverage)
  #:use-module (system vm vm)
  #:use-module (system vm frame)
  #:use-module (system vm program)
  #:use-module (system vm debug)
  #:use-module (ice-9 format)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-9 gnu)
  #:use-module (srfi srfi-11)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 match)
  #:export (with-code-coverage
            coverage-data?
            instrumented-source-files
            instrumented/executed-lines
            line-execution-counts
            procedure-execution-count
            coverage-data->lcov))

;;; Author: Ludovic Courtès
;;;
;;; Commentary:
;;;
;;; This module provides support to gather code coverage data by instrumenting
;;; the VM.
;;;
;;; Code:


;;;
;;; Gathering coverage data.
;;;

(define (with-code-coverage thunk)
  "Run THUNK, a zero-argument procedure, while instrumenting Guile's VM to
collect code coverage data.  Return code coverage data and the values returned
by THUNK."

  (define ip-counts
    ;; A table mapping instruction pointers to the number of times they were
    ;; executed.
    (make-hash-table 5000))

  (define (collect! frame)
    ;; Update IP-COUNTS with info from FRAME.
    (let* ((ip (frame-instruction-pointer frame))
           (ip-entry (hashv-create-handle! ip-counts ip 0)))
      (set-cdr! ip-entry (+ (cdr ip-entry) 1))))

  ;; FIXME: It's unclear what the dynamic-wind is for, given that if the
  ;; VM is different from the current one, continuations will not be
  ;; resumable.
  (call-with-values (lambda ()
                      (let ((level   (vm-trace-level)))
                        (dynamic-wind
                          (lambda ()
                            (set-vm-trace-level! (+ level 1))
                            (vm-add-next-hook! collect!))
                          (lambda ()
                            (call-with-vm thunk))
                          (lambda ()
                            (set-vm-trace-level! level)
                            (vm-remove-next-hook! collect!)))))
    (lambda args
      (apply values (make-coverage-data ip-counts) args))))




;;;
;;; Source chunks.
;;;

(define-record-type <source-chunk>
  (make-source-chunk base length sources)
  source-chunk?
  (base source-chunk-base)
  (length source-chunk-length)
  (sources source-chunk-sources))

(set-record-type-printer!
 <source-chunk>
 (lambda (obj port)
   (format port "<source-chunk #x~x-#x~x>"
           (source-chunk-base obj)
           (+ (source-chunk-base obj) (source-chunk-length obj)))))

(define (compute-source-chunk ctx)
  "Build a sorted vector of source information for a given debugging
context (ELF image).  The return value is a @code{<source-chunk>}, which also
records the address range to which the source information applies."
  (make-source-chunk
   (debug-context-base ctx)
   (debug-context-length ctx)
   ;; The source locations are sorted already, but collected in reverse order.
   (list->vector (reverse! (fold-source-locations cons '() ctx)))))

(define (all-source-information)
  "Build and return a vector of source information corresponding to all
loaded code.  The vector will be sorted by ascending address order."
  (sort! (list->vector (fold-all-debug-contexts
                        (lambda (ctx seed)
                          (cons (compute-source-chunk ctx) seed))
                        '()))
         (lambda (x y)
           (< (source-chunk-base x) (source-chunk-base y)))))


;;;
;;; Coverage data summary.
;;;

(define-record-type <coverage-data>
  (%make-coverage-data ip-counts
                       sources
                       file->procedures
                       file->line-counts)
  coverage-data?

  ;; Mapping from instruction pointers to the number of times they were
  ;; executed, as a sorted vector of IP-count pairs.
  (ip-counts data-ip-counts)

  ;; Complete source census at the time the coverage analysis was run, as a
  ;; sorted vector of <source-chunk> values.
  (sources data-sources)

  ;; Mapping from source file names to lists of procedures defined in the file.
  ;; FIXME.
  (file->procedures     data-file->procedures)

  ;; Mapping from file names to hash tables, which in turn map from line numbers
  ;; to execution counts.
  (file->line-counts    data-file->line-counts))

(set-record-type-printer!
 <coverage-data>
 (lambda (obj port)
   (format port "<coverage-data ~x>" (object-address obj))))

(define (make-coverage-data ip-counts)
  ;; Return a `coverage-data' object based on the coverage data available in
  ;; IP-COUNTS.  Precompute the other hash tables that make up `coverage-data'
  ;; objects.
  (let* ((all-sources (all-source-information))
         (all-counts (sort! (list->vector (hash-fold acons '() ip-counts))
                            (lambda (x y)
                              (< (car x) (car y)))))
         (file->procedures   (make-hash-table 100))
         (file->line-counts  (make-hash-table 100))
         (data               (%make-coverage-data all-counts
                                                  all-sources
                                                  file->procedures
                                                  file->line-counts)))

    (define (observe-execution-count! file line count)
      ;; Make the execution count of FILE:LINE the maximum of its current value
      ;; and COUNT.  This is so that LINE's execution count is correct when
      ;; several instruction pointers map to LINE.
      (when file
        (let ((file-entry (hash-create-handle! file->line-counts file #f)))
          (if (not (cdr file-entry))
              (set-cdr! file-entry (make-hash-table 500)))
          (let ((line-entry (hashv-create-handle! (cdr file-entry) line 0)))
            (set-cdr! line-entry (max (cdr line-entry) count))))))

    ;; First, visit every known source location and mark it as instrumented but
    ;; unvisited.
    ;;
    ;; FIXME: This is not always necessary.  It's important to have the ability
    ;; to know when a source location is not reached, but sometimes all we need
    ;; to know is that a particular site *was* reached.  In that case we
    ;; wouldn't need to load up all the DWARF sections.  As it is, though, we
    ;; use the complete source census as part of the later phase.
    (let visit-chunk ((chunk-idx 0))
      (when (< chunk-idx (vector-length all-sources))
        (match (vector-ref all-sources chunk-idx)
          (($ <source-chunk> base chunk-length chunk-sources)
           (let visit-source ((source-idx 0))
             (when (< source-idx (vector-length chunk-sources))
               (let ((s (vector-ref chunk-sources source-idx)))
                 (observe-execution-count! (source-file s) (source-line s) 0)
                 (visit-source (1+ source-idx)))))))
        (visit-chunk (1+ chunk-idx))))

    ;; Then, visit the measured execution counts, walking the complete source
    ;; census at the same time.  This allows us to map observed addresses to
    ;; source locations.  Record observed execution counts.
    (let visit-chunk ((chunk-idx 0) (count-idx 0))
      (when (< chunk-idx (vector-length all-sources))
        (match (vector-ref all-sources chunk-idx)
          (($ <source-chunk> base chunk-length chunk-sources)
           (let visit-count ((count-idx count-idx) (source-idx 0) (source #f))
             (when (< count-idx (vector-length all-counts))
               (match (vector-ref all-counts count-idx)
                 ((ip . count)
                  (cond
                   ((< ip base)
                    ;; Address before chunk base; no corresponding source.
                    (visit-count (1+ count-idx) source-idx source))
                   ((< ip (+ base chunk-length))
                    ;; Address in chunk; count it.
                    (let visit-source ((source-idx source-idx) (source source))
                      (define (finish)
                        (when source
                          (observe-execution-count! (source-file source)
                                                    (source-line source)
                                                    count))
                        (visit-count (1+ count-idx) source-idx source))
                      (cond
                       ((< source-idx (vector-length chunk-sources))
                        (let ((source* (vector-ref chunk-sources source-idx)))
                          (if (<= (source-pre-pc source*) ip)
                              (visit-source (1+ source-idx) source*)
                              (finish))))
                       (else
                        (finish)))))
                   (else
                    ;; Address past chunk; fetch the next chunk.
                    (visit-chunk (1+ chunk-idx) count-idx)))))))))))

    data))

(define (procedure-execution-count data proc)
  "Return the number of times PROC's code was executed, according to DATA.  When
PROC is a closure, the number of times its code was executed is returned, not
the number of times this code associated with this particular closure was
executed."
  (define (binary-search v key val)
    (let lp ((start 0) (end (vector-length v)))
      (and (not (eqv? start end))
           (let* ((idx (floor/ (+ start end) 2))
                  (elt (vector-ref v idx))
                  (val* (key elt)))
             (cond
              ((< val val*)
               (lp start idx))
              ((< val* val)
               (lp (1+ idx) end))
              (else elt))))))
  (and (program? proc)
       (match (binary-search (data-ip-counts data) car (program-code proc))
         (#f 0)
         ((ip . code) code))))

(define (instrumented/executed-lines data file)
  "Return the number of instrumented and the number of executed source lines in
FILE according to DATA."
  (define instr+exec
    (and=> (hash-ref (data-file->line-counts data) file)
           (lambda (line-counts)
             (hash-fold (lambda (line count instr+exec)
                          (let ((instr (car instr+exec))
                                (exec  (cdr instr+exec)))
                            (cons (+ 1 instr)
                                  (if (> count 0)
                                      (+ 1 exec)
                                      exec))))
                        '(0 . 0)
                        line-counts))))

  (values (car instr+exec) (cdr instr+exec)))

(define (line-execution-counts data file)
  "Return a list of line number/execution count pairs for FILE, or #f if FILE
is not among the files covered by DATA."
  (and=> (hash-ref (data-file->line-counts data) file)
         (lambda (line-counts)
           (hash-fold alist-cons '() line-counts))))

(define (instrumented-source-files data)
  "Return the list of `instrumented' source files, i.e., source files whose code
was loaded at the time DATA was collected."
  (hash-fold (lambda (file counts files)
               (cons file files))
             '()
             (data-file->line-counts data)))


;;;
;;; LCOV output.
;;;

(define* (coverage-data->lcov data port #:key (modules #f))
  "Traverse code coverage information DATA, as obtained with
`with-code-coverage', and write coverage information in the LCOV format to PORT.
The report will include all the modules loaded at the time coverage data was
gathered, even if their code was not executed."

  ;; FIXME: Re-enable this code, but using for-each-elf-symbol on each source
  ;; chunk.  Use that to build a map of file -> proc-addr + line + name.  Then
  ;; use something like procedure-execution-count to get the execution count.
  #;
  (define (dump-function proc)
    ;; Dump source location and basic coverage data for PROC.
    (and (or (program? proc))
         (let ((sources (program-sources* data proc)))
           (and (pair? sources)
                (let* ((line (source:line-for-user (car sources)))
                       (name (or (procedure-name proc)
                                 (format #f "anonymous-l~a" line))))
                  (format port "FN:~A,~A~%" line name)
                  (and=> (procedure-execution-count data proc)
                         (lambda (count)
                           (format port "FNDA:~A,~A~%" count name))))))))

  ;; Output per-file coverage data.
  (format port "TN:~%")
  (define source-files
    (filter
     (lambda (file)
       (or (not modules) (member file modules)))
     (instrumented-source-files data)))

  (for-each (lambda (file)
              (let ((path (search-path %load-path file)))
                (if (string? path)
                    (begin
                      (format port "SF:~A~%" path)
                      #;
                      (for-each dump-function procs)
                      (for-each (lambda (line+count)
                                  (let ((line  (car line+count))
                                        (count (cdr line+count)))
                                    (format port "DA:~A,~A~%"
                                            (+ 1 line) count)))
                                (line-execution-counts data file))
                      (let-values (((instr exec)
                                    (instrumented/executed-lines data file)))
                        (format port "LH: ~A~%" exec)
                        (format port "LF: ~A~%" instr))
                      (format port "end_of_record~%"))
                    (begin
                      (format (current-error-port)
                              "skipping source file: ~a~%"
                              file)))))
            source-files))