// src/intel/ds/intel_pps_driver.h

/*
 * Copyright © 2020-2021 Collabora, Ltd.
 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
 *
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include <pps/pps_driver.h>

// Forward declaration of a C struct. This header only refers to it through a
// pointer, so an incomplete type is sufficient here.
extern "C" {
struct intel_perf_query_info;
}

namespace pps
{

class IntelPerf;

/// @brief Variable length sequence of bytes generated by Intel Observation Architecture (OA)
struct PerfRecord {
   /// Timestamp in the GPU clock domain.
   /// Zero-initialized so that a default-constructed record never carries an
   /// indeterminate value.
   uint64_t timestamp = 0;

   /// drm_i915_perf_record_header + report data
   std::vector<uint8_t> data;
};

/// @brief Intel graphics implementation of the PPS Driver interface.
///
/// A single sampling pass may gather several perf-records at once. Every
/// perf-record carries multiple counter values which the GPU keeps
/// incrementing, so a delta is obtained through an _accumulation_
/// (`last_perf_record - previous_perf_record`).
/// As an optimization, some perf-records might be skipped: only the ones close
/// to the boundary of the sampling period range are taken into account.
class IntelDriver : public Driver
{
   public:
   IntelDriver();
   ~IntelDriver();

   // Overrides of the Driver interface
   uint64_t get_min_sampling_period_ns() override;
   bool init_perfcnt() override;
   void enable_counter(uint32_t counter_id) override;
   void enable_all_counters() override;
   void enable_perfcnt(uint64_t sampling_period_ns) override;
   void disable_perfcnt() override;
   bool dump_perfcnt() override;
   uint64_t next() override;
   uint32_t gpu_clock_id() const override;
   uint64_t gpu_timestamp() const override;

   private:
   /// @brief Asks for the next perf sample
   /// @return GPU timestamp of that sample
   uint64_t gpu_next();

   /// @brief Parses raw bytes into perf records
   /// @param data Buffer holding the bytes to parse
   /// @param byte_count How many bytes to parse
   /// @return The perf records parsed from the raw input data
   std::vector<PerfRecord> parse_perf_records(const std::vector<uint8_t> &data, size_t byte_count);

   /// @brief Reads data from the GPU metric set
   void read_data_from_metric_set();

   /// Sampling period, in nanoseconds, as requested by the datasource
   uint64_t sampling_period_ns{0};

   /// Upper 32 bits of the GPU timestamp last seen in the parsed reports
   uint64_t gpu_timestamp_udw{0};

   /// Timestamp (upper & lower 32 bits) of the last sample generated
   uint64_t last_gpu_timestamp{0};

   /// Buffer receiving the data read from the metric set
   /// (starts out as 1024 zeroed bytes)
   std::vector<uint8_t> metric_buffer = std::vector<uint8_t>(1024);

   /// Count of bytes read so far which have not been parsed yet.
   /// Reset once the bytes in the metric buffer are parsed to perf records
   size_t total_bytes_read{0};

   /// OA perf records read so far
   std::vector<PerfRecord> records;

   std::unique_ptr<IntelPerf> perf;

   /// GPU clock ID used to correlate GPU/CPU timestamps
   uint32_t clock_id{0};

   /// Selected query
   intel_perf_query_info *selected_query{nullptr};
};

} // namespace pps