# summary | refs | log | tree | commit | diff
# path: root/lib/gitlab/memory/reporter.rb
# blob: 5effafc9f5b1e3939aa02ceff987474cd5325ab1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# frozen_string_literal: true

module Gitlab
  module Memory
    # Runs diagnostic reports, streams their output through an external
    # compressor (see COMPRESS_CMD) and stores the result under `reports_path`.
    #
    # Progress and failures are written to the injected logger, and the elapsed
    # time of each successful report is added to a Prometheus counter.
    class Reporter
      # Command the report stream is piped through before it is written to disk.
      COMPRESS_CMD = %w[gzip --fast].freeze

      attr_reader :reports_path

      # @param reports_path [String, nil] directory to store finished reports in.
      #   Falls back to ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"], then to a fresh
      #   temporary directory.
      # @param logger [#info, #error] receiver of the structured log payloads.
      def initialize(reports_path: nil, logger: Gitlab::AppLogger)
        @reports_path = reports_path || ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"] || Dir.mktmpdir
        @logger = logger

        @worker_id = ::Prometheus::PidProvider.worker_id
        # Unique per Reporter instance; it ends up in log labels and file names.
        @worker_uuid = SecureRandom.uuid

        init_prometheus_metrics
      end

      # Runs a single report and stores its compressed output.
      #
      # @param report [#active?, #name, #run] the report to execute; `run` is
      #   handed the IO object it must write its output to.
      # @return [Boolean] true when the report was stored; false when the report
      #   is inactive or any StandardError was raised (the error is logged, not
      #   re-raised).
      def run_report(report)
        return false unless report.active?

        @logger.info(
          log_labels(
            message: 'started',
            perf_report: report.name
          ))

        start_monotonic_time = Gitlab::Metrics::System.monotonic_time
        start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time

        report_file = store_report(report)

        cpu_s = Gitlab::Metrics::System.thread_cpu_duration(start_thread_cpu_time)
        duration_s = Gitlab::Metrics::System.monotonic_time - start_monotonic_time

        @logger.info(
          log_labels(
            message: 'finished',
            perf_report: report.name,
            cpu_s: cpu_s.round(2),
            duration_s: duration_s.round(2),
            perf_report_file: report_file,
            perf_report_size_bytes: file_size(report_file)
          ))

        @report_duration_counter.increment({ report: report.name }, duration_s)

        true
      rescue StandardError => e
        @logger.error(
          log_labels(
            message: 'failed',
            perf_report: report.name,
            error: e.inspect
          ))

        false
      end

      private

      # Writes the report into `<reports_path>/tmp` and moves it into
      # `reports_path` once it has been written completely.
      #
      # @return [String] path of the finished report file.
      def store_report(report)
        # Store report in tmp subdir while it is still streaming.
        # This will clearly separate finished reports from the files we are still writing to.
        tmp_dir = File.join(@reports_path, 'tmp')
        FileUtils.mkdir_p(tmp_dir)

        report_file = file_name(report)
        tmp_file_path = File.join(tmp_dir, report_file)

        write_heap_dump_file(report, tmp_file_path)

        File.join(@reports_path, report_file).tap do |report_file_path|
          FileUtils.mv(tmp_file_path, report_file_path)
        end
      end

      # Streams `report` through COMPRESS_CMD into the file at `path`.
      #
      # The report writes into a pipe that is the compressor's stdin; the
      # compressor's stdout is the target file and its stderr is captured so it
      # can be included in the error message.
      #
      # @raise [StandardError] when the compressor does not terminate successfully.
      def write_heap_dump_file(report, path)
        io_r, io_w = IO.pipe
        err_r, err_w = IO.pipe
        pid = nil
        status = nil

        File.open(path, 'wb') do |file|
          pid = Process.spawn(*COMPRESS_CMD, in: io_r, out: file, err: err_w)
          # The child holds its own copies of these ends. Closing ours is required
          # for the child to see EOF on stdin and for us to see EOF on its stderr.
          io_r.close
          err_w.close

          report.run(io_w)
          # Signals end-of-input to the compressor so that it can finish.
          io_w.close

          _, status = Process.wait2(pid)
          # The child has been reaped. Forget its PID so that the cleanup below
          # cannot signal an unrelated process that was assigned the same PID.
          pid = nil
        end

        errors = err_r.read&.strip
        # Any unsuccessful termination is a failure, even when nothing was written
        # to stderr (for example when the compressor was killed by a signal).
        raise StandardError, "exit #{status.exitstatus}: #{errors}" unless status.success?
      ensure
        # Pipe ends are nil when IO.pipe itself raised; closing twice is harmless.
        [io_r, io_w, err_r, err_w].each { |io| io&.close }

        # Make sure we don't leave any running processes behind.
        if pid
          Gitlab::ProcessManagement.signal(pid, :KILL)
          # Reap the killed child in the background so it does not stay a zombie.
          Process.detach(pid)
        end
      end

      # Labels shared by every log entry, merged with `extra_labels`
      # (extra labels win on key collisions).
      def log_labels(**extra_labels)
        {
          pid: Process.pid,
          worker_id: @worker_id,
          perf_report_worker_uuid: @worker_uuid
        }.merge(extra_labels)
      end

      # Builds the report file name from the report name, the current time
      # (millisecond precision), the worker id joined with the instance UUID,
      # and the `gz` extension; blank parts are skipped.
      def file_name(report)
        timestamp = Time.current.strftime('%Y-%m-%d.%H:%M:%S:%L')

        report_id = [@worker_id, @worker_uuid].join(".")

        [report.name, timestamp, report_id, 'gz'].compact_blank.join('.')
      end

      # @return [Integer] size of the file in bytes, or 0 when it does not exist.
      def file_size(file_path)
        File.size(file_path.to_s)
      rescue Errno::ENOENT
        0
      end

      # Registers the counter that accumulates report durations in seconds.
      def init_prometheus_metrics
        default_labels = { pid: @worker_id }

        @report_duration_counter = Gitlab::Metrics.counter(
          :gitlab_diag_report_duration_seconds_total,
          'Total time elapsed for running diagnostic report',
          default_labels
        )
      end
    end
  end
end