summaryrefslogtreecommitdiff
path: root/lib/gitlab/memory/reports_daemon.rb
blob: ed1da8baab5b6c35969fa3ed2fdf943ed9dcb226 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# frozen_string_literal: true

module Gitlab
  module Memory
    # Background daemon that periodically runs memory diagnostic reports
    # (currently only Reports::JemallocStats), logs how long each run took and
    # how large the produced file is, and adds the run duration to a
    # Prometheus counter.
    #
    # Behavior is tunable through environment variables, read once in #initialize:
    #
    # - GITLAB_DIAGNOSTIC_REPORTS_SLEEP_S: base pause before each round of reports.
    # - GITLAB_DIAGNOSTIC_REPORTS_SLEEP_MAX_DELTA_S: exclusive upper bound of the
    #   random jitter added to the base pause.
    # - GITLAB_DIAGNOSTIC_REPORTS_SLEEP_BETWEEN_REPORTS_S: pause after each report.
    # - GITLAB_DIAGNOSTIC_REPORTS_PATH: path handed to each report as `reports_path`.
    class ReportsDaemon < Daemon
      DEFAULT_SLEEP_S = 7200 # 2 hours
      DEFAULT_SLEEP_MAX_DELTA_S = 600 # 0..10 minutes
      DEFAULT_SLEEP_BETWEEN_REPORTS_S = 120 # 2 minutes

      DEFAULT_REPORTS_PATH = '/tmp'

      attr_reader :sleep_s, :sleep_max_delta_s, :sleep_between_reports_s, :reports_path

      # Reads the configuration from the environment, builds the list of
      # reports and registers the Prometheus counter.
      #
      # @param options [Hash] forwarded unchanged to the superclass initializer
      def initialize(**options)
        super

        @alive = true

        @sleep_s = env_seconds('GITLAB_DIAGNOSTIC_REPORTS_SLEEP_S', DEFAULT_SLEEP_S)
        @sleep_max_delta_s = env_seconds('GITLAB_DIAGNOSTIC_REPORTS_SLEEP_MAX_DELTA_S', DEFAULT_SLEEP_MAX_DELTA_S)
        @sleep_between_reports_s =
          env_seconds('GITLAB_DIAGNOSTIC_REPORTS_SLEEP_BETWEEN_REPORTS_S', DEFAULT_SLEEP_BETWEEN_REPORTS_S)

        @reports_path = ENV["GITLAB_DIAGNOSTIC_REPORTS_PATH"] || DEFAULT_REPORTS_PATH

        @reports = [Gitlab::Memory::Reports::JemallocStats.new(reports_path: reports_path)]

        init_prometheus_metrics
      end

      # Main loop: sleep for the jittered interval, then run every report that
      # answers true to `active?`, one after another.
      #
      # For each report the wall-clock duration and thread CPU time are
      # measured around `report.run`, logged together with the size of the
      # file the report returned, and the duration is added to the counter.
      #
      # The `alive` flag is only re-checked between rounds, so a stop request
      # takes effect after the current sleep/round has finished.
      def run_thread
        while alive
          sleep interval_with_jitter

          reports.select(&:active?).each do |report|
            start_monotonic_time = Gitlab::Metrics::System.monotonic_time
            start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time

            file_path = report.run

            cpu_s = Gitlab::Metrics::System.thread_cpu_duration(start_thread_cpu_time)
            duration_s = Gitlab::Metrics::System.monotonic_time - start_monotonic_time

            label = report_label(report)

            log_report(label: label, cpu_s: cpu_s, duration_s: duration_s, size: file_size(file_path))
            @report_duration_counter.increment({ report: label }, duration_s)

            sleep sleep_between_reports_s
          end
        end
      end

      private

      attr_reader :alive, :reports

      # Reads a duration in whole seconds from the environment variable `name`.
      #
      # Falls back to `default` when the variable is unset, blank, non-numeric
      # or negative. A bare `to_i` would turn blank or garbage input into 0
      # (removing every pause from the loop), and a negative value would make
      # Kernel#sleep raise inside the daemon thread.
      #
      # @param name [String] environment variable name
      # @param default [Integer] value used when the variable is missing or invalid
      # @return [Integer] a non-negative number of seconds
      def env_seconds(name, default)
        raw = ENV[name]
        return default unless raw&.match?(/\A\s*\+?\d/)

        raw.to_i
      end

      # Returns the sleep interval with a random adjustment.
      # The random adjustment is put in place to ensure continued availability.
      #
      # @return [Integer] `sleep_s` plus a random integer in 0...sleep_max_delta_s
      def interval_with_jitter
        # Kernel#rand(0) returns a Float in [0, 1) rather than 0, so a zero
        # delta (jitter disabled) must not be passed to rand.
        jitter = sleep_max_delta_s > 0 ? rand(sleep_max_delta_s) : 0

        sleep_s + jitter
      end

      # Writes one structured info line describing a finished report.
      #
      # @param label [String] report identifier, see #report_label
      # @param duration_s [Numeric] wall-clock duration, rounded to 2 decimals in the log
      # @param cpu_s [Numeric] thread CPU time, rounded to 2 decimals in the log
      # @param size [Integer] size of the report file in bytes
      def log_report(label:, duration_s:, cpu_s:, size:)
        Gitlab::AppLogger.info(
          message: 'finished',
          pid: $$,
          worker_id: worker_id,
          perf_report: label,
          duration_s: duration_s.round(2),
          cpu_s: cpu_s.round(2),
          perf_report_size_bytes: size
        )
      end

      # Worker identifier as reported by ::Prometheus::PidProvider.
      def worker_id
        ::Prometheus::PidProvider.worker_id
      end

      # Derives a label from the report's class name: namespace stripped and
      # converted to snake_case (via `demodulize.underscore`).
      def report_label(report)
        report.class.to_s.demodulize.underscore
      end

      # Clears the `alive` flag so that #run_thread leaves its loop at the next check.
      # NOTE(review): presumably invoked by the Daemon base class when stopping - confirm.
      def stop_working
        @alive = false
      end

      # Registers the counter that accumulates report durations.
      # Note that the default `pid` label carries the worker id, not the OS pid.
      def init_prometheus_metrics
        default_labels = { pid: worker_id }

        @report_duration_counter = Gitlab::Metrics.counter(
          :gitlab_diag_report_duration_seconds_total,
          'Total time elapsed for running diagnostic report',
          default_labels
        )
      end

      # Size in bytes of the file at `file_path`, or 0 when no such file exists.
      # `to_s` lets a nil path through; the resulting empty path is expected to
      # raise Errno::ENOENT as well and therefore also yields 0.
      def file_size(file_path)
        File.size(file_path.to_s)
      rescue Errno::ENOENT
        0
      end
    end
  end
end