summaryrefslogtreecommitdiff
path: root/lib/gitlab/memory/reports/jemalloc_stats.rb
blob: b99bec4ac3e68d8036321b3d1bcd5d2805848db6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# frozen_string_literal: true

module Gitlab
  module Memory
    module Reports
      # Writes a jemalloc stats report via `Gitlab::Memory::Jemalloc.dump_stats` and then
      # prunes the oldest report files in `reports_path`, so that at most
      # `max_reports_stored` files matching the jemalloc report prefix remain.
      #
      # Nothing is done unless the `report_jemalloc_stats` ops feature flag is enabled.
      class JemallocStats
        # On prod, Jemalloc report sizes were ~2.5 MB:
        # https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15993#note_1014767214
        # We configured a 1GB emptyDir per pod:
        # https://gitlab.com/gitlab-com/gl-infra/k8s-workloads/gitlab-com/-/merge_requests/1949
        # The pod will be evicted when the size limit is exceeded. We never want this to happen, for availability.
        #
        # With the default, we have headroom (250*2.5MB=625<1000 MB) to fit into the configured emptyDir.
        # It allows us to keep 3+ days worth of reports for 6 workers running every 2 hours: 3*6*12=216<250
        #
        # The cleanup logic will become redundant once uploads are implemented, since they would perform the cleanup.
        DEFAULT_MAX_REPORTS_STORED = 250

        # @param reports_path [String] directory the reports are written to and pruned from
        def initialize(reports_path:)
          @reports_path = reports_path
        end

        # Dumps a report labelled with the current worker id, then prunes old reports.
        #
        # @return [Object, nil] whatever `Gitlab::Memory::Jemalloc.dump_stats` returns,
        #   or nil when the report is not active
        def run
          return unless active?

          Gitlab::Memory::Jemalloc.dump_stats(path: reports_path, filename_label: worker_id).tap { cleanup }
        end

        # @return [Boolean] whether the `report_jemalloc_stats` ops feature flag is enabled
        def active?
          Feature.enabled?(:report_jemalloc_stats, type: :ops)
        end

        private

        attr_reader :reports_path

        # Deletes the least recently modified report files that exceed `max_reports_stored`.
        def cleanup
          reports = reports_files_modified_order
          excess = reports.size - max_reports_stored
          return unless excess.positive?

          reports.first(excess).each do |f|
            File.unlink(f)
          rescue Errno::ENOENT
            # Path does not exist: Ignore.
            # Each worker could perform a cleanup, so another one may have removed the file already.
          end
        end

        # @return [Array<String>] report file paths, least recently modified first
        def reports_files_modified_order
          pattern = File.join(reports_path, "#{Gitlab::Memory::Jemalloc::FILENAME_PREFIX}*")

          Dir.glob(pattern).sort_by do |f|
            File.mtime(f)
          rescue Errno::ENOENT
            # Path does not exist: Return any timestamp to proceed with the sort
            Time.current
          end
        end

        def worker_id
          ::Prometheus::PidProvider.worker_id
        end

        # Maximum number of report files to keep.
        #
        # Read from GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED. ENV values are Strings,
        # so the value is parsed as a base-10 Integer. Unset, non-numeric, or negative values
        # fall back to DEFAULT_MAX_REPORTS_STORED. A value of 0 keeps no reports.
        #
        # @return [Integer]
        def max_reports_stored
          raw = ENV["GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED"]
          return DEFAULT_MAX_REPORTS_STORED if raw.nil?

          limit = Integer(raw, 10)
          limit.negative? ? DEFAULT_MAX_REPORTS_STORED : limit
        rescue ArgumentError
          DEFAULT_MAX_REPORTS_STORED
        end
      end
    end
  end
end