# Source: lib/gitlab/cleanup/orphan_job_artifact_files.rb (blob 05dfdcd448647ad03a841c4465ee7486d466221c)
# frozen_string_literal: true

module Gitlab
  module Cleanup
    # Walks the job artifact storage directory with the `find` utility and
    # collects every directory it reports into OrphanJobArtifactFilesBatch
    # objects. Each batch has #clean! invoked on it once it reports being full
    # (and once more for the final, partially filled batch).
    #
    # The number of cleaned orphans can be capped with the LIMIT environment
    # variable. When an `ionice` executable is found, `find` is run through it
    # with the requested scheduling class.
    class OrphanJobArtifactFiles
      include Gitlab::Utils::StrongMemoize

      ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
      LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
      BATCH_SIZE = 500
      DEFAULT_NICENESS = 'best-effort'
      VALID_NICENESS_LEVELS = %w[none realtime best-effort idle].freeze

      attr_accessor :batch, :total_found, :total_cleaned
      attr_reader :dry_run, :niceness, :logger

      # @param dry_run [Boolean] forwarded to every batch; also makes info log
      #   lines carry a "[DRY RUN]" prefix
      # @param niceness [String, nil] scheduling class handed to `ionice -c`;
      #   falls back to DEFAULT_NICENESS and is always downcased
      # @param logger [#info, #error, nil] falls back to Gitlab::AppLogger
      def initialize(dry_run: true, niceness: nil, logger: nil)
        @dry_run = dry_run
        @niceness = (niceness || DEFAULT_NICENESS).downcase
        @logger = logger || Gitlab::AppLogger
        @total_found = @total_cleaned = 0

        new_batch!
      end

      # Streams artifact directories from `find`, cleaning them batch by batch,
      # and stops early once the LIMIT (if any) has been reached.
      #
      # @raise [ArgumentError] if `ionice` is available and the configured
      #   niceness is not one of VALID_NICENESS_LEVELS
      def run!
        log_info('Looking for orphan job artifacts to clean up')

        find_artifacts do |artifact_file|
          batch << artifact_file

          clean_batch! if batch.full?

          if limit_reached?
            log_info("Exiting due to reaching limit of #{limit}.")
            break
          end
        end

        # Flush whatever is left in the last, partially filled batch.
        clean_batch!

        log_info("Processed #{total_found} job artifact(s) to find and cleaned #{total_cleaned} orphan(s).")
      end

      private

      # Replaces the current batch with a fresh, empty one.
      def new_batch!
        self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
          .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
      end

      # Cleans the current batch, folds its counters into the totals and
      # starts a new batch.
      def clean_batch!
        batch.clean!

        update_stats!(batch)

        new_batch!
      end

      # Adds the counters of a processed batch to the running totals.
      #
      # @param processed_batch [#artifact_files, #lost_and_found]
      def update_stats!(processed_batch)
        self.total_found += processed_batch.artifact_files.count
        self.total_cleaned += processed_batch.lost_and_found.count
      end

      # @return [Boolean] true once at least `limit` orphans were cleaned;
      #   always false when no LIMIT is set
      def limit_reached?
        return false unless limit

        total_cleaned >= limit
      end

      # Size for the next batch: BATCH_SIZE, shrunk so the batch cannot
      # exceed what is still allowed under the limit.
      #
      # @return [Integer, nil] nil when the limit has already been reached
      #
      # NOTE(review): a LIMIT of 0 (or a non-numeric LIMIT, which `to_i` turns
      # into 0) makes this nil for the very first batch - confirm that the
      # batch class copes with a nil batch_size.
      def batch_size
        return BATCH_SIZE unless limit
        return if limit_reached?

        todo = limit - total_cleaned
        [BATCH_SIZE, todo].min
      end

      # Runs the `find` command and yields each line of its standard output
      # (without the trailing newline). If the command exits unsuccessfully
      # its standard error output is logged as an error.
      #
      # @yieldparam [String] path reported by `find`
      def find_artifacts
        Open3.popen3(*find_command) do |_stdin, stdout, stderr, status_thread|
          # Drain stderr concurrently. Reading it only after stdout is
          # exhausted can deadlock: once the stderr pipe buffer is full the
          # child blocks writing to it while we block waiting on stdout (or on
          # the child's exit status).
          stderr_reader = Thread.new { stderr.read }

          begin
            stdout.each_line do |line|
              yield line.chomp
            end

            error_output = stderr_reader.value
            log_error(error_output.color(:red)) unless status_thread.value.success?
          ensure
            # When the caller breaks out early the reader may still be blocked
            # on the pipe; stop it before popen3 closes the streams. This is a
            # no-op if the thread has already finished.
            stderr_reader.kill.join
          end
        end
      end

      # Builds the argv array for the `find` invocation. The result goes
      # through the strong_memoize helper, so it is meant to be built (and
      # logged) once per instance.
      #
      # @return [Array<String>]
      # @raise [ArgumentError] if `ionice` is available and niceness is invalid
      def find_command
        strong_memoize(:find_command) do
          cmd = %W[find -L #{absolute_artifact_dir}]

          # Search for Job Artifact IDs, they are found 6 directory
          # levels deep. For example:
          # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
          #                  1  2  3       4          5   6
          #                  |  |  |       ^- date    |   ^- Job Artifact ID
          #                  |  |  |                  ^- Job ID
          #                  ^--+--+- components of hashed storage project path
          cmd += %w[-mindepth 6 -maxdepth 6]

          # Artifact directories are named on their ID
          cmd += %w[-type d]

          if ionice
            raise ArgumentError, 'Invalid niceness' unless VALID_NICENESS_LEVELS.include?(niceness)

            # Prefix the command so that `find` runs under ionice.
            cmd.unshift(*%W[#{ionice} -c #{niceness}])
          end

          log_info("find command: '#{cmd.join(' ')}'")

          cmd
        end
      end

      # @return [String] absolute form of ABSOLUTE_ARTIFACT_DIR
      def absolute_artifact_dir
        File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
      end

      # @return [String, nil] whatever Gitlab::Utils.which reports for
      #   `ionice` (presumably its path, or nil when it is not installed -
      #   confirm against Gitlab::Utils)
      def ionice
        strong_memoize(:ionice) do
          Gitlab::Utils.which('ionice')
        end
      end

      # Logs at info level, prefixing the message with "[DRY RUN]" in dry-run
      # mode. When dry_run is false the prefix interpolates to an empty string,
      # so the message keeps a leading space. `params` is accepted but unused
      # in this class.
      def log_info(msg, params = {})
        logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      # Logs at error level. `params` is accepted but unused in this class.
      def log_error(msg, params = {})
        logger.error(msg)
      end

      # @return [Integer, nil] the LIMIT environment variable converted with
      #   `to_i`, or nil when it is not set
      def limit
        ENV['LIMIT']&.to_i
      end
    end
  end
end

# Extension hook: `prepend_mod_with` is defined outside this file. Presumably
# it prepends an edition-specific module with the given name when one exists -
# confirm against its definition.
Gitlab::Cleanup::OrphanJobArtifactFiles.prepend_mod_with('Gitlab::Cleanup::OrphanJobArtifactFiles')