1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # Walks the job-artifact storage directory with `find`, batching up
    # artifact directories so orphans (directories with no matching
    # ci_job_artifacts row) can be moved to a lost+found location.
    #
    # The actual per-batch DB lookup and cleanup lives in
    # OrphanJobArtifactFilesBatch; this class only drives the filesystem
    # scan, batching, limit handling and logging.
    class OrphanJobArtifactFiles
      include Gitlab::Utils::StrongMemoize

      ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
      LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
      BATCH_SIZE = 500
      DEFAULT_NICENESS = 'best-effort'
      # Accepted ionice scheduling classes (passed to `ionice -c`).
      VALID_NICENESS_LEVELS = %w[none realtime best-effort idle].freeze

      attr_accessor :batch, :total_found, :total_cleaned
      attr_reader :dry_run, :niceness, :logger

      # @param dry_run [Boolean] when true (default) nothing is moved,
      #   actions are only logged
      # @param niceness [String, nil] ionice class name; defaults to
      #   DEFAULT_NICENESS. Only validated when the ionice binary exists.
      # @param logger [#info, #error, nil] defaults to Gitlab::AppLogger
      def initialize(dry_run: true, niceness: nil, logger: nil)
        @dry_run = dry_run
        @niceness = (niceness || DEFAULT_NICENESS).downcase
        @logger = logger || Gitlab::AppLogger
        @total_found = @total_cleaned = 0

        new_batch!
      end

      # Scans the artifact tree and cleans orphan batches until the scan
      # is exhausted or the (optional) LIMIT is reached. A final partial
      # batch is flushed after the scan.
      def run!
        log_info('Looking for orphan job artifacts to clean up')

        find_artifacts do |artifact_file|
          batch << artifact_file

          clean_batch! if batch.full?

          if limit_reached?
            log_info("Exiting due to reaching limit of #{limit}.")
            break
          end
        end

        clean_batch!

        log_info("Processed #{total_found} job artifact(s) to find and cleaned #{total_cleaned} orphan(s).")
      end

      private

      # Replaces the current batch with a fresh, empty one.
      def new_batch!
        self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
          .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
      end

      # Cleans the current batch, folds its counts into the totals and
      # starts a new batch.
      def clean_batch!
        batch.clean!

        update_stats!(batch)

        new_batch!
      end

      def update_stats!(batch)
        self.total_found += batch.artifact_files.count
        self.total_cleaned += batch.lost_and_found.count
      end

      def limit_reached?
        return false unless limit

        total_cleaned >= limit
      end

      # Size of the next batch: BATCH_SIZE, capped by how much of the
      # LIMIT remains. Returns nil once the limit is reached (the final
      # flush in #run! still runs, but no further work is wanted).
      def batch_size
        return BATCH_SIZE unless limit
        return if limit_reached?

        todo = limit - total_cleaned
        [BATCH_SIZE, todo].min
      end

      # Streams `find` output line by line, yielding each artifact
      # directory path. Errors from `find` are logged after the stream
      # is drained.
      def find_artifacts
        Open3.popen3(*find_command) do |stdin, stdout, stderr, status_thread|
          stdout.each_line do |line|
            yield line.chomp
          end

          log_error(stderr.read.color(:red)) unless status_thread.value.success?
        end
      end

      # Builds (and memoizes) the `find` command, optionally wrapped in
      # ionice when the binary is available.
      def find_command
        strong_memoize(:find_command) do
          cmd = %W[find -L #{absolute_artifact_dir}]

          # Search for Job Artifact IDs, they are found 6 directory
          # levels deep. For example:
          # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
          #                  1  2  3      4          5   6
          #                  |  |  |      ^- date    |   ^- Job Artifact ID
          #                  |  |  |                 ^- Job ID
          #                  ^--+--+- components of hashed storage project path
          cmd += %w[-mindepth 6 -maxdepth 6]

          # Artifact directories are named on their ID
          cmd += %w[-type d]

          if ionice
            raise ArgumentError, 'Invalid niceness' unless VALID_NICENESS_LEVELS.include?(niceness)

            cmd.unshift(*%W[#{ionice} -c #{niceness}])
          end

          log_info("find command: '#{cmd.join(' ')}'")

          cmd
        end
      end

      def absolute_artifact_dir
        File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
      end

      # Path to the ionice binary, or nil when not installed (memoized).
      def ionice
        strong_memoize(:ionice) do
          Gitlab::Utils.which('ionice')
        end
      end

      # `params` is accepted but unused; kept for interface compatibility
      # (EE prepends may override with extra parameters).
      def log_info(msg, params = {})
        # Keep the space inside the prefix so non-dry-run messages don't
        # start with a stray leading space.
        logger.info("#{'[DRY RUN] ' if dry_run}#{msg}")
      end

      def log_error(msg, params = {})
        logger.error(msg)
      end

      # Optional cap on the number of orphans to clean, read from the
      # LIMIT environment variable; nil when unset.
      def limit
        ENV['LIMIT']&.to_i
      end
    end
  end
end
# Layer on the EE module when running an EE installation — presumably
# extends cleanup to EE-only storage concerns; verify in the EE module.
Gitlab::Cleanup::OrphanJobArtifactFiles.prepend_if_ee('EE::Gitlab::Cleanup::OrphanJobArtifactFiles')
|