1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # Walks the job artifact storage directory with the external `find`
    # command, feeds every artifact directory it reports into batches and
    # asks each batch to clean itself, while keeping running totals.
    #
    # The decision of what is an orphan, and what happens to it, is delegated
    # to ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch; this class only
    # drives the directory scan, the batching and the optional limit.
    class OrphanJobArtifactFiles
      include Gitlab::Utils::StrongMemoize

      # Root directory that is scanned for job artifact directories.
      ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
      # NOTE(review): not referenced inside this class; presumably consumed by
      # the batch cleaner as the destination for orphans - confirm there.
      LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
      # Upper bound on the number of artifact directories handled per batch.
      BATCH_SIZE = 500
      # `ionice --class` value used when the caller does not supply one.
      DEFAULT_NICENESS = 'Best-effort'

      attr_accessor :batch, :total_found, :total_cleaned
      attr_reader :limit, :dry_run, :niceness, :logger

      # @param limit [Integer, nil] stop once this many orphans were cleaned;
      #   nil means no limit
      # @param dry_run [Boolean] forwarded to every batch and reflected in the
      #   log prefix; defaults to true
      # @param niceness [String, nil] `ionice --class` value; falls back to
      #   DEFAULT_NICENESS
      # @param logger [#info, #error, nil] falls back to Rails.logger
      def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil)
        @limit = limit
        @dry_run = dry_run
        @niceness = niceness || DEFAULT_NICENESS
        @logger = logger || Rails.logger
        @total_found = @total_cleaned = 0

        new_batch!
      end

      # Scans the artifact directory, cleaning batch by batch, and logs a
      # summary at the end. Stops scanning early once the limit is reached.
      def run!
        log_info('Looking for orphan job artifacts to clean up')

        find_artifacts do |artifact_file|
          batch << artifact_file

          clean_batch! if batch.full?

          break if limit_reached?
        end

        # Flush whatever is left in the last, partially filled batch.
        clean_batch!

        log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.")
      end

      private

      # Replaces the current batch with a fresh, empty one.
      def new_batch!
        self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
          .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
      end

      # Cleans the current batch, folds its counts into the totals and starts
      # a new batch.
      def clean_batch!
        batch.clean!

        update_stats!(batch)

        new_batch!
      end

      # Adds the counts of a cleaned batch to the running totals.
      def update_stats!(cleaned_batch)
        self.total_found += cleaned_batch.artifact_files.count
        self.total_cleaned += cleaned_batch.lost_and_found.count
      end

      # @return [Boolean] true when a limit is set and at least that many
      #   orphans have been cleaned
      def limit_reached?
        return false unless limit

        total_cleaned >= limit
      end

      # Size for the next batch: BATCH_SIZE, capped by what is still allowed
      # under the limit.
      #
      # @return [Integer, nil] nil once the limit has been reached
      def batch_size
        return BATCH_SIZE unless limit
        return if limit_reached?

        remaining = limit - total_cleaned

        [BATCH_SIZE, remaining].min
      end

      # Runs the find command and yields every line it prints on stdout
      # (line terminator included). If the command exits unsuccessfully, its
      # stderr output is logged as an error.
      def find_artifacts
        Open3.popen3(*find_command) do |_stdin, stdout, stderr, status_thread|
          # Drain stderr concurrently. Pipes have a fixed-size buffer: if find
          # wrote enough warnings to fill it while we were still consuming
          # stdout, find would block and stdout would never reach EOF.
          stderr_reader = Thread.new do
            # The stream may be closed under us on an early exit; stay quiet.
            Thread.current.report_on_exception = false
            stderr.read
          end

          begin
            stdout.each_line { |line| yield line }

            log_error(stderr_reader.value.color(:red)) unless status_thread.value.success?
          ensure
            # Also reached when the caller breaks out of the block early.
            stderr_reader.kill
          end
        end
      end

      # Builds (once) the argv array for the directory scan, prefixed with
      # ionice when that executable is available.
      #
      # @return [Array<String>]
      # @raise [ArgumentError] when ionice is used and niceness is not a
      #   single word made of word characters and dashes
      def find_command
        strong_memoize(:find_command) do
          cmd = %W[find -L #{absolute_artifact_dir}]

          # Search for Job Artifact IDs, they are found 6 directory
          # levels deep. For example:
          # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
          #                  1  2  3       4          5   6
          #                  |  |  |       ^- date    |   ^- Job Artifact ID
          #                  |  |  |                  ^- Job ID
          #                  ^--+--+- components of hashed storage project path
          cmd += %w[-mindepth 6 -maxdepth 6]

          # Artifact directories are named on their ID
          cmd += %w[-type d]

          if ionice
            # \A and \z anchor the whole string; ^ and $ would only anchor a
            # single line and let multi-line values slip through.
            raise ArgumentError, 'Invalid niceness' unless niceness.match?(/\A\w[\w\-]*\z/)

            cmd.unshift(*%W[#{ionice} --class #{niceness}])
          end

          log_info("find command: '#{cmd.join(' ')}'")

          cmd
        end
      end

      # @return [String] absolute path of the directory to scan
      def absolute_artifact_dir
        File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
      end

      # @return [String, nil] result of looking up the ionice executable
      #   (memoized); a falsy value disables the ionice prefix
      def ionice
        strong_memoize(:ionice) do
          Gitlab::Utils.which('ionice')
        end
      end

      # Logs at info level, prefixed with "[DRY RUN]" during a dry run. The
      # separating space is emitted in both cases. The second argument is
      # accepted but unused.
      def log_info(msg, _params = {})
        logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      # Logs at error level. The second argument is accepted but unused.
      def log_error(msg, _params = {})
        logger.error(msg)
      end
    end
  end
end
|