1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# frozen_string_literal: true
module Gitlab
module Cleanup
class ProjectUploads
LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
attr_reader :logger
def initialize(logger: nil)
@logger = logger || Gitlab::AppLogger
end
def run!(dry_run: true)
logger.info "Looking for orphaned project uploads to clean up#{'. Dry run' if dry_run}..."
each_orphan_file do |path, upload_path|
result = cleanup(path, upload_path, dry_run)
logger.info result
end
end
private
def cleanup(path, upload_path, dry_run)
# This happened in staging:
# `find` returned a path on which `File.delete` raised `Errno::ENOENT`
return "Cannot find file: #{path}" unless File.exist?(path)
correct_path = upload_path && find_correct_path(upload_path)
if correct_path
move(path, correct_path, 'fix', dry_run)
else
move_to_lost_and_found(path, dry_run)
end
end
# Accepts a path in the form of "#{hex_secret}/#{filename}"
# rubocop: disable CodeReuse/ActiveRecord
def find_correct_path(upload_path)
upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
return unless upload && upload.local? && upload.model
upload.absolute_path
rescue StandardError => e
logger.error e.message
# absolute_path depends on a lot of code. If it doesn't work, then it
# it doesn't matter if the upload file is in the right place. Treat it
# as uncorrectable.
# I.e. the project record might be missing, which raises an exception.
nil
end
# rubocop: enable CodeReuse/ActiveRecord
def move_to_lost_and_found(path, dry_run)
new_path = path.sub(/\A#{ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR}/o, LOST_AND_FOUND)
move(path, new_path, 'move to lost and found', dry_run)
end
def move(path, new_path, prefix, dry_run)
action = "#{prefix} #{path} -> #{new_path}"
if dry_run
"Can #{action}"
else
begin
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(path, new_path)
"Did #{action}"
rescue StandardError => e
"Error during #{action}: #{e.inspect}"
end
end
end
# Yields absolute paths of project upload files that are not in the
# uploads table
def each_orphan_file
ProjectUploadFileFinder.new.each_file_batch do |file_paths|
logger.debug "Processing batch of #{file_paths.size} project upload file paths, starting with #{file_paths.first}"
file_paths.each do |path|
pup = ProjectUploadPath.from_path(path)
yield(path, pup.upload_path) if pup.orphan?
end
end
end
class ProjectUploadPath
PROJECT_FULL_PATH_REGEX = %r{\A#{FileUploader.root}/(.+)/(\h+/[^/]+)\z}.freeze
attr_reader :full_path, :upload_path
def initialize(full_path, upload_path)
@full_path = full_path
@upload_path = upload_path
end
def self.from_path(path)
path_matched = path.match(PROJECT_FULL_PATH_REGEX)
return new(nil, nil) unless path_matched
new(path_matched[1], path_matched[2])
end
# rubocop: disable CodeReuse/ActiveRecord
def orphan?
return true if full_path.nil? || upload_path.nil?
# It's possible to reduce to one query, but `where_full_path_in` is complex
!Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
end
# rubocop: enable CodeReuse/ActiveRecord
private
# rubocop: disable CodeReuse/ActiveRecord
def project_id
@project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
end
end
|