diff options
author | Michael Kozono <mkozono@gmail.com> | 2018-07-26 14:23:33 -0700 |
---|---|---|
committer | Michael Kozono <mkozono@gmail.com> | 2018-07-31 11:09:42 -0700 |
commit | 3cbd8b13436be25074db17dfbd555588e917279d (patch) | |
tree | 483da69a036ae655db97ac56b056e0b811f25b15 /lib/gitlab/cleanup | |
parent | 400925c480d95df735862d7edd58cc36cb8fbf68 (diff) | |
download | gitlab-ce-3cbd8b13436be25074db17dfbd555588e917279d.tar.gz |
Add local project uploads cleanup task
Diffstat (limited to 'lib/gitlab/cleanup')
-rw-r--r-- | lib/gitlab/cleanup/project_upload_file_finder.rb | 66 | ||||
-rw-r--r-- | lib/gitlab/cleanup/project_uploads.rb | 125 |
2 files changed, 191 insertions, 0 deletions
# frozen_string_literal: true

# ---------------------------------------------------------------------------
# lib/gitlab/cleanup/project_upload_file_finder.rb
# ---------------------------------------------------------------------------
module Gitlab
  module Cleanup
    # Lists files under the local upload directory by shelling out to `find`,
    # skipping the `-/`, `@hashed/` and `tmp/` subtrees, and hands the
    # resulting paths to the caller in batches.
    class ProjectUploadFileFinder
      FIND_BATCH_SIZE = 500
      ABSOLUTE_UPLOAD_DIR = FileUploader.root.freeze
      EXCLUDED_SYSTEM_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/-/*".freeze
      EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
      EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze

      # Yields arrays of at most `batch_size` file paths.
      #
      # Paths are emitted by `find` exactly as it prints them, i.e. prefixed
      # with ABSOLUTE_UPLOAD_DIR (they are NOT relative to it).
      #
      # @param batch_size [Integer] maximum number of paths per yielded batch
      # @yieldparam paths [Array<String>]
      # @raise [RuntimeError] when the `find` process exits unsuccessfully
      def each_file_batch(batch_size: FIND_BATCH_SIZE, &block)
        cmd = build_find_command(ABSOLUTE_UPLOAD_DIR)

        Open3.popen2(*cmd) do |_stdin, stdout, status_thread|
          yield_paths_in_batches(stdout, batch_size, &block)

          raise "Find command failed" unless status_thread.value.success?
        end
      end

      private

      # Reads NUL-separated paths from `stdout` and yields them in slices of
      # `batch_size`. A trailing partial slice is yielded as well; nothing is
      # yielded when `stdout` is empty.
      def yield_paths_in_batches(stdout, batch_size, &block)
        stdout.each_line("\0").each_slice(batch_size) do |lines|
          # `find -print0` terminates every path with NUL; strip it.
          yield(lines.map { |line| line.chomp("\0") })
        end
      end

      # Builds the argv array for the `find` invocation (prefixed with
      # `ionice -c Idle` when ionice is available) and logs it.
      #
      # @param search_dir [String] directory `find` starts from
      # @return [Array<String>]
      def build_find_command(search_dir)
        cmd = %W[find -L #{search_dir}
                 -type f
                 ! ( -path #{EXCLUDED_SYSTEM_UPLOADS_PATH} -prune )
                 ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune )
                 ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune )
                 -print0]

        ionice = which_ionice
        cmd = %W[#{ionice} -c Idle] + cmd if ionice

        log_msg = "find command: \"#{cmd.join(' ')}\""
        Rails.logger.info log_msg

        cmd
      end

      # Path of the `ionice` executable as reported by Gitlab::Utils.which,
      # or false when the lookup raises.
      def which_ionice
        Gitlab::Utils.which('ionice')
      rescue StandardError
        # In this case, returning false is relatively safe,
        # even though it isn't very nice
        false
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/gitlab/cleanup/project_uploads.rb
# ---------------------------------------------------------------------------
module Gitlab
  module Cleanup
    # Finds project upload files on local disk that have no matching row in
    # the uploads table and either moves them to where an existing upload
    # record says they belong, or moves them into a lost-and-found directory.
    class ProjectUploads
      LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')

      attr_reader :logger

      # @param logger [#info, #debug, #error, nil] defaults to Rails.logger
      def initialize(logger: nil)
        @logger = logger || Rails.logger
      end

      # Processes every orphaned file and logs one result line per file.
      #
      # @param dry_run [Boolean] when true (the default) nothing is moved;
      #   the log only states what could be done
      def run!(dry_run: true)
        logger.info "Looking for orphaned project uploads to clean up#{'. Dry run' if dry_run}..."

        each_orphan_file do |path, upload_path|
          result = cleanup(path, upload_path, dry_run)

          logger.info result
        end
      end

      private

      # Decides what to do with one orphaned file and returns a
      # human-readable result message.
      def cleanup(path, upload_path, dry_run)
        # This happened in staging:
        # `find` returned a path on which `File.delete` raised `Errno::ENOENT`
        return "Cannot find file: #{path}" unless File.exist?(path)

        correct_path = upload_path && find_correct_path(upload_path)

        if correct_path
          move(path, correct_path, 'fix', dry_run)
        else
          move_to_lost_and_found(path, dry_run)
        end
      end

      # Accepts a path in the form of "#{hex_secret}/#{filename}"
      #
      # @return [String, nil] the absolute path recorded for a local
      #   FileUploader upload with that path, or nil when there is none or
      #   it cannot be determined
      def find_correct_path(upload_path)
        upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
        return unless upload&.local?

        upload.absolute_path
      rescue => e
        logger.error e.message

        # absolute_path depends on a lot of code. If it doesn't work, then
        # it doesn't matter if the upload file is in the right place. Treat it
        # as uncorrectable.
        # I.e. the project record might be missing, which raises an exception.
        nil
      end

      # Moves `path` to the same relative location underneath LOST_AND_FOUND.
      def move_to_lost_and_found(path, dry_run)
        # The upload dir is escaped so that regex metacharacters in it
        # (".", "+", "(" ...) are matched literally, and the block form of
        # `sub` keeps backslashes in the replacement from being interpreted.
        upload_dir_prefix = /\A#{Regexp.escape(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s)}/
        new_path = path.sub(upload_dir_prefix) { LOST_AND_FOUND }

        move(path, new_path, 'move to lost and found', dry_run)
      end

      # Moves `path` to `new_path`, creating parent directories as needed,
      # unless `dry_run` is set.
      #
      # @return [String] "Can ...", "Did ..." or "Error during ..." message;
      #   errors are reported in the message rather than raised
      def move(path, new_path, prefix, dry_run)
        action = "#{prefix} #{path} -> #{new_path}"

        if dry_run
          "Can #{action}"
        else
          begin
            FileUtils.mkdir_p(File.dirname(new_path))
            FileUtils.mv(path, new_path)

            "Did #{action}"
          rescue => e
            "Error during #{action}: #{e.inspect}"
          end
        end
      end

      # Yields absolute paths of project upload files that are not in the
      # uploads table
      #
      # @yieldparam path [String] path as returned by the file finder
      # @yieldparam upload_path [String, nil] "#{hex_secret}/#{filename}"
      #   portion, or nil when `path` does not look like a project upload
      def each_orphan_file
        ProjectUploadFileFinder.new.each_file_batch do |file_paths|
          logger.debug "Processing batch of #{file_paths.size} project upload file paths, starting with #{file_paths.first}"

          file_paths.each do |path|
            pup = ProjectUploadPath.from_path(path)

            yield(path, pup.upload_path) if pup.orphan?
          end
        end
      end

      # Splits an on-disk upload path into the project's full path and the
      # "#{hex_secret}/#{filename}" upload path, and checks whether a
      # matching upload record exists.
      class ProjectUploadPath
        # FileUploader.root is escaped so that regex metacharacters in the
        # configured directory are matched literally.
        PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

        attr_reader :full_path, :upload_path

        def initialize(full_path, upload_path)
          @full_path = full_path
          @upload_path = upload_path
        end

        # @param path [String] path of a file under FileUploader.root
        # @return [ProjectUploadPath] with nil components when `path` does
        #   not match PROJECT_FULL_PATH_REGEX
        def self.from_path(path)
          path_matched = path.match(PROJECT_FULL_PATH_REGEX)
          return new(nil, nil) unless path_matched

          new(path_matched[1], path_matched[2])
        end

        # True when the path could not be parsed, or when no FileUploader
        # upload row exists for this project and upload path.
        def orphan?
          return true if full_path.nil? || upload_path.nil?

          # It's possible to reduce to one query, but `where_full_path_in` is complex
          !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
        end

        private

        # NOTE: despite the singular name this is the Array returned by
        # `pluck(:id)` (possibly empty); it is passed as-is to the
        # `model_id:` condition in `orphan?`.
        def project_id
          @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
        end
      end
    end
  end
end