summaryrefslogtreecommitdiff
path: root/lib/gitlab/cleanup/project_uploads.rb
blob: f55ab535efeda0db19676f986b4b82ed7e466fca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# frozen_string_literal: true

module Gitlab
  module Cleanup
    class ProjectUploads
      # Directory that replaces the uploads directory prefix of an orphaned
      # file's path when no correct location can be determined for it
      # (see #move_to_lost_and_found).
      LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')

      # Logger used for the progress, result and error messages of this class.
      attr_reader :logger

      # logger - object that receives the log messages. Falls back to
      #          Rails.logger when none (or a falsy value) is given.
      def initialize(logger: nil)
        @logger = logger
        @logger ||= Rails.logger
      end

      # Processes every orphaned project upload file and logs one result
      # message per file.
      #
      # dry_run - when true (the default) the dry-run flag is passed on to
      #           #cleanup, which only reports what it would do.
      def run!(dry_run: true)
        dry_run_note = dry_run ? '. Dry run' : nil
        logger.info "Looking for orphaned project uploads to clean up#{dry_run_note}..."

        each_orphan_file do |orphan_path, upload_path|
          logger.info cleanup(orphan_path, upload_path, dry_run)
        end
      end

      private

      # Decides what to do with a single orphaned file and returns a message
      # describing the outcome.
      #
      # path        - absolute path of the orphaned file
      # upload_path - "#{hex_secret}/#{filename}" part of the path, or nil
      # dry_run     - passed through to the move helpers
      def cleanup(path, upload_path, dry_run)
        # Seen in staging: `find` returned a path on which `File.delete`
        # raised `Errno::ENOENT`, so the file is checked again here.
        return "Cannot find file: #{path}" unless File.exist?(path)

        destination = find_correct_path(upload_path) if upload_path
        return move_to_lost_and_found(path, dry_run) unless destination

        move(path, destination, 'fix', dry_run)
      end

      # Looks up where the file of an upload record is supposed to live.
      #
      # upload_path - a path in the form of "#{hex_secret}/#{filename}"
      #
      # Returns the absolute path of the matching FileUploader upload, or nil
      # when there is no such upload, it is not local, it has no model, or
      # resolving the path raised an error.
      def find_correct_path(upload_path)
        record = Upload.find_by(uploader: 'FileUploader', path: upload_path)
        resolvable = record && record.local? && record.model

        resolvable ? record.absolute_path : nil
      rescue => error
        logger.error error.message

        # absolute_path depends on a lot of code. If it doesn't work, then it
        # doesn't matter whether the upload file is in the right place: treat
        # it as uncorrectable. E.g. the project record might be missing,
        # which raises an exception.
        nil
      end

      # Moves an orphaned file into the lost-and-found directory, keeping its
      # location relative to the uploads directory.
      #
      # path    - absolute path of the orphaned file
      # dry_run - passed through to #move
      #
      # Returns the result message produced by #move.
      def move_to_lost_and_found(path, dry_run)
        # The directory is escaped so that regexp metacharacters in it (e.g.
        # "+", "(" or ".") are matched literally. The block form of `sub` is
        # used so that backslashes in LOST_AND_FOUND are never interpreted as
        # backreferences.
        upload_dir_prefix = /\A#{Regexp.escape(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s)}/
        new_path = path.sub(upload_dir_prefix) { LOST_AND_FOUND }

        move(path, new_path, 'move to lost and found', dry_run)
      end

      # Moves a file, creating the destination directory when needed.
      #
      # path     - current absolute path of the file
      # new_path - absolute path the file should be moved to
      # prefix   - short description of the action, used in the message
      # dry_run  - when truthy nothing is touched on disk
      #
      # Returns a String: "Can ..." for a dry run, "Did ..." after a
      # successful move, or "Error during ..." when the move raised.
      def move(path, new_path, prefix, dry_run)
        description = "#{prefix} #{path} -> #{new_path}"
        return "Can #{description}" if dry_run

        begin
          FileUtils.mkdir_p(File.dirname(new_path))
          FileUtils.mv(path, new_path)

          "Did #{description}"
        rescue => error
          # Failures are reported in the returned message, not raised.
          "Error during #{description}: #{error.inspect}"
        end
      end

      # Walks all project upload files in batches and yields those that
      # ProjectUploadPath#orphan? reports as orphaned.
      #
      # Yields the absolute file path and the upload path parsed from it
      # (nil when the file path could not be parsed).
      def each_orphan_file
        finder = ProjectUploadFileFinder.new

        finder.each_file_batch do |batch|
          logger.debug "Processing batch of #{batch.size} project upload file paths, starting with #{batch.first}"

          batch.each do |file_path|
            parsed = ProjectUploadPath.from_path(file_path)
            next unless parsed.orphan?

            yield(file_path, parsed.upload_path)
          end
        end
      end

      # Splits the absolute path of a file found under the uploader root into
      # a project full path and an upload path, and checks whether a matching
      # row exists in the uploads table.
      class ProjectUploadPath
        # Captures (1) the project full path and (2) the upload path in the
        # form of "#{hex_secret}/#{filename}".
        # The uploader root is escaped so that regexp metacharacters in the
        # configured directory (e.g. "+", "(" or ".") are matched literally
        # instead of being interpreted as regexp syntax.
        PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

        attr_reader :full_path, :upload_path

        # full_path   - project full path parsed from the file path, or nil
        # upload_path - "#{hex_secret}/#{filename}", or nil
        def initialize(full_path, upload_path)
          @full_path = full_path
          @upload_path = upload_path
        end

        # Builds an instance from an absolute file path. A path that does not
        # match PROJECT_FULL_PATH_REGEX yields an instance whose attributes
        # are both nil, which #orphan? reports as orphaned.
        def self.from_path(path)
          path_matched = path.match(PROJECT_FULL_PATH_REGEX)
          return new(nil, nil) unless path_matched

          new(path_matched[1], path_matched[2])
        end

        # Returns true when the path could not be parsed or when no
        # FileUploader upload row exists for this upload path and project.
        def orphan?
          return true if full_path.nil? || upload_path.nil?

          # It's possible to reduce to one query, but `where_full_path_in` is complex
          !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
        end

        private

        # Memoized result of plucking the ids of the projects matching
        # full_path. NOTE: despite the singular name, `pluck` returns an
        # Array, so `model_id:` in #orphan? is matched against a list of ids.
        def project_id
          @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
        end
      end
    end
  end
end