summaryrefslogtreecommitdiff
path: root/lib/gitlab/cleanup/project_uploads.rb
blob: 77231665e7ea3234b0aed91078d2e37a43f3d7e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# frozen_string_literal: true

module Gitlab
  module Cleanup
    class ProjectUploads
      # Directory that receives orphaned files for which no correct location
      # could be determined: "<absolute upload dir>/-/project-lost-found".
      LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')

      # Logger that receives the progress, result and error messages.
      attr_reader :logger

      # @param logger [#info, #debug, #error, nil] destination for messages;
      #   Gitlab::AppLogger is used when none (or a falsy value) is given.
      def initialize(logger: nil)
        @logger = logger
        @logger ||= Gitlab::AppLogger
      end

      # Walks every orphaned project upload file, cleans it up and logs the
      # outcome of each cleanup at info level.
      #
      # @param dry_run [Boolean] when truthy, only report what would be done
      # @return whatever #each_orphan_file returns
      def run!(dry_run: true)
        dry_run_note = dry_run ? '. Dry run' : nil
        logger.info "Looking for orphaned project uploads to clean up#{dry_run_note}..."

        each_orphan_file do |orphan_path, orphan_upload_path|
          logger.info cleanup(orphan_path, orphan_upload_path, dry_run)
        end
      end

      private

      # Handles a single orphaned file: moves it to its correct location when
      # one can be determined, otherwise to the lost-and-found directory.
      #
      # @param path [String] absolute path of the orphaned file
      # @param upload_path [String, nil] "#{hex_secret}/#{filename}" if known
      # @param dry_run [Boolean] when truthy, nothing is moved
      # @return [String] human-readable description of the outcome
      def cleanup(path, upload_path, dry_run)
        # Seen in staging: `find` listed a path for which `File.delete` then
        # raised `Errno::ENOENT`, so make sure the file is still there.
        vanished = !File.exist?(path)
        return "Cannot find file: #{path}" if vanished

        destination = find_correct_path(upload_path) if upload_path
        return move(path, destination, 'fix', dry_run) if destination

        move_to_lost_and_found(path, dry_run)
      end

      # Looks up where an upload file should live according to the uploads
      # table.
      #
      # Accepts a path in the form of "#{hex_secret}/#{filename}". Returns the
      # upload's absolute path, or nil when there is no FileUploader record for
      # it, the record is not local, it has no model, or the lookup raised.
      # rubocop: disable CodeReuse/ActiveRecord
      def find_correct_path(upload_path)
        record = Upload.find_by(uploader: 'FileUploader', path: upload_path)
        resolvable = record && record.local? && record.model

        resolvable ? record.absolute_path : nil
      rescue StandardError => e
        logger.error e.message

        # absolute_path relies on a lot of other code (for instance, a missing
        # project record raises an exception). If it cannot be computed, it
        # does not matter whether the file is in the right place: treat the
        # upload as uncorrectable.
        nil
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Moves a file that cannot be matched to an upload record into the
      # lost-and-found directory, keeping its path relative to the upload
      # directory.
      #
      # @param path [String] absolute path of the orphaned file
      # @param dry_run [Boolean] when truthy, nothing is moved
      # @return [String] human-readable description of the outcome
      def move_to_lost_and_found(path, dry_run)
        upload_dir = ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s

        # The directory is escaped so regex metacharacters in it (e.g. "+" or
        # "(") match literally, and the block form of `sub` is used so the
        # replacement is never scanned for backreferences such as "\1".
        new_path = path.sub(/\A#{Regexp.escape(upload_dir)}/) { LOST_AND_FOUND }

        move(path, new_path, 'move to lost and found', dry_run)
      end

      # Moves a file, creating the destination directory first, and reports
      # the outcome as a sentence instead of raising.
      #
      # @param path [String] current location of the file
      # @param new_path [String] destination of the file
      # @param prefix [String] short label for the kind of move, used in the message
      # @param dry_run [Boolean] when truthy, only describe what would be done
      # @return [String] "Can ...", "Did ..." or "Error during ..." message
      def move(path, new_path, prefix, dry_run)
        description = "#{prefix} #{path} -> #{new_path}"
        return "Can #{description}" if dry_run

        begin
          destination_dir = File.dirname(new_path)
          FileUtils.mkdir_p(destination_dir)
          FileUtils.mv(path, new_path)

          "Did #{description}"
        rescue StandardError => error
          "Error during #{description}: #{error.inspect}"
        end
      end

      # Iterates over the project upload files found on disk, batch by batch,
      # and yields those that have no matching row in the uploads table.
      #
      # @yieldparam path [String] absolute path of the orphaned file
      # @yieldparam upload_path [String, nil] "#{hex_secret}/#{filename}" part
      #   of the path, or nil when the path could not be parsed
      def each_orphan_file
        ProjectUploadFileFinder.new.each_file_batch do |batch|
          logger.debug "Processing batch of #{batch.size} project upload file paths, starting with #{batch.first}"

          batch.each do |file_path|
            candidate = ProjectUploadPath.from_path(file_path)
            next unless candidate.orphan?

            yield(file_path, candidate.upload_path)
          end
        end
      end

      # Describes one file found under the uploader root: the full path of the
      # project it is stored under and its upload path
      # ("#{hex_secret}/#{filename}"). Both are nil when the file path does not
      # have the expected shape.
      class ProjectUploadPath
        # Captures (1) the project full path and (2) "#{hex_secret}/#{filename}".
        # The uploader root is escaped so that regex metacharacters in the
        # directory name (e.g. "+", "(" or "[") are matched literally.
        PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

        attr_reader :full_path, :upload_path

        # @param full_path [String, nil] full path of the project
        # @param upload_path [String, nil] "#{hex_secret}/#{filename}"
        def initialize(full_path, upload_path)
          @full_path = full_path
          @upload_path = upload_path
        end

        # Builds an instance from the absolute path of a file.
        #
        # @param path [String] absolute path of a file
        # @return [ProjectUploadPath] with nil attributes when the path does
        #   not match PROJECT_FULL_PATH_REGEX
        def self.from_path(path)
          path_matched = path.match(PROJECT_FULL_PATH_REGEX)
          return new(nil, nil) unless path_matched

          new(path_matched[1], path_matched[2])
        end

        # Tells whether the file has no matching FileUploader upload record for
        # the project found at full_path. Unparseable paths are always orphans.
        #
        # @return [Boolean]
        # rubocop: disable CodeReuse/ActiveRecord
        def orphan?
          return true if full_path.nil? || upload_path.nil?

          # It's possible to reduce to one query, but `where_full_path_in` is complex
          !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
        end
        # rubocop: enable CodeReuse/ActiveRecord

        private

        # Ids of the projects matching full_path. Despite the singular name
        # this is the Array returned by `pluck` (presumably empty when no
        # project matches), and it is handed as-is to the `model_id` condition
        # in #orphan?.
        # rubocop: disable CodeReuse/ActiveRecord
        def project_id
          @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
        end
        # rubocop: enable CodeReuse/ActiveRecord
      end
    end
  end
end