summaryrefslogtreecommitdiff
path: root/lib/gitlab/cleanup/orphan_lfs_file_references.rb
blob: 14eac474e278eebf44c155664dd7013f3d0763fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# frozen_string_literal: true

module Gitlab
  module Cleanup
    # Finds the LFS object references of a project whose OIDs are no longer
    # reported as LFS pointers by any of the project's existing repositories
    # (main, design, wiki) and, unless running in dry-run mode, deletes them.
    class OrphanLfsFileReferences
      include Gitlab::Utils::StrongMemoize

      attr_reader :project, :dry_run, :logger, :limit

      # Batch size used when deleting references and no +limit+ was given.
      DEFAULT_REMOVAL_LIMIT = 1000

      # @param project the project whose LFS references are inspected
      # @param dry_run [Boolean] when true (the default) nothing is deleted;
      #   only the number of invalid references is logged
      # @param logger object responding to +info+; falls back to Gitlab::AppLogger
      # @param limit [Integer, nil] number of references deleted per batch
      #   (passed to +each_batch(of:)+); DEFAULT_REMOVAL_LIMIT is used when nil.
      #   Note that this is a batch size, not a cap on the total removed.
      def initialize(project, dry_run: true, logger: nil, limit: nil)
        @project = project
        @dry_run = dry_run
        @logger = logger || Gitlab::AppLogger
        @limit = limit
      end

      # Logs the start of the search, then reports (dry run) or removes the
      # orphaned references of the project.
      def run!
        log_info("Looking for orphan LFS files for project #{project.name_with_namespace}")

        remove_orphan_references
      end

      private

      # In dry-run mode logs how many invalid references exist. Otherwise
      # deletes them batch by batch, enqueues a ProjectCacheWorker job and
      # logs the number of deleted rows.
      def remove_orphan_references
        invalid_references = project.lfs_objects_projects.lfs_object_in(orphan_objects)

        if dry_run
          log_info("Found invalid references: #{invalid_references.count}")
        else
          count = 0
          invalid_references.each_batch(of: limit || DEFAULT_REMOVAL_LIMIT) do |relation|
            count += relation.delete_all
          end

          # NOTE(review): presumably refreshes the cached LFS size statistic now
          # that references are gone - confirm against ProjectCacheWorker.
          ProjectCacheWorker.perform_async(project.id, [], [:lfs_objects_size])

          log_info("Removed invalid references: #{count}")
        end
      end

      # Returns the LfsObject records (via +LfsObject.for_oids+) whose OIDs are
      # linked to the project but not referenced by any existing repository.
      def orphan_objects
        # Get these first so racing with a git push can't remove any LFS objects
        oids = project.lfs_objects_oids

        repos = [
          project.repository,
          project.design_repository,
          project.wiki.repository
        ].select(&:exists?)

        # Subtract the OIDs still pointed to by each repository. Plain `each`
        # is used because only the reassignment of `oids` matters here; no
        # result collection is needed.
        repos.each do |repo|
          oids -= repo.gitaly_blob_client.get_all_lfs_pointers.map(&:lfs_oid)
        end

        # The remaining OIDs are not used by any repository, so are orphans
        LfsObject.for_oids(oids)
      end

      # Sends +msg+ to the logger at info level, prefixed with "[DRY RUN] "
      # when running in dry-run mode.
      def log_info(msg)
        logger.info("#{'[DRY RUN] ' if dry_run}#{msg}")
      end
    end
  end
end