summaryrefslogtreecommitdiff
path: root/app/services/projects/cleanup_service.rb
blob: 75be3425029aa230b920b94df58191ccf7ab21ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# frozen_string_literal: true

module Projects
  # The CleanupService removes data from the project repository following a
  # BFG rewrite: https://rtyley.github.io/bfg-repo-cleaner/
  #
  # Before executing this service, all refs rewritten by BFG should have been
  # pushed to the repository.
  class CleanupService < BaseService
    # Raised by apply_bfg_object_map! when the project has no uploaded BFG
    # object map. NOTE: this is a single pre-built exception instance rather
    # than an exception class, so every raise re-uses the same object.
    NoUploadError = StandardError.new("Couldn't find uploaded object map")

    include Gitlab::Utils::StrongMemoize

    class << self
      # Runs Projects::UpdateService with the given bfg_object_map and, only
      # when that update reports success, marks the repository read-only and
      # enqueues RepositoryCleanupWorker for the project and user.
      #
      # Returns the UpdateService result, or an error hash when the repository
      # could not be made read-only.
      def enqueue(project, current_user, bfg_object_map)
        update_result = Projects::UpdateService
          .new(project, current_user, bfg_object_map: bfg_object_map)
          .execute

        if update_result[:status] == :success
          project.set_repository_read_only!
          RepositoryCleanupWorker.perform_async(project.id, current_user.id)
        end

        update_result
      rescue Project::RepositoryReadOnlyError => err
        { status: :error, message: (_('Failed to make repository read-only. %{reason}') % { reason: err.message }) }
      end

      # Removes the project's uploaded BFG object map, then makes the
      # repository writable again.
      def cleanup_after(project)
        uploaded_map = project.bfg_object_map
        uploaded_map.remove!

        project.set_repository_writable!
      end
    end

    # Attempt to clean up the project following the push. Warning: this is
    # destructive!
    #
    # The project's uploaded BFG object map contains a line per rewritten
    # object, with the old and new SHAs space-separated. It is used to update
    # or remove content that references the objects that BFG has altered.
    def execute
      apply_bfg_object_map!

      # Prune older objects that are no longer referenced
      gc_worker = Projects::GitGarbageCollectWorker.new
      gc_worker.perform(project.id, :prune, "project_cleanup:gc:#{project.id}")

      # A stale cache could hide bugs that assume some object is still
      # present, delaying their discovery. Expire it so they surface at once.
      project.repository.expire_all_method_caches

      self.class.cleanup_after(project)
    end

    private

    # Streams the project's uploaded BFG object map to the repository cleaner
    # and cleans up stored diffs for every response the cleaner yields.
    #
    # Raises NoUploadError when the project has no uploaded object map.
    def apply_bfg_object_map!
      object_map = project.bfg_object_map
      raise NoUploadError unless object_map.exists?

      object_map.open do |map_io|
        repository_cleaner.apply_bfg_object_map_stream(map_io) { |response| cleanup_diffs(response) }
      end
    end

    # Removes stored diffs that reference commits rewritten in this batch.
    #
    # response is one response yielded by the repository cleaner; the old SHAs
    # of its commit entries are extracted and handed to both cleanup steps.
    # Batches without any commit entries are skipped entirely, so no
    # transaction is opened and no cleanup step runs for them.
    def cleanup_diffs(response)
      old_commit_shas = extract_old_commit_shas(response.entries)
      return if old_commit_shas.empty?

      # Both cleanup steps run inside one transaction
      ApplicationRecord.transaction do
        cleanup_merge_request_diffs(old_commit_shas)
        cleanup_note_diff_files(old_commit_shas)
      end
    end

    # Returns an Array with the old_oid of every entry in batch whose type is
    # :COMMIT, in their original order.
    def extract_old_commit_shas(batch)
      commit_entries = batch.select { |entry| entry.type == :COMMIT }
      commit_entries.map(&:old_oid)
    end

    # Destroys the merge request diffs of this project that match the given
    # old commit SHAs.
    #
    # TODO: ensure the highlight cache is removed immediately. It's too hard
    # to calculate the Redis keys at present.
    #
    # https://gitlab.com/gitlab-org/gitlab-foss/issues/61115
    def cleanup_merge_request_diffs(old_commit_shas)
      # destroy_all is used on purpose: the ActiveRecord callbacks must run
      MergeRequestDiff
        .by_project_id(project.id)
        .by_commit_sha(old_commit_shas)
        .destroy_all # rubocop:disable Cop/DestroyAll
    end

    # Deletes the note diff files of this project that reference the given old
    # commit SHAs and clears their entries from the highlight cache.
    def cleanup_note_diff_files(old_commit_shas)
      # The IDs are plucked once and re-used, rather than running the query
      # twice, so the cache is cleared for exactly the rows that get deleted
      stale_files = NoteDiffFile.referencing_sha(old_commit_shas, project_id: project.id)
      stale_ids = stale_files.pluck_primary_key

      NoteDiffFile.id_in(stale_ids).delete_all

      # The highlighted version of each diff lives in redis; drop it now
      Gitlab::DiscussionsDiff::HighlightCache.clear_multiple(stale_ids)
    end

    # Returns the Gitlab::Git::RepositoryCleaner for the raw repository,
    # building it on first use and re-using it afterwards.
    def repository_cleaner
      return @repository_cleaner if @repository_cleaner

      @repository_cleaner = Gitlab::Git::RepositoryCleaner.new(repository.raw)
    end
  end
end

# NOTE(review): presumably hooks in an extension module registered under the
# name 'Projects::CleanupService' when one exists; confirm against the
# definition of prepend_mod_with, which is not part of this file.
Projects::CleanupService.prepend_mod_with('Projects::CleanupService')