summaryrefslogtreecommitdiff
path: root/app/workers/concerns/git_garbage_collect_methods.rb
blob: c5f8c9c84640fad2207c41a711fa430b9237bf20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# frozen_string_literal: true

# Shared implementation for workers that run a Git housekeeping task
# (+:gc+, +:prune+, +:incremental_repack+, ...) against a resource's
# repository through Gitaly while holding an exclusive lease.
#
# Including classes must override +find_resource+. The hooks
# +before_gitaly_call+, +update_db_repository_statistics+ and +stats+ are
# optional and default to a no-op / an empty list.
module GitGarbageCollectMethods
  extend ActiveSupport::Concern

  included do
    include ApplicationWorker

    sidekiq_options retry: false
    feature_category :gitaly
    # NOTE(review): presumably marks the positional arguments at indexes 1-3
    # (task, lease_key, lease_uuid) as safe to log - confirm against the
    # ApplicationWorker DSL.
    loggable_arguments 1, 2, 3
  end

  # Timeout set to 24h
  LEASE_TIMEOUT = 86_400

  # Runs +task+ for the resource identified by +resource_id+.
  #
  # Lease handling:
  # * If a lease is already active for +lease_key+, the job only proceeds when
  #   the caller supplied the matching +lease_uuid+; that lease is then renewed.
  # * If no lease is active, the job tries to obtain one and returns early when
  #   that fails.
  #
  # @param resource_id [Object] identifier handed to +find_resource+
  # @param task [Symbol, String] housekeeping task; converted with +to_sym+
  # @param lease_key [String, nil] lease key; derived from task and resource when nil
  # @param lease_uuid [String, nil] uuid of a lease the caller already holds
  # @raise [Gitlab::Git::Repository::NoRepository] when Gitaly reports GRPC::NotFound
  # @raise [Gitlab::Git::CommandError] when Gitaly reports any other GRPC::BadStatus
  def perform(resource_id, task = :gc, lease_key = nil, lease_uuid = nil)
    resource = find_resource(resource_id)
    lease_key ||= default_lease_key(task, resource)
    active_uuid = get_lease_uuid(lease_key)

    if active_uuid
      # Somebody holds the lease; only continue if it is the one we were given.
      return unless active_uuid == lease_uuid

      renew_lease(lease_key, active_uuid)
    else
      lease_uuid = try_obtain_lease(lease_key)

      return unless lease_uuid
    end

    task = task.to_sym

    before_gitaly_call(task, resource)
    gitaly_call(task, resource)

    # Refresh the branch cache in case garbage collection caused a ref lookup to fail
    flush_ref_caches(resource) if gc?(task)

    update_repository_statistics(resource, task)

    # In case pack files are deleted, release libgit2 cache and open file
    # descriptors ASAP instead of waiting for Ruby garbage collection
    resource.cleanup
  ensure
    # Runs on every exit path (early return, success, exception) so the lease
    # is not left to expire on its own after LEASE_TIMEOUT.
    cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present?
  end

  private

  # Builds the lease key used when the caller did not pass one, e.g.
  # "git_gc:gc:projects:42" for a Project with id 42.
  def default_lease_key(task, resource)
    "git_gc:#{task}:#{resource.class.name.underscore.pluralize}:#{resource.id}"
  end

  # Hook: must be overridden by the including worker to load the resource.
  #
  # @raise [NotImplementedError] unless overridden
  def find_resource(id)
    raise NotImplementedError
  end

  # True for the tasks after which the ref caches are refreshed.
  def gc?(task)
    %i[gc prune].include?(task)
  end

  # Returns the result of ExclusiveLease#try_obtain; +perform+ treats a falsy
  # result as "lease not obtained".
  def try_obtain_lease(key)
    ::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain
  end

  def renew_lease(key, uuid)
    ::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew
  end

  def cancel_lease(key, uuid)
    ::Gitlab::ExclusiveLease.cancel(key, uuid)
  end

  # Returns the result of ExclusiveLease.get_uuid; +perform+ treats a falsy
  # result as "no lease is currently active for +key+".
  def get_lease_uuid(key)
    ::Gitlab::ExclusiveLease.get_uuid(key)
  end

  # Hook: invoked right before the Gitaly request. No-op by default.
  def before_gitaly_call(task, resource)
    # no-op
  end

  # Sends the housekeeping request to Gitaly: +:prune+ prunes unreachable
  # objects, every other task optimizes the repository. gRPC failures are
  # logged and re-raised as Gitlab::Git errors.
  def gitaly_call(task, resource)
    repository = resource.repository.raw_repository
    client = repository.gitaly_repository_client

    if task == :prune
      client.prune_unreachable_objects
    else
      client.optimize_repository
    end
  rescue GRPC::NotFound => e
    # Must stay ahead of the GRPC::BadStatus clause so that a missing
    # repository is reported as NoRepository rather than CommandError.
    Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found")
    raise Gitlab::Git::Repository::NoRepository, e
  rescue GRPC::BadStatus => e
    Gitlab::GitLogger.error("#{__method__} failed:\n#{e}")
    raise Gitlab::Git::CommandError, e
  end

  # Expires the branch cache and then calls +branch_names+ and
  # +has_visible_content?+ again.
  # NOTE(review): the two reads presumably re-populate the caches - confirm.
  def flush_ref_caches(resource)
    resource.repository.expire_branches_cache
    resource.repository.branch_names
    resource.repository.has_visible_content?
  end

  # Expires the repository's statistics caches and, unless the database is
  # read-only, hands the list of statistics to refresh to
  # +update_db_repository_statistics+.
  def update_repository_statistics(resource, task)
    resource.repository.expire_statistics_caches

    return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary

    stats_to_update = stats

    # :repository_size is left out for incremental repacks. Build a new list
    # rather than calling #delete on it: +stats+ is an overridable hook and may
    # return a frozen or shared array that must not be mutated here.
    stats_to_update -= [:repository_size] if task == :incremental_repack

    update_db_repository_statistics(resource, stats_to_update)
  end

  # Hook: persist the refreshed statistics named in +stats+. No-op by default.
  def update_db_repository_statistics(resource, stats)
    # no-op
  end

  # Hook: names of the statistics to refresh after the task. Empty by default.
  def stats
    []
  end
end