summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration/backfill_project_fullpath_in_repo_config.rb
blob: 3c142327e94989448bfa7a13076cd57890d7a650 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # This module is used to write the full path of all projects to
    # the git repository config file.
    # Storing the full project path in the git config allows admins to
    # easily identify a project when it is using hashed storage.
    module BackfillProjectFullpathInRepoConfig
      # Raised while building a full path when a record has a parent foreign
      # key set but the parent record itself cannot be loaded.
      class OrphanedNamespaceError < StandardError
      end

      module Storage
        # Computes the disk path of a project that uses hashed storage.
        #
        # The path is derived from the SHA2 hex digest of the project id and
        # is nested under two directory levels taken from the first four
        # characters of that digest.
        class HashedProject
          attr_accessor :project

          ROOT_PATH_PREFIX = '@hashed'

          # project - object responding to `id`; the id seeds the digest.
          def initialize(project)
            @project = project
          end

          # Returns "@hashed/<digest[0..1]>/<digest[2..3]>/<digest>".
          def disk_path
            digest = disk_hash

            [ROOT_PATH_PREFIX, digest[0..1], digest[2..3], digest].join('/')
          end

          # Returns the memoized hex digest of the project id, or nil when
          # the project has no id.
          def disk_hash
            return unless project.id

            @disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
          end
        end

        # Computes the disk path of a project that uses legacy storage, where
        # the disk path is simply the project's full path.
        class LegacyProject
          attr_accessor :project

          # project - object responding to `full_path`.
          def initialize(project)
            self.project = project
          end

          # Returns the wrapped project's full path unchanged.
          def disk_path
            project.full_path
          end
        end
      end

      # Concern shared by Project and Namespace that resolves a record's full
      # route by walking up through its chain of parents.
      module Routable
        extend ActiveSupport::Concern

        # Returns the full route of this record, computing it on first use
        # and caching it on the instance afterwards.
        def full_path
          return @full_path if @full_path

          @full_path = build_full_path
        end

        # Builds "<parent full path>/<own path>", or returns just the own
        # path when the record has no parent.
        #
        # Raises OrphanedNamespaceError when the parent foreign key is set
        # but the parent record cannot be loaded.
        def build_full_path
          if has_parent?
            owner = parent
            raise OrphanedNamespaceError if owner.nil?

            owner.full_path + '/' + path
          else
            path
          end
        end

        # Returns the raw value of the `parent` association's foreign key
        # column, so the result is truthy only when that column is set.
        def has_parent?
          parent_key = association(:parent).reflection.foreign_key

          read_attribute(parent_key)
        end
      end

      # Minimal repository handle: holds the storage name and the relative
      # path that are needed to address a repository through Gitaly.
      class Repository
        attr_reader :storage

        # storage       - name of the storage holding the repository
        # relative_path - path of the repository relative to that storage
        def initialize(storage, relative_path)
          @relative_path = relative_path
          @storage = storage
        end

        # Builds a new Gitaly::Repository message from the stored storage
        # name and relative path.
        def gitaly_repository
          Gitaly::Repository.new(relative_path: @relative_path, storage_name: @storage)
        end
      end

      # Namespace can be a user or group. It can be the root or a
      # child of another namespace.
      #
      # Backed by the `namespaces` table. Single-table inheritance is
      # disabled (`inheritance_column = nil`), so every row is loaded as this
      # plain class.
      class Namespace < ActiveRecord::Base
        self.table_name = 'namespaces'
        self.inheritance_column = nil

        include Routable

        belongs_to :parent, class_name: 'Namespace', inverse_of: :namespaces
        has_many :projects, inverse_of: :parent
        # Child namespaces point at their parent through `parent_id` (the key
        # used by `belongs_to :parent` above). Without an explicit foreign key
        # Rails may derive `namespace_id` from the class name instead.
        has_many :namespaces, foreign_key: :parent_id, inverse_of: :parent
      end

      # Project model scoped to this migration. It knows where its repository
      # lives on disk and how to set, delete and clean up repository config
      # through the Gitaly repository service.
      class Project < ActiveRecord::Base
        self.table_name = 'projects'

        include Routable
        include EachBatch

        FULLPATH_CONFIG_KEY = 'gitlab.fullpath'

        belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'
        delegate :disk_path, to: :storage

        # Stores the project's full path under FULLPATH_CONFIG_KEY in the
        # repository config.
        def add_fullpath_config
          config = { FULLPATH_CONFIG_KEY => full_path }

          repository_service.set_config(config)
        end

        # Deletes the FULLPATH_CONFIG_KEY entry from the repository config.
        def remove_fullpath_config
          keys = [FULLPATH_CONFIG_KEY]

          repository_service.delete_config(keys)
        end

        # Asks the repository service to clean up the repository.
        def cleanup_repository
          repository_service.cleanup
        end

        # Returns the memoized storage strategy object matching the
        # project's storage type (hashed or legacy).
        def storage
          @storage ||= (hashed_storage? ? Storage::HashedProject : Storage::LegacyProject).new(self)
        end

        # Truthy when `storage_version` is set and is at least 1.
        def hashed_storage?
          version = storage_version

          version && version >= 1
        end

        # Returns the memoized Repository handle addressing
        # "<disk_path>.git" on the project's repository storage.
        def repository
          return @repository if @repository

          @repository = Repository.new(repository_storage, disk_path + '.git')
        end

        # Returns the memoized Gitaly repository service client for this
        # project's repository.
        def repository_service
          return @repository_service if @repository_service

          @repository_service = Gitlab::GitalyClient::RepositoryService.new(repository)
        end
      end

      # Base class for Up and Down migration classes.
      #
      # Subclasses implement `perform_one(project)` and define a nested
      # `RetryOne` class (a BaseRetryOne subclass) that is scheduled to re-run
      # a single project after a retryable failure.
      class BackfillFullpathMigration
        RETRY_DELAY = 15.minutes
        MAX_RETRIES = 2

        # Base class for retrying one project. Subclasses provide
        # `migration_class`, the migration to instantiate for the retry.
        class BaseRetryOne
          # Re-runs the migration for a single project.
          #
          # project_id  - id of the project to retry
          # retry_count - number of this retry attempt
          def perform(project_id, retry_count)
            # `find` raises ActiveRecord::RecordNotFound for a missing row,
            # which made the guard below unreachable. `find_by` returns nil,
            # so a project deleted since the retry was scheduled is skipped.
            project = Project.find_by(id: project_id)

            return unless project

            migration_class.new.safe_perform_one(project, retry_count)
          end
        end

        # Migrates every project whose id lies in start_id..end_id
        # (inclusive), preloading each project's parent namespace.
        def perform(start_id, end_id)
          Project.includes(:parent).where(id: start_id..end_id).each do |project|
            safe_perform_one(project)
          end
        end

        # Runs `perform_one` for the project and handles expected failures.
        #
        # GRPC::NotFound, GRPC::InvalidArgument and OrphanedNamespaceError are
        # swallowed (nil is returned) and never retried. Any other
        # GRPC::BadStatus schedules a retry until MAX_RETRIES is reached.
        #
        # project     - the Project to migrate
        # retry_count - number of retries already attempted (default 0)
        def safe_perform_one(project, retry_count = 0)
          perform_one(project)
        rescue GRPC::NotFound, GRPC::InvalidArgument, OrphanedNamespaceError
          nil
        rescue GRPC::BadStatus
          schedule_retry(project, retry_count + 1) if retry_count < MAX_RETRIES
        end

        # Enqueues the subclass' RetryOne job for the project, to run after
        # RETRY_DELAY with the given retry count.
        def schedule_retry(project, retry_count)
          # Constants provided to BackgroundMigrationWorker must be within the
          # scope of Gitlab::BackgroundMigration
          retry_class_name = self.class::RetryOne.name.sub('Gitlab::BackgroundMigration::', '')

          BackgroundMigrationWorker.perform_in(RETRY_DELAY, retry_class_name, [project.id, retry_count])
        end
      end

      # Forward migration: adds the fullpath entry to the git repo config.
      class Up < BackfillFullpathMigration
        # Cleans up the project's repository, then adds the fullpath config
        # entry. Returns the result of adding the entry.
        def perform_one(project)
          project.tap(&:cleanup_repository).add_fullpath_config
        end

        # Retry job for a single project; runs the Up migration again.
        class RetryOne < BaseRetryOne
          def migration_class
            Up
          end
        end
      end

      # Rollback migration: removes the fullpath entry from the git repo
      # config.
      class Down < BackfillFullpathMigration
        # Cleans up the project's repository, then removes the fullpath
        # config entry. Returns the result of removing the entry.
        def perform_one(project)
          project.tap(&:cleanup_repository).remove_fullpath_config
        end

        # Retry job for a single project; runs the Down migration again.
        class RetryOne < BaseRetryOne
          def migration_class
            Down
          end
        end
      end
    end
  end
end