summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration/backfill_project_repositories.rb
blob: c8d83cc1803def1b89390ccf050a7a334cc73f06 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Class that will create fill the project_repositories table
    # for projects an entry is is missing in this table.
    class BackfillProjectRepositories
      OrphanedNamespaceError = Class.new(StandardError)

      # Shard model
      class Shard < ActiveRecord::Base
        self.table_name = 'shards'
      end

      # Class that will find or create the shard by name.
      # There is only a small set of shards, which would
      # not change quickly, so look them up from memory
      # instead of hitting the DB each time.
      class ShardFinder
        def find_shard_id(name)
          shard_id = shards.fetch(name, nil)
          return shard_id if shard_id.present?

          Shard.transaction(requires_new: true) do
            create!(name)
          end
        rescue ActiveRecord::RecordNotUnique
          reload!
          retry
        end

        private

        def create!(name)
          Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
        end

        def shards
          @shards ||= reload!
        end

        def reload!
          @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten]
        end
      end

      module Storage
        # Class that returns the disk path for a project using hashed storage
        class HashedProject
          attr_accessor :project

          ROOT_PATH_PREFIX = '@hashed'

          def initialize(project)
            @project = project
          end

          def disk_path
            "#{ROOT_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
          end

          def disk_hash
            @disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
          end
        end

        # Class that returns the disk path for a project using legacy storage
        class LegacyProject
          attr_accessor :project

          def initialize(project)
            @project = project
          end

          def disk_path
            project.full_path
          end
        end
      end

      # Concern used by Project and Namespace to determine the full route to the project
      module Routable
        extend ActiveSupport::Concern

        def full_path
          route&.path || build_full_path
        end

        def build_full_path
          return path unless has_parent?

          raise OrphanedNamespaceError if parent.nil?

          parent.full_path + '/' + path
        end

        def has_parent?
          read_attribute(association(:parent).reflection.foreign_key)
        end
      end

      # Route model
      class Route < ActiveRecord::Base
        belongs_to :source, inverse_of: :route, polymorphic: true
      end

      # Namespace model
      class Namespace < ActiveRecord::Base
        self.table_name = 'namespaces'
        self.inheritance_column = nil

        include Routable

        belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces'

        has_one :route, -> { where(source_type: 'Namespace') }, inverse_of: :source, foreign_key: :source_id

        has_many :projects, inverse_of: :parent
        has_many :namespaces, inverse_of: :parent
      end

      # ProjectRegistry model
      class ProjectRepository < ActiveRecord::Base
        self.table_name = 'project_repositories'

        belongs_to :project, inverse_of: :project_repository
      end

      # Project model
      class Project < ActiveRecord::Base
        self.table_name = 'projects'

        include Routable

        HASHED_STORAGE_FEATURES = {
          repository: 1,
          attachments: 2
        }.freeze

        scope :with_parent, -> { includes(:parent) }

        belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'

        has_one :route, -> { where(source_type: 'Project') }, inverse_of: :source, foreign_key: :source_id
        has_one :project_repository, inverse_of: :project

        delegate :disk_path, to: :storage

        class << self
          def on_hashed_storage
            where(Project.arel_table[:storage_version]
              .gteq(HASHED_STORAGE_FEATURES[:repository]))
          end

          def on_legacy_storage
            where(Project.arel_table[:storage_version].eq(nil)
              .or(Project.arel_table[:storage_version].eq(0)))
          end

          def without_project_repository
            joins(left_outer_join_project_repository)
              .where(ProjectRepository.arel_table[:project_id].eq(nil))
          end

          def left_outer_join_project_repository
            projects_table = Project.arel_table
            repository_table = ProjectRepository.arel_table

            projects_table
              .join(repository_table, Arel::Nodes::OuterJoin)
              .on(projects_table[:id].eq(repository_table[:project_id]))
              .join_sources
          end
        end

        def storage
          @storage ||=
            if hashed_storage?
              Storage::HashedProject.new(self)
            else
              Storage::LegacyProject.new(self)
            end
        end

        def hashed_storage?
          self.storage_version &&
            self.storage_version >= HASHED_STORAGE_FEATURES[:repository]
        end
      end

      def perform(start_id, stop_id)
        Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id))
      end

      private

      def projects
        raise NotImplementedError,
          "#{self.class} does not implement #{__method__}"
      end

      def project_repositories(start_id, stop_id)
        projects
          .without_project_repository
          .includes(:route, parent: [:route]).references(:routes)
          .includes(:parent).references(:namespaces)
          .where(id: start_id..stop_id)
          .map { |project| build_attributes_for_project(project) }
          .compact
      end

      def build_attributes_for_project(project)
        {
          project_id: project.id,
          shard_id:   find_shard_id(project.repository_storage),
          disk_path:  project.disk_path
        }
      end

      def find_shard_id(repository_storage)
        shard_finder.find_shard_id(repository_storage)
      end

      def shard_finder
        @shard_finder ||= ShardFinder.new
      end
    end
  end
end