diff options
author | Toon Claes <toon@gitlab.com> | 2018-11-30 17:03:29 +0100 |
---|---|---|
committer | Douglas Barbosa Alexandre <dbalexandre@gmail.com> | 2018-12-06 21:04:41 -0200 |
commit | 5237a55d62f8dcb021a041741b3f09cad7784a36 (patch) | |
tree | 8c543fb224f2fe19a89f8a6758dd86dd9da53321 /lib | |
parent | 0f338434b90b6e385c0c1947ff53c143dd7ed6be (diff) | |
download | gitlab-ce-5237a55d62f8dcb021a041741b3f09cad7784a36.tar.gz |
Fill project_repositories for hashed storage
This adds a background migration that will ensure all projects that
are on hashed storage have a row in `project_repositories`.
Related issue: https://gitlab.com/gitlab-org/gitlab-ce/issues/48527
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/background_migration/backfill_hashed_project_repositories.rb | 124 |
1 files changed, 124 insertions, 0 deletions
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Class that will create rows in project_repositories for all
    # projects that are on hashed storage.
    #
    # It declares its own minimal models, each bound explicitly to a table
    # name, and exposes a single entry point: #perform.
    class BackfillHashedProjectRepositories
      # Model for a Shard
      class Shard < ActiveRecord::Base
        self.table_name = 'shards'

        # Finds the shard with the given name, creating it when it is missing.
        #
        # @param name [String] the shard name
        # @return [Shard] the existing or newly created shard
        def self.by_name(name)
          find_or_create_by!(name: name)
        rescue ActiveRecord::RecordNotUnique
          # Another writer inserted the same name between our lookup and our
          # insert. The row exists now, so read it back instead of retrying
          # in an unbounded loop.
          find_by!(name: name)
        end
      end

      # Class that will find or create the shard by name.
      # There is only a small set of shards, which would not change quickly,
      # so look them up from memory instead of hitting the DB each time.
      class ShardFinder
        # Returns the shard with the given name, creating it when it is not
        # known yet.
        #
        # @param name [String] the shard name
        # @return [Shard]
        # @raise [ActiveRecord::RecordNotUnique] when the insert conflicts and
        #   the shard still cannot be found after reloading the list
        def find(name)
          cached(name) || create!(name)
        rescue ActiveRecord::RecordNotUnique
          # The in-memory list was stale: refresh it once and look again.
          # Re-raise rather than retry forever if the row is still missing.
          load!
          cached(name) || raise
        end

        private

        # Looks the shard up in the in-memory list only.
        def cached(name)
          shards.detect { |shard| shard.name == name }
        end

        # Inserts the shard and remembers it in the in-memory list.
        def create!(name)
          Shard.create!(name: name).tap { |shard| shards << shard }
        end

        # The in-memory list of shards, loaded on first use.
        def shards
          @shards || load!
        end

        # (Re)loads every shard from the database.
        def load!
          @shards = Shard.all.to_a
        end
      end

      # Model for a ProjectRepository
      class ProjectRepository < ActiveRecord::Base
        self.table_name = 'project_repositories'

        belongs_to :project, inverse_of: :project_repository
      end

      # Model for a Project
      class Project < ActiveRecord::Base
        self.table_name = 'projects'

        HASHED_PATH_PREFIX = '@hashed'

        # A project counts as being on hashed storage when its
        # `storage_version` is at least the `:repository` value.
        HASHED_STORAGE_FEATURES = {
          repository: 1,
          attachments: 2
        }.freeze

        has_one :project_repository, inverse_of: :project

        class << self
          # Projects whose `storage_version` is at least the `:repository`
          # value of HASHED_STORAGE_FEATURES.
          def on_hashed_storage
            where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[:repository]))
          end

          # Projects that have no row in `project_repositories`.
          def without_project_repository
            joins(left_outer_join_project_repository)
              .where(ProjectRepository.arel_table[:project_id].eq(nil))
          end

          private

          # Builds the LEFT OUTER JOIN with Arel instead of relying on
          # `left_outer_joins`, which is only available from Rails 5, so the
          # query works regardless of the Rails version.
          def left_outer_join_project_repository
            projects = arel_table
            repositories = ProjectRepository.arel_table

            projects
              .join(repositories, Arel::Nodes::OuterJoin)
              .on(projects[:id].eq(repositories[:project_id]))
              .join_sources
          end
        end

        # Builds the attributes of the `project_repositories` row for this
        # project.
        #
        # @param shard_finder [ShardFinder] used to resolve the shard for
        #   this project's `repository_storage`
        # @return [Hash, nil] the row attributes, or nil when the project is
        #   not on hashed storage
        def project_repository_attributes(shard_finder)
          return unless hashed_storage?

          {
            project_id: id,
            shard_id: shard_finder.find(repository_storage).id,
            disk_path: hashed_disk_path
          }
        end

        private

        # Same threshold as `on_hashed_storage`; a nil `storage_version`
        # counts as 0, so the predicate always returns a boolean.
        def hashed_storage?
          storage_version.to_i >= HASHED_STORAGE_FEATURES[:repository]
        end

        # "@hashed/<first two hex chars>/<next two hex chars>/<full hash>"
        def hashed_disk_path
          "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
        end

        # SHA2 hex digest of the project id, memoized; nil when the record
        # has no id.
        def disk_hash
          @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) if id
        end
      end

      # Inserts a `project_repositories` row for every project in the given
      # id range that is on hashed storage and does not have one yet.
      #
      # @param start_id [Integer] first project id of the batch (inclusive)
      # @param stop_id [Integer] last project id of the batch (inclusive)
      def perform(start_id, stop_id)
        Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id))
      end

      private

      # Row attributes for each matching project in the id range; projects
      # that yield no attributes are dropped.
      def project_repositories(start_id, stop_id)
        Project.on_hashed_storage.without_project_repository
          .where(id: start_id..stop_id)
          .map { |project| project.project_repository_attributes(shard_finder) }
          .compact
      end

      # One finder per migration instance, so shards are looked up from
      # memory across the whole batch.
      def shard_finder
        @shard_finder ||= ShardFinder.new
      end
    end
  end
end