summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorToon Claes <toon@gitlab.com>2018-11-30 17:03:29 +0100
committerDouglas Barbosa Alexandre <dbalexandre@gmail.com>2018-12-06 21:04:41 -0200
commit5237a55d62f8dcb021a041741b3f09cad7784a36 (patch)
tree8c543fb224f2fe19a89f8a6758dd86dd9da53321
parent0f338434b90b6e385c0c1947ff53c143dd7ed6be (diff)
downloadgitlab-ce-5237a55d62f8dcb021a041741b3f09cad7784a36.tar.gz
Fill project_repositories for hashed storage
This adds a background migration that will ensure all projects that are on hashed storage have a row in `project_repositories`. Related issue: https://gitlab.com/gitlab-org/gitlab-ce/issues/48527
-rw-r--r--changelogs/unreleased/tc-backfill-hashed-project_repositories.yml5
-rw-r--r--db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb27
-rw-r--r--lib/gitlab/background_migration/backfill_hashed_project_repositories.rb124
-rw-r--r--spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb155
4 files changed, 311 insertions, 0 deletions
diff --git a/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml
new file mode 100644
index 00000000000..90a5c8c4e2c
--- /dev/null
+++ b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml
@@ -0,0 +1,5 @@
+---
+title: Fill project_repositories for hashed storage projects
+merge_request: 23482
+author:
+type: added
diff --git a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
new file mode 100644
index 00000000000..b989d9fb43d
--- /dev/null
+++ b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 1_000
+ DELAY_INTERVAL = 1.minutes
+ MIGRATION = 'BackfillHashedProjectRepositories'
+
+ disable_ddl_transaction!
+
+ class Project < ActiveRecord::Base
+ include EachBatch
+
+ self.table_name = 'projects'
+ end
+
+ def up
+ queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL)
+ end
+
+ def down
+ # Since there could have been existing rows before the migration
+ # do not remove anything
+ end
+end
diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
new file mode 100644
index 00000000000..88696dd1aa6
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Class that will create rows in project_repositories for all
+ # projects that are on hashed storage
+ class BackfillHashedProjectRepositories
+ # Model for a Shard
+ class Shard < ActiveRecord::Base
+ self.table_name = 'shards'
+
+ def self.by_name(name)
+ to_a.detect { |shard| shard.name == name } || create_by(name: name)
+ rescue ActiveRecord::RecordNotUnique
+ retry
+ end
+ end
+
+ # Class that will find or create the shard by name.
+ # There is only a small set of shards, which would not change quickly,
+ # so look them up from memory instead of hitting the DB each time.
+ class ShardFinder
+ def find(name)
+ shards.detect { |shard| shard.name == name } || create!(name)
+ rescue ActiveRecord::RecordNotUnique
+ load!
+ retry
+ end
+
+ private
+
+ def create!(name)
+ Shard.create!(name: name).tap { |shard| @shards << shard }
+ end
+
+ def shards
+ @shards || load!
+ end
+
+ def load!
+ @shards = Shard.all.to_a
+ end
+ end
+
+ # Model for a ProjectRepository
+ #
+ # Bound to the `project_repositories` table; each row belongs to one
+ # project, which exposes the reverse side as `project_repository`.
+ class ProjectRepository < ActiveRecord::Base
+ self.table_name = 'project_repositories'
+
+ belongs_to :project, inverse_of: :project_repository
+ end
+
+ # Model for a Project
+ class Project < ActiveRecord::Base
+ self.table_name = 'projects'
+
+ HASHED_PATH_PREFIX = '@hashed'
+ HASHED_STORAGE_FEATURES = {
+ repository: 1,
+ attachments: 2
+ }.freeze
+
+ has_one :project_repository, inverse_of: :project
+
+ class << self
+ def on_hashed_storage
+ where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[:repository]))
+ end
+
+ def without_project_repository
+ cond = ProjectRepository.arel_table[:project_id].eq(nil)
+ left_outer_joins(:project_repository).where(cond)
+ end
+
+ def left_outer_joins(relation)
+ return super if Gitlab.rails5?
+
+ # TODO Rails 4?
+ end
+ end
+
+ def project_repository_attributes(shard_finder)
+ return unless hashed_storage?
+
+ {
+ project_id: id,
+ shard_id: shard_finder.find(repository_storage).id,
+ disk_path: hashed_disk_path
+ }
+ end
+
+ private
+
+ def hashed_storage?
+ self.storage_version && self.storage_version >= 1
+ end
+
+ def hashed_disk_path
+ "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
+ end
+
+ def disk_hash
+ @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) if id
+ end
+ end
+
+ def perform(start_id, stop_id)
+ Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id))
+ end
+
+ private
+
+ def project_repositories(start_id, stop_id)
+ Project.on_hashed_storage.without_project_repository
+ .where(id: start_id..stop_id)
+ .map { |project| project.project_repository_attributes(shard_finder) }
+ .compact
+ end
+
+ def shard_finder
+ @shard_finder ||= ShardFinder.new
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
new file mode 100644
index 00000000000..d2f499ffa64
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
+ let(:shards) { table(:shards) }
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+ let(:project_repositories) { table(:project_repositories) }
+ let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
+ let(:default_shard) { shards.create!(name: 'default') }
+
+ describe described_class::ShardFinder do
+ describe '#find' do
+ subject(:finder) { described_class.new }
+
+ it 'creates the shard by name' do
+ expect(finder).to receive(:create!).and_call_original
+
+ expect(finder.find('default')).to be_present
+ end
+
+ it 'does not try to create existing shards' do
+ shards.create(name: 'default')
+
+ expect(finder).not_to receive(:create!)
+
+ finder.find('default')
+ end
+
+ it 'only queries the database once for shards' do
+ finder.find('default')
+
+ expect do
+ finder.find('default')
+ end.not_to exceed_query_limit(0)
+ end
+
+ it 'creates a new shard when it does not exist yet' do
+ expect do
+ finder.find('other')
+ end.to change(shards, :count).by(1)
+ end
+
+ it 'only creates a new shard once' do
+ finder.find('other')
+
+ expect do
+ finder.find('other')
+ end.not_to change(shards, :count)
+ end
+
+ it 'is not vulnerable to race conditions' do
+ finder.find('default')
+
+ other_shard = shards.create(name: 'other')
+
+ expect(finder.find('other').id).to eq(other_shard.id)
+ end
+ end
+ end
+
+ # Examples for the migration-local Project model: its two scopes and the
+ # attributes it builds for a project_repositories row.
+ describe described_class::Project do
+ describe '.on_hashed_storage' do
+ it 'finds projects with repository on hashed storage' do
+ # storage_version 1 and 2 are expected to match the scope.
+ hashed_projects = [
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1),
+ projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
+ ]
+
+ # storage_version 0 and nil are expected to be excluded.
+ projects.create!(name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
+ projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: nil)
+
+ expect(described_class.on_hashed_storage.pluck(:id)).to match_array(hashed_projects.map(&:id))
+ end
+ end
+
+ describe '.without_project_repository' do
+ it 'finds projects which do not have a projects_repositories row' do
+ without_project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id)
+ with_project = projects.create!(name: 'bar', path: 'bar', namespace_id: group.id)
+ # Only `with_project` gets a project_repositories row.
+ project_repositories.create!(project_id: with_project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
+
+ expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(without_project.id)
+ end
+ end
+
+ describe '#project_repository_attributes' do
+ let(:shard_finder) { Gitlab::BackgroundMigration::BackfillHashedProjectRepositories::ShardFinder.new }
+
+ it 'composes the correct attributes for project_repository' do
+ shiny_shard = shards.create!(name: 'shiny')
+ # The ID is pinned to 5 so the expected disk path below is deterministic.
+ project = projects.create!(id: 5, name: 'foo', path: 'foo', namespace_id: group.id, repository_storage: shiny_shard.name, storage_version: 1)
+
+ # disk_path is built from the SHA2 hex digest of the project ID ("5").
+ expected_attributes = {
+ project_id: project.id,
+ shard_id: shiny_shard.id,
+ disk_path: '@hashed/ef/2d/ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d'
+ }
+
+ expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to eq(expected_attributes)
+ end
+
+ it 'returns nil for a project not on hashed storage' do
+ project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
+
+ expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to be_nil
+ end
+ end
+ end
+
+ # Examples for #perform: which projects get a project_repositories row
+ # and how the number of queries behaves as more projects are added.
+ describe '#perform' do
+ # Runs the background migration over IDs 1 through the ID of
+ # `projects.last`.
+ def perform!
+ described_class.new.perform(1, projects.last.id)
+ end
+
+ it 'create project_repository row for hashed storage project' do
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+
+ expect do
+ perform!
+ end.to change(project_repositories, :count).by(1)
+ end
+
+ it 'does nothing for projects that have already a project_repository' do
+ project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
+ project_repositories.create!(project_id: project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
+
+ expect do
+ perform!
+ end.not_to change(project_repositories, :count)
+ end
+
+ it 'does nothing for projects on legacy storage' do
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
+
+ expect do
+ perform!
+ end.not_to change(project_repositories, :count)
+ end
+
+ it 'inserts rows in a single query' do
+ projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
+
+ # Baseline: queries recorded while migrating a single project.
+ control_count = ActiveRecord::QueryRecorder.new do
+ perform!
+ end
+
+ projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
+ projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
+
+ # Migrating two more projects must not exceed the baseline query count.
+ expect { perform! }.not_to exceed_query_limit(control_count)
+ end
+ end
+end