diff options
author | Stan Hu <stanhu@gmail.com> | 2018-12-07 23:16:44 +0000 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-12-07 23:16:44 +0000 |
commit | 9655a602ac0d186e10c44f7b6bcdfc0f14ac7b6a (patch) | |
tree | e61c2ada5b2ad79e5607bb241df5a376c3238a84 | |
parent | a27ba8edfa6f41de135cf6f1573cc5366440f7b5 (diff) | |
parent | 8c9e692095afb59111c73781c8ee501cb4cb2459 (diff) | |
download | gitlab-ce-9655a602ac0d186e10c44f7b6bcdfc0f14ac7b6a.tar.gz |
Merge branch 'tc-backfill-hashed-project_repositories' into 'master'
Fill project_repositories for hashed storage projects
Closes #48527
See merge request gitlab-org/gitlab-ce!23482
4 files changed, 255 insertions, 0 deletions
diff --git a/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml new file mode 100644 index 00000000000..90a5c8c4e2c --- /dev/null +++ b/changelogs/unreleased/tc-backfill-hashed-project_repositories.yml @@ -0,0 +1,5 @@ +--- +title: Fill project_repositories for hashed storage projects +merge_request: 23482 +author: +type: added diff --git a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb new file mode 100644 index 00000000000..7814cdba58a --- /dev/null +++ b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 1_000 + DELAY_INTERVAL = 5.minutes + MIGRATION = 'BackfillHashedProjectRepositories' + + disable_ddl_transaction! + + class Project < ActiveRecord::Base + include EachBatch + + self.table_name = 'projects' + end + + def up + queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL) + end + + def down + # no-op: since there could have been existing rows before the migration, do not remove anything + end +end diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb new file mode 100644 index 00000000000..2f76f2f7434 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Class that will fill the project_repositories table + # for all projects that are on hashed storage and for which an entry + # is missing in this table. 
+ class BackfillHashedProjectRepositories + # Shard model + class Shard < ActiveRecord::Base + self.table_name = 'shards' + end + + # Class that will find or create the shard by name. + # There is only a small set of shards, which would + # not change quickly, so look them up from memory + # instead of hitting the DB each time. + class ShardFinder + def find_shard_id(name) + shard_id = shards.fetch(name, nil) + return shard_id if shard_id.present? + + Shard.transaction(requires_new: true) do + create!(name) + end + rescue ActiveRecord::RecordNotUnique + reload! + retry + end + + private + + def create!(name) + Shard.create!(name: name).tap { |shard| @shards[name] = shard.id } + end + + def shards + @shards ||= reload! + end + + def reload! + @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten] + end + end + + # ProjectRepository model + class ProjectRepository < ActiveRecord::Base + self.table_name = 'project_repositories' + + belongs_to :project, inverse_of: :project_repository + end + + # Project model + class Project < ActiveRecord::Base + self.table_name = 'projects' + + HASHED_PATH_PREFIX = '@hashed' + + HASHED_STORAGE_FEATURES = { + repository: 1, + attachments: 2 + }.freeze + + has_one :project_repository, inverse_of: :project + + class << self + def on_hashed_storage + where(Project.arel_table[:storage_version] + .gteq(HASHED_STORAGE_FEATURES[:repository])) + end + + def without_project_repository + joins(left_outer_join_project_repository) + .where(ProjectRepository.arel_table[:project_id].eq(nil)) + end + + def left_outer_join_project_repository + projects_table = Project.arel_table + repository_table = ProjectRepository.arel_table + + projects_table + .join(repository_table, Arel::Nodes::OuterJoin) + .on(projects_table[:id].eq(repository_table[:project_id])) + .join_sources + end + end + + def hashed_storage? 
+ self.storage_version && self.storage_version >= 1 + end + + def hashed_disk_path + "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}" + end + + def disk_hash + @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) + end + end + + def perform(start_id, stop_id) + Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) + end + + private + + def project_repositories(start_id, stop_id) + Project.on_hashed_storage + .without_project_repository + .where(id: start_id..stop_id) + .map { |project| build_attributes_for_project(project) } + .compact + end + + def build_attributes_for_project(project) + return unless project.hashed_storage? + + { + project_id: project.id, + shard_id: find_shard_id(project.repository_storage), + disk_path: project.hashed_disk_path + } + end + + def find_shard_id(repository_storage) + shard_finder.find_shard_id(repository_storage) + end + + def shard_finder + @shard_finder ||= ShardFinder.new + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb new file mode 100644 index 00000000000..b6c1edbbf8b --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb @@ -0,0 +1,90 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do + let(:namespaces) { table(:namespaces) } + let(:project_repositories) { table(:project_repositories) } + let(:projects) { table(:projects) } + let(:shards) { table(:shards) } + let(:group) { namespaces.create!(name: 'foo', path: 'foo') } + let(:shard) { shards.create!(name: 'default') } + + describe described_class::ShardFinder do + describe '#find_shard_id' do + it 'creates a new shard when it does not exist yet' do + expect { subject.find_shard_id('other') }.to 
change(shards, :count).by(1) + end + + it 'returns the shard when it exists' do + shards.create(id: 5, name: 'other') + + shard_id = subject.find_shard_id('other') + + expect(shard_id).to eq(5) + end + + it 'only queries the database once to retrieve shards' do + subject.find_shard_id('default') + + expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0) + end + end + end + + describe described_class::Project do + describe '.on_hashed_storage' do + it 'finds projects with repository on hashed storage' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2) + projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0) + projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil) + + expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2]) + end + end + + describe '.without_project_repository' do + it 'finds projects which do not have a projects_repositories entry' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id) + project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id) + + expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1) + end + end + end + + describe '#perform' do + it 'creates a project_repository row for projects on hashed storage that need one' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2) + + expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2) + end + + it 'does nothing for projects on hashed storage that have already a project_repository row' do 
+ projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id) + + expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count) + end + + it 'does nothing for projects on legacy storage' do + projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0) + + expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count) + end + + it 'inserts rows in a single query' do + projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) + + control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) } + + projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) + projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) + + expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count) + end + end +end |