diff options
author | Nick Thomas <nick@gitlab.com> | 2018-06-07 15:40:44 +0000 |
---|---|---|
committer | Nick Thomas <nick@gitlab.com> | 2018-06-07 15:40:44 +0000 |
commit | e114c6fcccef929f955ce25556d71456c2b05930 (patch) | |
tree | dd00844e2b78592b453d04443367eb9879f18c42 | |
parent | 1b06b8342cc75d948c40c9b74e4c4884a488484e (diff) | |
parent | 36c337647591d964b7ef1e1fc61fc64a930fb6f4 (diff) | |
download | gitlab-ce-e114c6fcccef929f955ce25556d71456c2b05930.tar.gz |
Merge branch '46922-hashed-storage-single-project' into 'master'
Resolve "Hashed Storage: Make possible to migrate single project"
Closes #46922
See merge request gitlab-org/gitlab-ce!19268
-rw-r--r-- | app/workers/storage_migrator_worker.rb | 25 | ||||
-rw-r--r-- | changelogs/unreleased/46922-hashed-storage-single-project.yml | 5 | ||||
-rw-r--r-- | doc/administration/raketasks/storage.md | 45 | ||||
-rw-r--r-- | lib/gitlab/hashed_storage/migrator.rb | 57 | ||||
-rw-r--r-- | lib/gitlab/hashed_storage/rake_helper.rb | 14 | ||||
-rw-r--r-- | lib/tasks/gitlab/storage.rake | 21 | ||||
-rw-r--r-- | spec/lib/gitlab/hashed_storage/migrator_spec.rb | 75 | ||||
-rw-r--r-- | spec/tasks/gitlab/storage_rake_spec.rb | 45 | ||||
-rw-r--r-- | spec/workers/storage_migrator_worker_spec.rb | 25 |
9 files changed, 243 insertions, 69 deletions
diff --git a/app/workers/storage_migrator_worker.rb b/app/workers/storage_migrator_worker.rb index f92421a667d..0aff0c4c7c6 100644 --- a/app/workers/storage_migrator_worker.rb +++ b/app/workers/storage_migrator_worker.rb @@ -1,29 +1,8 @@ class StorageMigratorWorker include ApplicationWorker - BATCH_SIZE = 100 - def perform(start, finish) - projects = build_relation(start, finish) - - projects.with_route.find_each(batch_size: BATCH_SIZE) do |project| - Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..." - - begin - project.migrate_to_hashed_storage! - rescue => err - Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}") - end - end - end - - def build_relation(start, finish) - relation = Project - table = Project.arel_table - - relation = relation.where(table[:id].gteq(start)) if start - relation = relation.where(table[:id].lteq(finish)) if finish - - relation + migrator = Gitlab::HashedStorage::Migrator.new + migrator.bulk_migrate(start, finish) end end diff --git a/changelogs/unreleased/46922-hashed-storage-single-project.yml b/changelogs/unreleased/46922-hashed-storage-single-project.yml new file mode 100644 index 00000000000..c293238a5a4 --- /dev/null +++ b/changelogs/unreleased/46922-hashed-storage-single-project.yml @@ -0,0 +1,5 @@ +--- +title: 'Hashed Storage: migration rake task now can be executed to specific project' +merge_request: 19268 +author: +type: changed diff --git a/doc/administration/raketasks/storage.md b/doc/administration/raketasks/storage.md index cfd601b8866..7ad38abe4f5 100644 --- a/doc/administration/raketasks/storage.md +++ b/doc/administration/raketasks/storage.md @@ -17,13 +17,21 @@ This task will schedule all your existing projects and attachments associated wi **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:migrate_to_hashed +sudo gitlab-rake gitlab:storage:migrate_to_hashed ``` **Source Installation** ```bash 
-rake gitlab:storage:migrate_to_hashed +sudo -u git -H bundle exec rake gitlab:storage:migrate_to_hashed RAILS_ENV=production +``` + +They both also accept a range as environment variable: + +```bash +# to migrate any non migrated project from ID 20 to 50. +export ID_FROM=20 +export ID_TO=50 ``` You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen. @@ -44,13 +52,13 @@ To have a simple summary of projects using **Legacy** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:legacy_projects +sudo gitlab-rake gitlab:storage:legacy_projects ``` **Source Installation** ```bash -rake gitlab:storage:legacy_projects +sudo -u git -H bundle exec rake gitlab:storage:legacy_projects RAILS_ENV=production ``` ------ @@ -60,13 +68,13 @@ To list projects using **Legacy** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:list_legacy_projects +sudo gitlab-rake gitlab:storage:list_legacy_projects ``` **Source Installation** ```bash -rake gitlab:storage:list_legacy_projects +sudo -u git -H bundle exec rake gitlab:storage:list_legacy_projects RAILS_ENV=production ``` @@ -77,13 +85,13 @@ To have a simple summary of projects using **Hashed** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:hashed_projects +sudo gitlab-rake gitlab:storage:hashed_projects ``` **Source Installation** ```bash -rake gitlab:storage:hashed_projects +sudo -u git -H bundle exec rake gitlab:storage:hashed_projects RAILS_ENV=production ``` ------ @@ -93,14 +101,13 @@ To list projects using **Hashed** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:list_hashed_projects +sudo gitlab-rake gitlab:storage:list_hashed_projects ``` **Source Installation** ```bash -rake gitlab:storage:list_hashed_projects - +sudo -u git -H bundle exec rake gitlab:storage:list_hashed_projects RAILS_ENV=production ``` ## List attachments on Legacy storage @@ -110,13 +117,13 @@ To have a simple summary of project attachments using 
**Legacy** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:legacy_attachments +sudo gitlab-rake gitlab:storage:legacy_attachments ``` **Source Installation** ```bash -rake gitlab:storage:legacy_attachments +sudo -u git -H bundle exec rake gitlab:storage:legacy_attachments RAILS_ENV=production ``` ------ @@ -126,13 +133,13 @@ To list project attachments using **Legacy** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:list_legacy_attachments +sudo gitlab-rake gitlab:storage:list_legacy_attachments ``` **Source Installation** ```bash -rake gitlab:storage:list_legacy_attachments +sudo -u git -H bundle exec rake gitlab:storage:list_legacy_attachments RAILS_ENV=production ``` ## List attachments on Hashed storage @@ -142,13 +149,13 @@ To have a simple summary of project attachments using **Hashed** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:hashed_attachments +sudo gitlab-rake gitlab:storage:hashed_attachments ``` **Source Installation** ```bash -rake gitlab:storage:hashed_attachments +sudo -u git -H bundle exec rake gitlab:storage:hashed_attachments RAILS_ENV=production ``` ------ @@ -158,13 +165,13 @@ To list project attachments using **Hashed** storage: **Omnibus Installation** ```bash -gitlab-rake gitlab:storage:list_hashed_attachments +sudo gitlab-rake gitlab:storage:list_hashed_attachments ``` **Source Installation** ```bash -rake gitlab:storage:list_hashed_attachments +sudo -u git -H bundle exec rake gitlab:storage:list_hashed_attachments RAILS_ENV=production ``` [storage-types]: ../repository_storage_types.md diff --git a/lib/gitlab/hashed_storage/migrator.rb b/lib/gitlab/hashed_storage/migrator.rb new file mode 100644 index 00000000000..9251ed654cd --- /dev/null +++ b/lib/gitlab/hashed_storage/migrator.rb @@ -0,0 +1,57 @@ +module Gitlab + module HashedStorage + # Hashed Storage Migrator + # + # This is responsible for scheduling and flagging projects + # to be migrated from Legacy to Hashed 
storage, either one by one or in bulk. + class Migrator + BATCH_SIZE = 100 + + # Schedule a range of projects to be bulk migrated with #bulk_migrate asynchronously + # + # @param [Object] start first project id for the range + # @param [Object] finish last project id for the range + def bulk_schedule(start, finish) + StorageMigratorWorker.perform_async(start, finish) + end + + # Start migration of projects from specified range + # + # Flagging a project to be migrated is a synchronous action, + # but the migration runs through async jobs + # + # @param [Object] start first project id for the range + # @param [Object] finish last project id for the range + def bulk_migrate(start, finish) + projects = build_relation(start, finish) + + projects.with_route.find_each(batch_size: BATCH_SIZE) do |project| + migrate(project) + end + end + + # Flag a project to be migrated + # + # @param [Object] project that will be migrated + def migrate(project) + Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..." + + project.migrate_to_hashed_storage! + rescue => err + Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}") + end + + private + + def build_relation(start, finish) + relation = Project + table = Project.arel_table + + relation = relation.where(table[:id].gteq(start)) if start + relation = relation.where(table[:id].lteq(finish)) if finish + + relation + end + end + end +end diff --git a/lib/gitlab/hashed_storage/rake_helper.rb b/lib/gitlab/hashed_storage/rake_helper.rb index 8aba42ccfce..303b05e6a9a 100644 --- a/lib/gitlab/hashed_storage/rake_helper.rb +++ b/lib/gitlab/hashed_storage/rake_helper.rb @@ -9,8 +9,20 @@ module Gitlab ENV.fetch('LIMIT', 500).to_i end + def self.range_from + ENV['ID_FROM'] + end + + def self.range_to + ENV['ID_TO'] + end + + def self.range_single_item? + !range_from.nil?
&& range_from == range_to + end + def self.project_id_batches(&block) - Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches + Project.with_unmigrated_storage.in_batches(of: batch_size, start: range_from, finish: range_to) do |relation| # rubocop: disable Cop/InBatches ids = relation.pluck(:id) yield ids.min, ids.max diff --git a/lib/tasks/gitlab/storage.rake b/lib/tasks/gitlab/storage.rake index 68d6f9d7cb1..f539b1df955 100644 --- a/lib/tasks/gitlab/storage.rake +++ b/lib/tasks/gitlab/storage.rake @@ -2,9 +2,26 @@ namespace :gitlab do namespace :storage do desc 'GitLab | Storage | Migrate existing projects to Hashed Storage' task migrate_to_hashed: :environment do - legacy_projects_count = Project.with_unmigrated_storage.count + storage_migrator = Gitlab::HashedStorage::Migrator.new helper = Gitlab::HashedStorage::RakeHelper + if helper.range_single_item? + project = Project.with_unmigrated_storage.find_by(id: helper.range_from) + + unless project + puts "There are no projects requiring storage migration with ID=#{helper.range_from}" + + next + end + + puts "Enqueueing storage migration of #{project.full_path} (ID=#{project.id})..." + storage_migrator.migrate(project) + + next + end + + legacy_projects_count = Project.with_unmigrated_storage.count + if legacy_projects_count == 0 puts 'There are no projects requiring storage migration. Nothing to do!' @@ -14,7 +31,7 @@ namespace :gitlab do print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}" helper.project_id_batches do |start, finish| - StorageMigratorWorker.perform_async(start, finish) + storage_migrator.bulk_schedule(start, finish) print '.' 
end diff --git a/spec/lib/gitlab/hashed_storage/migrator_spec.rb b/spec/lib/gitlab/hashed_storage/migrator_spec.rb new file mode 100644 index 00000000000..813ae43b4d3 --- /dev/null +++ b/spec/lib/gitlab/hashed_storage/migrator_spec.rb @@ -0,0 +1,75 @@ +require 'spec_helper' + +describe Gitlab::HashedStorage::Migrator do + describe '#bulk_schedule' do + it 'schedules job to StorageMigratorWorker' do + Sidekiq::Testing.fake! do + expect { subject.bulk_schedule(1, 5) }.to change(StorageMigratorWorker.jobs, :size).by(1) + end + end + end + + describe '#bulk_migrate' do + let(:projects) { create_list(:project, 2, :legacy_storage) } + let(:ids) { projects.map(&:id) } + + it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do + Sidekiq::Testing.fake! do + expect { subject.bulk_migrate(ids.min, ids.max) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(2) + end + end + + it 'sets projects as read only' do + allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice + subject.bulk_migrate(ids.min, ids.max) + + projects.each do |project| + expect(project.reload.repository_read_only?).to be_truthy + end + end + + it 'rescues and log exceptions' do + allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError) + expect { subject.bulk_migrate(ids.min, ids.max) }.not_to raise_error + end + + it 'delegates each project in specified range to #migrate' do + projects.each do |project| + expect(subject).to receive(:migrate).with(project) + end + + subject.bulk_migrate(ids.min, ids.max) + end + end + + describe '#migrate' do + let(:project) { create(:project, :legacy_storage, :empty_repo) } + + it 'enqueues job to ProjectMigrateHashedStorageWorker' do + Sidekiq::Testing.fake! 
do + expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1) + end + end + + it 'rescues and log exceptions' do + allow(project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError) + + expect { subject.migrate(project) }.not_to raise_error + end + + it 'sets project as read only' do + allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async) + subject.migrate(project) + + expect(project.reload.repository_read_only?).to be_truthy + end + + it 'migrate project' do + Sidekiq::Testing.inline! do + subject.migrate(project) + end + + expect(project.reload.hashed_storage?(:attachments)).to be_truthy + end + end +end diff --git a/spec/tasks/gitlab/storage_rake_spec.rb b/spec/tasks/gitlab/storage_rake_spec.rb index 35e451b2f9a..233076ad6fa 100644 --- a/spec/tasks/gitlab/storage_rake_spec.rb +++ b/spec/tasks/gitlab/storage_rake_spec.rb @@ -1,6 +1,6 @@ require 'rake_helper' -describe 'gitlab:storage:*' do +describe 'rake gitlab:storage:*' do before do Rake.application.rake_require 'tasks/gitlab/storage' @@ -44,16 +44,18 @@ describe 'gitlab:storage:*' do end describe 'gitlab:storage:migrate_to_hashed' do + let(:task) { 'gitlab:storage:migrate_to_hashed' } + context '0 legacy projects' do it 'does nothing' do expect(StorageMigratorWorker).not_to receive(:perform_async) - run_rake_task('gitlab:storage:migrate_to_hashed') + run_rake_task(task) end end context '3 legacy projects' do - let(:projects) { create_list(:project, 3, storage_version: 0) } + let(:projects) { create_list(:project, 3, :legacy_storage) } context 'in batches of 1' do before do @@ -65,7 +67,7 @@ describe 'gitlab:storage:*' do expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id) end - run_rake_task('gitlab:storage:migrate_to_hashed') + run_rake_task(task) end end @@ -80,23 +82,48 @@ describe 'gitlab:storage:*' do expect(StorageMigratorWorker).to receive(:perform_async).with(first, last) end - 
run_rake_task('gitlab:storage:migrate_to_hashed') + run_rake_task(task) end end end + + context 'with same id in range' do + it 'displays message when project cant be found' do + stub_env('ID_FROM', 99999) + stub_env('ID_TO', 99999) + + expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=99999/).to_stdout + end + + it 'displays a message when project exists but its already migrated' do + project = create(:project) + stub_env('ID_FROM', project.id) + stub_env('ID_TO', project.id) + + expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=#{project.id}/).to_stdout + end + + it 'enqueues migration when project can be found' do + project = create(:project, :legacy_storage) + stub_env('ID_FROM', project.id) + stub_env('ID_TO', project.id) + + expect { run_rake_task(task) }.to output(/Enqueueing storage migration .* \(ID=#{project.id}\)/).to_stdout + end + end end describe 'gitlab:storage:legacy_projects' do it_behaves_like 'rake entities summary', 'projects', 'Legacy' do let(:task) { 'gitlab:storage:legacy_projects' } - let(:create_collection) { create_list(:project, 3, storage_version: 0) } + let(:create_collection) { create_list(:project, 3, :legacy_storage) } end end describe 'gitlab:storage:list_legacy_projects' do it_behaves_like 'rake listing entities', 'projects', 'Legacy' do let(:task) { 'gitlab:storage:list_legacy_projects' } - let(:create_collection) { create_list(:project, 3, storage_version: 0) } + let(:create_collection) { create_list(:project, 3, :legacy_storage) } end end @@ -133,7 +160,7 @@ describe 'gitlab:storage:*' do describe 'gitlab:storage:hashed_attachments' do it_behaves_like 'rake entities summary', 'attachments', 'Hashed' do let(:task) { 'gitlab:storage:hashed_attachments' } - let(:project) { create(:project, storage_version: 2) } + let(:project) { create(:project) } let(:create_collection) { create_list(:upload, 3, model: project) } end end @@ -141,7 
+168,7 @@ describe 'gitlab:storage:*' do describe 'gitlab:storage:list_hashed_attachments' do it_behaves_like 'rake listing entities', 'attachments', 'Hashed' do let(:task) { 'gitlab:storage:list_hashed_attachments' } - let(:project) { create(:project, storage_version: 2) } + let(:project) { create(:project) } let(:create_collection) { create_list(:upload, 3, model: project) } end end diff --git a/spec/workers/storage_migrator_worker_spec.rb b/spec/workers/storage_migrator_worker_spec.rb index ff625164142..815432aacce 100644 --- a/spec/workers/storage_migrator_worker_spec.rb +++ b/spec/workers/storage_migrator_worker_spec.rb @@ -2,29 +2,24 @@ require 'spec_helper' describe StorageMigratorWorker do subject(:worker) { described_class.new } - let(:projects) { create_list(:project, 2, :legacy_storage) } + let(:projects) { create_list(:project, 2, :legacy_storage, :empty_repo) } + let(:ids) { projects.map(&:id) } describe '#perform' do - let(:ids) { projects.map(&:id) } + it 'delegates to MigratorService' do + expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(5, 10) - it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do - expect(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice - - worker.perform(ids.min, ids.max) + worker.perform(5, 10) end - it 'sets projects as read only' do - allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice - worker.perform(ids.min, ids.max) + it 'migrates projects in the specified range' do + Sidekiq::Testing.inline! do + worker.perform(ids.min, ids.max) + end projects.each do |project| - expect(project.reload.repository_read_only?).to be_truthy + expect(project.reload.hashed_storage?(:attachments)).to be_truthy end end - - it 'rescues and log exceptions' do - allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError) - expect { worker.perform(ids.min, ids.max) }.not_to raise_error - end end end |