summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Thomas <nick@gitlab.com>2018-06-07 15:40:44 +0000
committerNick Thomas <nick@gitlab.com>2018-06-07 15:40:44 +0000
commite114c6fcccef929f955ce25556d71456c2b05930 (patch)
treedd00844e2b78592b453d04443367eb9879f18c42
parent1b06b8342cc75d948c40c9b74e4c4884a488484e (diff)
parent36c337647591d964b7ef1e1fc61fc64a930fb6f4 (diff)
downloadgitlab-ce-e114c6fcccef929f955ce25556d71456c2b05930.tar.gz
Merge branch '46922-hashed-storage-single-project' into 'master'
Resolve "Hashed Storage: Make possible to migrate single project" Closes #46922 See merge request gitlab-org/gitlab-ce!19268
-rw-r--r--app/workers/storage_migrator_worker.rb25
-rw-r--r--changelogs/unreleased/46922-hashed-storage-single-project.yml5
-rw-r--r--doc/administration/raketasks/storage.md45
-rw-r--r--lib/gitlab/hashed_storage/migrator.rb57
-rw-r--r--lib/gitlab/hashed_storage/rake_helper.rb14
-rw-r--r--lib/tasks/gitlab/storage.rake21
-rw-r--r--spec/lib/gitlab/hashed_storage/migrator_spec.rb75
-rw-r--r--spec/tasks/gitlab/storage_rake_spec.rb45
-rw-r--r--spec/workers/storage_migrator_worker_spec.rb25
9 files changed, 243 insertions, 69 deletions
diff --git a/app/workers/storage_migrator_worker.rb b/app/workers/storage_migrator_worker.rb
index f92421a667d..0aff0c4c7c6 100644
--- a/app/workers/storage_migrator_worker.rb
+++ b/app/workers/storage_migrator_worker.rb
@@ -1,29 +1,8 @@
class StorageMigratorWorker
include ApplicationWorker
- BATCH_SIZE = 100
-
def perform(start, finish)
- projects = build_relation(start, finish)
-
- projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
- Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..."
-
- begin
- project.migrate_to_hashed_storage!
- rescue => err
- Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
- end
- end
- end
-
- def build_relation(start, finish)
- relation = Project
- table = Project.arel_table
-
- relation = relation.where(table[:id].gteq(start)) if start
- relation = relation.where(table[:id].lteq(finish)) if finish
-
- relation
+ migrator = Gitlab::HashedStorage::Migrator.new
+ migrator.bulk_migrate(start, finish)
end
end
diff --git a/changelogs/unreleased/46922-hashed-storage-single-project.yml b/changelogs/unreleased/46922-hashed-storage-single-project.yml
new file mode 100644
index 00000000000..c293238a5a4
--- /dev/null
+++ b/changelogs/unreleased/46922-hashed-storage-single-project.yml
@@ -0,0 +1,5 @@
+---
+title: 'Hashed Storage: migration rake task now can be executed to specific project'
+merge_request: 19268
+author:
+type: changed
diff --git a/doc/administration/raketasks/storage.md b/doc/administration/raketasks/storage.md
index cfd601b8866..7ad38abe4f5 100644
--- a/doc/administration/raketasks/storage.md
+++ b/doc/administration/raketasks/storage.md
@@ -17,13 +17,21 @@ This task will schedule all your existing projects and attachments associated wi
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:migrate_to_hashed
+sudo gitlab-rake gitlab:storage:migrate_to_hashed
```
**Source Installation**
```bash
-rake gitlab:storage:migrate_to_hashed
+sudo -u git -H bundle exec rake gitlab:storage:migrate_to_hashed RAILS_ENV=production
+```
+
+They both also accept a range as environment variable:
+
+```bash
+# to migrate any non migrated project from ID 20 to 50.
+export ID_FROM=20
+export ID_TO=50
```
You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen.
@@ -44,13 +52,13 @@ To have a simple summary of projects using **Legacy** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:legacy_projects
+sudo gitlab-rake gitlab:storage:legacy_projects
```
**Source Installation**
```bash
-rake gitlab:storage:legacy_projects
+sudo -u git -H bundle exec rake gitlab:storage:legacy_projects RAILS_ENV=production
```
------
@@ -60,13 +68,13 @@ To list projects using **Legacy** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:list_legacy_projects
+sudo gitlab-rake gitlab:storage:list_legacy_projects
```
**Source Installation**
```bash
-rake gitlab:storage:list_legacy_projects
+sudo -u git -H bundle exec rake gitlab:storage:list_legacy_projects RAILS_ENV=production
```
@@ -77,13 +85,13 @@ To have a simple summary of projects using **Hashed** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:hashed_projects
+sudo gitlab-rake gitlab:storage:hashed_projects
```
**Source Installation**
```bash
-rake gitlab:storage:hashed_projects
+sudo -u git -H bundle exec rake gitlab:storage:hashed_projects RAILS_ENV=production
```
------
@@ -93,14 +101,13 @@ To list projects using **Hashed** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:list_hashed_projects
+sudo gitlab-rake gitlab:storage:list_hashed_projects
```
**Source Installation**
```bash
-rake gitlab:storage:list_hashed_projects
-
+sudo -u git -H bundle exec rake gitlab:storage:list_hashed_projects RAILS_ENV=production
```
## List attachments on Legacy storage
@@ -110,13 +117,13 @@ To have a simple summary of project attachments using **Legacy** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:legacy_attachments
+sudo gitlab-rake gitlab:storage:legacy_attachments
```
**Source Installation**
```bash
-rake gitlab:storage:legacy_attachments
+sudo -u git -H bundle exec rake gitlab:storage:legacy_attachments RAILS_ENV=production
```
------
@@ -126,13 +133,13 @@ To list project attachments using **Legacy** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:list_legacy_attachments
+sudo gitlab-rake gitlab:storage:list_legacy_attachments
```
**Source Installation**
```bash
-rake gitlab:storage:list_legacy_attachments
+sudo -u git -H bundle exec rake gitlab:storage:list_legacy_attachments RAILS_ENV=production
```
## List attachments on Hashed storage
@@ -142,13 +149,13 @@ To have a simple summary of project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:hashed_attachments
+sudo gitlab-rake gitlab:storage:hashed_attachments
```
**Source Installation**
```bash
-rake gitlab:storage:hashed_attachments
+sudo -u git -H bundle exec rake gitlab:storage:hashed_attachments RAILS_ENV=production
```
------
@@ -158,13 +165,13 @@ To list project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
-gitlab-rake gitlab:storage:list_hashed_attachments
+sudo gitlab-rake gitlab:storage:list_hashed_attachments
```
**Source Installation**
```bash
-rake gitlab:storage:list_hashed_attachments
+sudo -u git -H bundle exec rake gitlab:storage:list_hashed_attachments RAILS_ENV=production
```
[storage-types]: ../repository_storage_types.md
diff --git a/lib/gitlab/hashed_storage/migrator.rb b/lib/gitlab/hashed_storage/migrator.rb
new file mode 100644
index 00000000000..9251ed654cd
--- /dev/null
+++ b/lib/gitlab/hashed_storage/migrator.rb
@@ -0,0 +1,57 @@
+module Gitlab
+ module HashedStorage
+ # Hashed Storage Migrator
+ #
+ # This is responsible for scheduling and flagging projects
+ # to be migrated from Legacy to Hashed storage, either one by one or in bulk.
+ class Migrator
+ BATCH_SIZE = 100
+
+ # Schedule a range of projects to be bulk migrated with #bulk_migrate asynchronously
+ #
+ # @param [Object] start first project id for the range
+ # @param [Object] finish last project id for the range
+ def bulk_schedule(start, finish)
+ StorageMigratorWorker.perform_async(start, finish)
+ end
+
+ # Start migration of projects from specified range
+ #
+ # Flagging a project to be migrated is a synchronous action,
+ # but the migration runs through async jobs
+ #
+ # @param [Object] start first project id for the range
+ # @param [Object] finish last project id for the range
+ def bulk_migrate(start, finish)
+ projects = build_relation(start, finish)
+
+ projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
+ migrate(project)
+ end
+ end
+
+ # Flag a project to me migrated
+ #
+ # @param [Object] project that will be migrated
+ def migrate(project)
+ Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..."
+
+ project.migrate_to_hashed_storage!
+ rescue => err
+ Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
+ end
+
+ private
+
+ def build_relation(start, finish)
+ relation = Project
+ table = Project.arel_table
+
+ relation = relation.where(table[:id].gteq(start)) if start
+ relation = relation.where(table[:id].lteq(finish)) if finish
+
+ relation
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/hashed_storage/rake_helper.rb b/lib/gitlab/hashed_storage/rake_helper.rb
index 8aba42ccfce..303b05e6a9a 100644
--- a/lib/gitlab/hashed_storage/rake_helper.rb
+++ b/lib/gitlab/hashed_storage/rake_helper.rb
@@ -9,8 +9,20 @@ module Gitlab
ENV.fetch('LIMIT', 500).to_i
end
+ def self.range_from
+ ENV['ID_FROM']
+ end
+
+ def self.range_to
+ ENV['ID_TO']
+ end
+
+ def self.range_single_item?
+ !range_from.nil? && range_from == range_to
+ end
+
def self.project_id_batches(&block)
- Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
+ Project.with_unmigrated_storage.in_batches(of: batch_size, start: range_from, finish: range_to) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
yield ids.min, ids.max
diff --git a/lib/tasks/gitlab/storage.rake b/lib/tasks/gitlab/storage.rake
index 68d6f9d7cb1..f539b1df955 100644
--- a/lib/tasks/gitlab/storage.rake
+++ b/lib/tasks/gitlab/storage.rake
@@ -2,9 +2,26 @@ namespace :gitlab do
namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do
- legacy_projects_count = Project.with_unmigrated_storage.count
+ storage_migrator = Gitlab::HashedStorage::Migrator.new
helper = Gitlab::HashedStorage::RakeHelper
+ if helper.range_single_item?
+ project = Project.with_unmigrated_storage.find_by(id: helper.range_from)
+
+ unless project
+ puts "There are no projects requiring storage migration with ID=#{helper.range_from}"
+
+ next
+ end
+
+ puts "Enqueueing storage migration of #{project.full_path} (ID=#{project.id})..."
+ storage_migrator.migrate(project)
+
+ next
+ end
+
+ legacy_projects_count = Project.with_unmigrated_storage.count
+
if legacy_projects_count == 0
puts 'There are no projects requiring storage migration. Nothing to do!'
@@ -14,7 +31,7 @@ namespace :gitlab do
print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"
helper.project_id_batches do |start, finish|
- StorageMigratorWorker.perform_async(start, finish)
+ storage_migrator.bulk_schedule(start, finish)
print '.'
end
diff --git a/spec/lib/gitlab/hashed_storage/migrator_spec.rb b/spec/lib/gitlab/hashed_storage/migrator_spec.rb
new file mode 100644
index 00000000000..813ae43b4d3
--- /dev/null
+++ b/spec/lib/gitlab/hashed_storage/migrator_spec.rb
@@ -0,0 +1,75 @@
+require 'spec_helper'
+
+describe Gitlab::HashedStorage::Migrator do
+ describe '#bulk_schedule' do
+ it 'schedules job to StorageMigratorWorker' do
+ Sidekiq::Testing.fake! do
+ expect { subject.bulk_schedule(1, 5) }.to change(StorageMigratorWorker.jobs, :size).by(1)
+ end
+ end
+ end
+
+ describe '#bulk_migrate' do
+ let(:projects) { create_list(:project, 2, :legacy_storage) }
+ let(:ids) { projects.map(&:id) }
+
+ it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do
+ Sidekiq::Testing.fake! do
+ expect { subject.bulk_migrate(ids.min, ids.max) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(2)
+ end
+ end
+
+ it 'sets projects as read only' do
+ allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
+ subject.bulk_migrate(ids.min, ids.max)
+
+ projects.each do |project|
+ expect(project.reload.repository_read_only?).to be_truthy
+ end
+ end
+
+ it 'rescues and log exceptions' do
+ allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
+ expect { subject.bulk_migrate(ids.min, ids.max) }.not_to raise_error
+ end
+
+ it 'delegates each project in specified range to #migrate' do
+ projects.each do |project|
+ expect(subject).to receive(:migrate).with(project)
+ end
+
+ subject.bulk_migrate(ids.min, ids.max)
+ end
+ end
+
+ describe '#migrate' do
+ let(:project) { create(:project, :legacy_storage, :empty_repo) }
+
+ it 'enqueues job to ProjectMigrateHashedStorageWorker' do
+ Sidekiq::Testing.fake! do
+ expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)
+ end
+ end
+
+ it 'rescues and log exceptions' do
+ allow(project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
+
+ expect { subject.migrate(project) }.not_to raise_error
+ end
+
+ it 'sets project as read only' do
+ allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async)
+ subject.migrate(project)
+
+ expect(project.reload.repository_read_only?).to be_truthy
+ end
+
+ it 'migrate project' do
+ Sidekiq::Testing.inline! do
+ subject.migrate(project)
+ end
+
+ expect(project.reload.hashed_storage?(:attachments)).to be_truthy
+ end
+ end
+end
diff --git a/spec/tasks/gitlab/storage_rake_spec.rb b/spec/tasks/gitlab/storage_rake_spec.rb
index 35e451b2f9a..233076ad6fa 100644
--- a/spec/tasks/gitlab/storage_rake_spec.rb
+++ b/spec/tasks/gitlab/storage_rake_spec.rb
@@ -1,6 +1,6 @@
require 'rake_helper'
-describe 'gitlab:storage:*' do
+describe 'rake gitlab:storage:*' do
before do
Rake.application.rake_require 'tasks/gitlab/storage'
@@ -44,16 +44,18 @@ describe 'gitlab:storage:*' do
end
describe 'gitlab:storage:migrate_to_hashed' do
+ let(:task) { 'gitlab:storage:migrate_to_hashed' }
+
context '0 legacy projects' do
it 'does nothing' do
expect(StorageMigratorWorker).not_to receive(:perform_async)
- run_rake_task('gitlab:storage:migrate_to_hashed')
+ run_rake_task(task)
end
end
context '3 legacy projects' do
- let(:projects) { create_list(:project, 3, storage_version: 0) }
+ let(:projects) { create_list(:project, 3, :legacy_storage) }
context 'in batches of 1' do
before do
@@ -65,7 +67,7 @@ describe 'gitlab:storage:*' do
expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id)
end
- run_rake_task('gitlab:storage:migrate_to_hashed')
+ run_rake_task(task)
end
end
@@ -80,23 +82,48 @@ describe 'gitlab:storage:*' do
expect(StorageMigratorWorker).to receive(:perform_async).with(first, last)
end
- run_rake_task('gitlab:storage:migrate_to_hashed')
+ run_rake_task(task)
end
end
end
+
+ context 'with same id in range' do
+ it 'displays message when project cant be found' do
+ stub_env('ID_FROM', 99999)
+ stub_env('ID_TO', 99999)
+
+ expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=99999/).to_stdout
+ end
+
+ it 'displays a message when project exists but its already migrated' do
+ project = create(:project)
+ stub_env('ID_FROM', project.id)
+ stub_env('ID_TO', project.id)
+
+ expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=#{project.id}/).to_stdout
+ end
+
+ it 'enqueues migration when project can be found' do
+ project = create(:project, :legacy_storage)
+ stub_env('ID_FROM', project.id)
+ stub_env('ID_TO', project.id)
+
+ expect { run_rake_task(task) }.to output(/Enqueueing storage migration .* \(ID=#{project.id}\)/).to_stdout
+ end
+ end
end
describe 'gitlab:storage:legacy_projects' do
it_behaves_like 'rake entities summary', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:legacy_projects' }
- let(:create_collection) { create_list(:project, 3, storage_version: 0) }
+ let(:create_collection) { create_list(:project, 3, :legacy_storage) }
end
end
describe 'gitlab:storage:list_legacy_projects' do
it_behaves_like 'rake listing entities', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:list_legacy_projects' }
- let(:create_collection) { create_list(:project, 3, storage_version: 0) }
+ let(:create_collection) { create_list(:project, 3, :legacy_storage) }
end
end
@@ -133,7 +160,7 @@ describe 'gitlab:storage:*' do
describe 'gitlab:storage:hashed_attachments' do
it_behaves_like 'rake entities summary', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:hashed_attachments' }
- let(:project) { create(:project, storage_version: 2) }
+ let(:project) { create(:project) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
@@ -141,7 +168,7 @@ describe 'gitlab:storage:*' do
describe 'gitlab:storage:list_hashed_attachments' do
it_behaves_like 'rake listing entities', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:list_hashed_attachments' }
- let(:project) { create(:project, storage_version: 2) }
+ let(:project) { create(:project) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
diff --git a/spec/workers/storage_migrator_worker_spec.rb b/spec/workers/storage_migrator_worker_spec.rb
index ff625164142..815432aacce 100644
--- a/spec/workers/storage_migrator_worker_spec.rb
+++ b/spec/workers/storage_migrator_worker_spec.rb
@@ -2,29 +2,24 @@ require 'spec_helper'
describe StorageMigratorWorker do
subject(:worker) { described_class.new }
- let(:projects) { create_list(:project, 2, :legacy_storage) }
+ let(:projects) { create_list(:project, 2, :legacy_storage, :empty_repo) }
+ let(:ids) { projects.map(&:id) }
describe '#perform' do
- let(:ids) { projects.map(&:id) }
+ it 'delegates to MigratorService' do
+ expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(5, 10)
- it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do
- expect(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
-
- worker.perform(ids.min, ids.max)
+ worker.perform(5, 10)
end
- it 'sets projects as read only' do
- allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
- worker.perform(ids.min, ids.max)
+ it 'migrates projects in the specified range' do
+ Sidekiq::Testing.inline! do
+ worker.perform(ids.min, ids.max)
+ end
projects.each do |project|
- expect(project.reload.repository_read_only?).to be_truthy
+ expect(project.reload.hashed_storage?(:attachments)).to be_truthy
end
end
-
- it 'rescues and log exceptions' do
- allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
- expect { worker.perform(ids.min, ids.max) }.not_to raise_error
- end
end
end