diff options
author | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2017-07-05 10:38:06 +0200 |
---|---|---|
committer | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2017-07-05 10:38:06 +0200 |
commit | c5ede858eab81e662c48761749ff2fa22dbfa9df (patch) | |
tree | 3118abb851c551c9bcac5949b01ccce5f8d2abcc | |
parent | c5f1e1a70bd79b36fe8cfda75b7366dd8ee90d66 (diff) | |
parent | f6966d96ec5941db364a2c8d9d2d80d3aa7d20f2 (diff) | |
download | gitlab-ce-c5ede858eab81e662c48761749ff2fa22dbfa9df.tar.gz |
Merge branch 'fix/gb/stage-id-reference-background-migration' into backstage/gb/migrate-stages-statuses
* fix/gb/stage-id-reference-background-migration: (22 commits)
Reduce a delay between stage_id scheduled migrations
Improve exception description in bg migrations
Do not override original AR5 batching interface
Sanitize id value passed to async background migration
Improve code examples in background migrations docs
Add description to exception in bg migrations worker
Do not compare float with integer in migrations specs
Improve readability of build stage id migration query
Use integers to schedule delayed background migrations
Test if argument passed to a migration is present
Make `inline` a default sidekiq testing processing again
Improve specs for background stage_id ref migration
Perform stage_id ref backgound migration in bulks
Remove unused background migrations matcher
Use ActiveRecord 5 batching to schedule bg migration
Make it possible to schedule bg migrations in bulk
Add specs for delayed stage_id background migrations
Schedule background migration only when it is needed
Find builds that require a migration in batches
Update `db/schema.rb` with a new schema version
...
7 files changed, 185 insertions, 7 deletions
diff --git a/app/workers/background_migration_worker.rb b/app/workers/background_migration_worker.rb index e85e221d353..45ce49bb5c0 100644 --- a/app/workers/background_migration_worker.rb +++ b/app/workers/background_migration_worker.rb @@ -2,18 +2,34 @@ class BackgroundMigrationWorker include Sidekiq::Worker include DedicatedSidekiqQueue - # Schedules a number of jobs in bulk + # Enqueues a number of jobs in bulk. # # The `jobs` argument should be an Array of Arrays, each sub-array must be in # the form: # # [migration-class, [arg1, arg2, ...]] - def self.perform_bulk(*jobs) + def self.perform_bulk(jobs) Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => jobs) end + # Schedules multiple jobs in bulk, with a delay. + # + def self.perform_bulk_in(delay, jobs) + now = Time.now.to_i + schedule = now + delay.to_i + + if schedule <= now + raise ArgumentError, 'The schedule time must be in the future!' + end + + Sidekiq::Client.push_bulk('class' => self, + 'queue' => sidekiq_options['queue'], + 'args' => jobs, + 'at' => schedule) + end + # Performs the background migration. # # See Gitlab::BackgroundMigration.perform for more information. diff --git a/db/post_migrate/20170628080858_migrate_stage_id_reference_in_background.rb b/db/post_migrate/20170628080858_migrate_stage_id_reference_in_background.rb new file mode 100644 index 00000000000..ebec4cb6bb7 --- /dev/null +++ b/db/post_migrate/20170628080858_migrate_stage_id_reference_in_background.rb @@ -0,0 +1,29 @@ +class MigrateStageIdReferenceInBackground < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 10000 + MIGRATION = 'MigrateBuildStageIdReference'.freeze + + disable_ddl_transaction! + + class Build < ActiveRecord::Base + self.table_name = 'ci_builds' + end + + def up + index = 1 + + Build.where(stage_id: nil).in_batches(of: BATCH_SIZE) do |relation| + jobs = relation.pluck(:id).map { |id| [MIGRATION, [id]] } + schedule = index * 2.minutes + index += 1 + + BackgroundMigrationWorker.perform_bulk_in(schedule, jobs) + end + end + + def down + # noop + end +end diff --git a/doc/development/background_migrations.md b/doc/development/background_migrations.md index 0239e6b3163..72a34aa7de9 100644 --- a/doc/development/background_migrations.md +++ b/doc/development/background_migrations.md @@ -50,14 +50,13 @@ your migration: BackgroundMigrationWorker.perform_async('BackgroundMigrationClassName', [arg1, arg2, ...]) ``` -Usually it's better to schedule jobs in bulk, for this you can use +Usually it's better to enqueue jobs in bulk, for this you can use `BackgroundMigrationWorker.perform_bulk`: ```ruby BackgroundMigrationWorker.perform_bulk( - ['BackgroundMigrationClassName', [1]], - ['BackgroundMigrationClassName', [2]], - ... + [['BackgroundMigrationClassName', [1]], + ['BackgroundMigrationClassName', [2]]] ) ``` @@ -68,6 +67,16 @@ consuming migrations it's best to schedule a background job using an updates. Removals in turn can be handled by simply defining foreign keys with cascading deletes. +If you would like to schedule jobs in bulk with a delay, you can use +`BackgroundMigrationWorker.perform_bulk_in`: + +```ruby +jobs = [['BackgroundMigrationClassName', [1]], + ['BackgroundMigrationClassName', [2]]] + +BackgroundMigrationWorker.perform_bulk_in(5.minutes, jobs) +``` + ## Cleaning Up Because background migrations can take a long time you can't immediately clean diff --git a/lib/gitlab/background_migration/migrate_build_stage_id_reference.rb b/lib/gitlab/background_migration/migrate_build_stage_id_reference.rb new file mode 100644 index 00000000000..d1d0a968588 --- /dev/null +++ b/lib/gitlab/background_migration/migrate_build_stage_id_reference.rb @@ -0,0 +1,19 @@ +module Gitlab + module BackgroundMigration + class MigrateBuildStageIdReference + def perform(id) + sql = <<-SQL.strip_heredoc + UPDATE "ci_builds" + SET "stage_id" = + (SELECT id FROM ci_stages + WHERE ci_stages.pipeline_id = ci_builds.commit_id + AND ci_stages.name = ci_builds.stage) + WHERE "ci_builds"."id" = #{id.to_i} + AND "ci_builds"."stage_id" IS NULL + SQL + + ActiveRecord::Base.connection.execute(sql) + end + end + end +end diff --git a/spec/migrations/migrate_stage_id_reference_in_background_spec.rb b/spec/migrations/migrate_stage_id_reference_in_background_spec.rb new file mode 100644 index 00000000000..a32a7fceb68 --- /dev/null +++ b/spec/migrations/migrate_stage_id_reference_in_background_spec.rb @@ -0,0 +1,68 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170628080858_migrate_stage_id_reference_in_background') + +describe MigrateStageIdReferenceInBackground, :migration, :sidekiq do + matcher :be_scheduled_migration do |delay, *expected| + match do |migration| + BackgroundMigrationWorker.jobs.any? do |job| + job['args'] == [migration, expected] && + job['at'].to_i == (delay.to_i + Time.now.to_i) + end + end + + failure_message do |migration| + "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!" + end + end + + let(:jobs) { table(:ci_builds) } + let(:stages) { table(:ci_stages) } + let(:pipelines) { table(:ci_pipelines) } + let(:projects) { table(:projects) } + + before do + stub_const('MigrateStageIdReferenceInBackground::BATCH_SIZE', 2) + + projects.create!(id: 123, name: 'gitlab1', path: 'gitlab1') + projects.create!(id: 345, name: 'gitlab2', path: 'gitlab2') + + pipelines.create!(id: 1, project_id: 123, ref: 'master', sha: 'adf43c3a') + pipelines.create!(id: 2, project_id: 345, ref: 'feature', sha: 'cdf43c3c') + + jobs.create!(id: 1, commit_id: 1, project_id: 123, stage_idx: 2, stage: 'build') + jobs.create!(id: 2, commit_id: 1, project_id: 123, stage_idx: 2, stage: 'build') + jobs.create!(id: 3, commit_id: 1, project_id: 123, stage_idx: 1, stage: 'test') + jobs.create!(id: 4, commit_id: 1, project_id: 123, stage_idx: 3, stage: 'deploy') + jobs.create!(id: 5, commit_id: 2, project_id: 345, stage_idx: 1, stage: 'test') + + stages.create(id: 101, pipeline_id: 1, project_id: 123, name: 'test') + stages.create(id: 102, pipeline_id: 1, project_id: 123, name: 'build') + stages.create(id: 103, pipeline_id: 1, project_id: 123, name: 'deploy') + + jobs.create!(id: 6, commit_id: 2, project_id: 345, stage_id: 101, stage_idx: 1, stage: 'test') + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(described_class::MIGRATION).to be_scheduled_migration(2.minutes, 1) + expect(described_class::MIGRATION).to be_scheduled_migration(2.minutes, 2) + expect(described_class::MIGRATION).to be_scheduled_migration(4.minutes, 3) + expect(described_class::MIGRATION).to be_scheduled_migration(4.minutes, 4) + expect(BackgroundMigrationWorker.jobs.size).to eq 5 + end + end + end + + it 'schedules background migrations' do + Sidekiq::Testing.inline! do + expect(jobs.where(stage_id: nil).count).to eq 5 + + migrate! + + expect(jobs.where(stage_id: nil).count).to eq 1 + end + end +end diff --git a/spec/support/sidekiq.rb b/spec/support/sidekiq.rb index 575d3451150..5478fea4e64 100644 --- a/spec/support/sidekiq.rb +++ b/spec/support/sidekiq.rb @@ -3,3 +3,9 @@ require 'sidekiq/testing/inline' Sidekiq::Testing.server_middleware do |chain| chain.add Gitlab::SidekiqStatus::ServerMiddleware end + +RSpec.configure do |config| + config.after(:each, :sidekiq) do + Sidekiq::Worker.clear_all + end +end diff --git a/spec/workers/background_migration_worker_spec.rb b/spec/workers/background_migration_worker_spec.rb index 85939429feb..4f6e3474634 100644 --- a/spec/workers/background_migration_worker_spec.rb +++ b/spec/workers/background_migration_worker_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe BackgroundMigrationWorker do +describe BackgroundMigrationWorker, :sidekiq do describe '.perform' do it 'performs a background migration' do expect(Gitlab::BackgroundMigration) @@ -10,4 +10,35 @@ describe BackgroundMigrationWorker do described_class.new.perform('Foo', [10, 20]) end end + + describe '.perform_bulk' do + it 'enqueues background migrations in bulk' do + Sidekiq::Testing.fake! do + described_class.perform_bulk([['Foo', [1]], ['Foo', [2]]]) + + expect(described_class.jobs.count).to eq 2 + expect(described_class.jobs).to all(include('enqueued_at')) + end + end + end + + describe '.perform_bulk_in' do + context 'when delay is valid' do + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + described_class.perform_bulk_in(1.minute, [['Foo', [1]], ['Foo', [2]]]) + + expect(described_class.jobs.count).to eq 2 + expect(described_class.jobs).to all(include('at')) + end + end + end + + context 'when delay is invalid' do + it 'raises an ArgumentError exception' do + expect { described_class.perform_bulk_in(-60, [['Foo']]) } + .to raise_error(ArgumentError) + end + end + end end |