summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@gitlab.com>2017-09-26 15:26:26 +0100
committerSean McGivern <sean@gitlab.com>2017-09-29 11:56:08 +0100
commit472be7fe611b726c065cc384259b75bb894ce19b (patch)
treeb48e36a2e435da55bf07b4f16363009deabe230f
parent1507ff8ab7d6f59b58978ad7385cb74603002409 (diff)
downloadgitlab-ce-472be7fe611b726c065cc384259b75bb894ce19b.tar.gz
The first attempt didn't migrate all rows on GitLab.com, due to a couple of issues: 1. Some rows in merge_request_diffs had truly huge numbers of commits and diffs serialised - one in particular had 26,000 commits! 2. The jobs were sometimes on Sidekiq hosts with frequent OOM errors, leading to the job being lost. The previous commit adds more logging, and a more robust insertion method. This commit reschedules the jobs, with a generous pause between each.
-rw-r--r--changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml6
-rw-r--r--db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb32
-rw-r--r--spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb59
3 files changed, 97 insertions, 0 deletions
diff --git a/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml
new file mode 100644
index 00000000000..a2e1d07158b
--- /dev/null
+++ b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml
@@ -0,0 +1,6 @@
+---
+title: Reschedule merge request diff background migrations to catch failures from
+ 9.5 run
+merge_request:
+author:
+type: fixed
diff --git a/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb
new file mode 100644
index 00000000000..5732cb85ea5
--- /dev/null
+++ b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb
@@ -0,0 +1,32 @@
+class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 500
+ MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
+ DELAY_INTERVAL = 10.minutes
+
+ disable_ddl_transaction!
+
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ include ::EachBatch
+
+ default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
+ end
+
+ # By this point, we assume ScheduleMergeRequestDiffMigrations - the first
+ # version of this - has already run. On GitLab.com, we have ~220k un-migrated
+ # rows, but these rows will, in general, take a long time.
+ #
+ # With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
+ # are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
+ # three days.
+ def up
+ queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+ end
+
+ def down
+ end
+end
diff --git a/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb
new file mode 100644
index 00000000000..4ab1bb67058
--- /dev/null
+++ b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb
@@ -0,0 +1,59 @@
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
+
+describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
+ matcher :be_scheduled_migration do |time, *expected|
+ match do |migration|
+ BackgroundMigrationWorker.jobs.any? do |job|
+ job['args'] == [migration, expected] &&
+ job['at'].to_i == time.to_i
+ end
+ end
+
+ failure_message do |migration|
+ "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
+ end
+ end
+
+ let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:merge_requests) { table(:merge_requests) }
+ let(:projects) { table(:projects) }
+
+ before do
+ stub_const("#{described_class.name}::BATCH_SIZE", 1)
+
+ projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
+
+ merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
+
+ merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
+ merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
+ merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
+ merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
+ end
+
+ it 'correctly schedules background migrations' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
+ expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
+ expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
+ expect(BackgroundMigrationWorker.jobs.size).to eq 3
+ end
+ end
+ end
+
+ it 'migrates the data' do
+ Sidekiq::Testing.inline! do
+ non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
+
+ expect(merge_request_diffs.where(non_empty).count).to eq 3
+
+ migrate!
+
+ expect(merge_request_diffs.where(non_empty).count).to eq 0
+ end
+ end
+end