summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRémy Coutable <remy@rymai.me>2017-09-29 13:43:28 +0000
committerRémy Coutable <remy@rymai.me>2017-09-29 13:43:28 +0000
commit05e9c9f77e01d2a2761c335f05ca612185189ac0 (patch)
treec717d3988e6c0ab8117f105577e1fafb01c22c5c
parent66775a80833a7acd0a8e5d05327647940e4ed286 (diff)
parent472be7fe611b726c065cc384259b75bb894ce19b (diff)
downloadgitlab-ce-05e9c9f77e01d2a2761c335f05ca612185189ac0.tar.gz
Merge branch '36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout' into 'master'
Insert at most 1,000 rows at once in MR diff background migration Closes #36631 et #37505 See merge request gitlab-org/gitlab-ce!13661
-rw-r--r--changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml6
-rw-r--r--db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb32
-rw-r--r--lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb32
-rw-r--r--spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb122
-rw-r--r--spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb59
5 files changed, 226 insertions, 25 deletions
diff --git a/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml
new file mode 100644
index 00000000000..a2e1d07158b
--- /dev/null
+++ b/changelogs/unreleased/36631-activerecord-statementinvalid-pg-querycanceled-error-canceling-statement-due-to-statement-timeout.yml
@@ -0,0 +1,6 @@
+---
+title: Reschedule merge request diff background migrations to catch failures from
+ 9.5 run
+merge_request:
+author:
+type: fixed
diff --git a/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb
new file mode 100644
index 00000000000..5732cb85ea5
--- /dev/null
+++ b/db/post_migrate/20170926150348_schedule_merge_request_diff_migrations_take_two.rb
@@ -0,0 +1,32 @@
+class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 500
+ MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
+ DELAY_INTERVAL = 10.minutes
+
+ disable_ddl_transaction!
+
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+
+ include ::EachBatch
+
+ default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
+ end
+
+ # By this point, we assume ScheduleMergeRequestDiffMigrations - the first
+ # version of this - has already run. On GitLab.com, we have ~220k un-migrated
+ # rows, but these rows will, in general, take a long time.
+ #
+ # With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
+ # are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
+ # three days.
+ def up
+ queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+ end
+
+ def down
+ end
+end
diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
index 3fde1b09efb..8e5c95f2287 100644
--- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
+++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
@@ -3,11 +3,18 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows
+ class Error < StandardError
+ def backtrace
+ cause.backtrace
+ end
+ end
+
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
BUFFER_ROWS = 1000
+ DIFF_FILE_BUFFER_ROWS = 100
def perform(start_id, stop_id)
merge_request_diffs = MergeRequestDiff
@@ -26,13 +33,17 @@ module Gitlab
if diff_ids.length > BUFFER_ROWS ||
commit_rows.length > BUFFER_ROWS ||
- file_rows.length > BUFFER_ROWS
+ file_rows.length > DIFF_FILE_BUFFER_ROWS
flush_buffers!
end
end
flush_buffers!
+ rescue => e
+ Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
+
+ raise Error.new(e.inspect)
end
private
@@ -45,17 +56,28 @@ module Gitlab
def flush_buffers!
if diff_ids.any?
- MergeRequestDiff.transaction do
- Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows)
- Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows)
+ commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
+ bulk_insert('merge_request_diff_commits', commit_rows_slice)
+ end
- MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
+ file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
+ bulk_insert('merge_request_diff_files', file_rows_slice)
end
+
+ MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end
reset_buffers!
end
+ def bulk_insert(table, rows)
+ Gitlab::Database.bulk_insert(table, rows)
+ rescue ActiveRecord::RecordNotUnique
+ ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
+
+ Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
+ end
+
def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue []
diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
index c0427639746..d2e7243ee05 100644
--- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
+++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
@@ -1,9 +1,9 @@
require 'spec_helper'
-describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
+describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do
- set(:merge_request) { create(:merge_request) }
- set(:merge_request_diff) { merge_request.merge_request_diff }
+ let(:merge_request) { create(:merge_request) }
+ let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
@@ -70,8 +70,8 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
before do
merge_request.reload_diff(true)
- convert_to_yaml(start_id, merge_request_diff.commits, merge_request_diff.diffs)
- convert_to_yaml(stop_id, updated_merge_request_diff.commits, updated_merge_request_diff.diffs)
+ convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
+ convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
MergeRequestDiffCommit.delete_all
MergeRequestDiffFile.delete_all
@@ -80,10 +80,32 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is exceeded' do
before do
stub_const("#{described_class}::BUFFER_ROWS", 1)
+
+ allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
+ end
+
+ it 'inserts commit rows in chunks of BUFFER_ROWS' do
+ # There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
+ stub_const("#{described_class}::BUFFER_ROWS", 20)
+
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_commits', anything)
+ .exactly(4)
+ .times
+ .and_call_original
+
+ subject.perform(start_id, stop_id)
end
- it 'updates and continues' do
- expect(described_class::MergeRequestDiff).to receive(:transaction).twice
+ it 'inserts diff rows in chunks of DIFF_FILE_BUFFER_ROWS' do
+ # There are 20 files in each diff, so we should have slices of 20 + 20.
+ stub_const("#{described_class}::DIFF_FILE_BUFFER_ROWS", 20)
+
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_files', anything)
+ .exactly(2)
+ .times
+ .and_call_original
subject.perform(start_id, stop_id)
end
@@ -91,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do
- expect(described_class::MergeRequestDiff).to receive(:transaction).once
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_commits', anything)
+ .once
+ .and_call_original
+
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_files', anything)
+ .once
+ .and_call_original
subject.perform(start_id, stop_id)
end
end
- end
- context 'when the merge request diff update fails' do
- before do
- allow(described_class::MergeRequestDiff)
- .to receive(:update_all).and_raise(ActiveRecord::Rollback)
- end
+ context 'when some rows were already inserted due to a previous failure' do
+ before do
+ subject.perform(start_id, stop_id)
- it 'does not add any diff commits' do
- expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
- .not_to change { MergeRequestDiffCommit.count }
+ convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
+ convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
+ end
+
+ it 'does not raise' do
+ expect { subject.perform(start_id, stop_id) }.not_to raise_exception
+ end
+
+ it 'logs a message' do
+ expect(Rails.logger).to receive(:info)
+ .with(
+ a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
+ )
+ .twice
+
+ subject.perform(start_id, stop_id)
+ end
+
+ it 'ends up with the correct rows' do
+ expect(updated_merge_request_diff.commits.count).to eq(29)
+ expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
+ end
end
- it 'does not add any diff files' do
- expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
- .not_to change { MergeRequestDiffFile.count }
+ context 'when the merge request diff update fails' do
+ let(:exception) { ActiveRecord::RecordNotFound }
+
+ let(:perform_ignoring_exceptions) do
+ begin
+ subject.perform(start_id, stop_id)
+ rescue described_class::Error
+ end
+ end
+
+ before do
+ allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
+ .to receive(:update_all).and_raise(exception)
+ end
+
+ it 'raises an error' do
+ expect { subject.perform(start_id, stop_id) }
+ .to raise_exception(described_class::Error)
+ end
+
+ it 'logs the error' do
+ expect(Rails.logger).to receive(:info).with(
+ a_string_matching(described_class.name)
+ .and(matching([start_id, stop_id].inspect))
+ .and(matching(exception.name))
+ )
+
+ perform_ignoring_exceptions
+ end
+
+ it 'still adds diff commits' do
+ expect { perform_ignoring_exceptions }
+ .to change { MergeRequestDiffCommit.count }
+ end
+
+ it 'still adds diff files' do
+ expect { perform_ignoring_exceptions }
+ .to change { MergeRequestDiffFile.count }
+ end
end
end
diff --git a/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb
new file mode 100644
index 00000000000..4ab1bb67058
--- /dev/null
+++ b/spec/migrations/schedule_merge_request_diff_migrations_take_two_spec.rb
@@ -0,0 +1,59 @@
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
+
+describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
+ matcher :be_scheduled_migration do |time, *expected|
+ match do |migration|
+ BackgroundMigrationWorker.jobs.any? do |job|
+ job['args'] == [migration, expected] &&
+ job['at'].to_i == time.to_i
+ end
+ end
+
+ failure_message do |migration|
+ "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
+ end
+ end
+
+ let(:merge_request_diffs) { table(:merge_request_diffs) }
+ let(:merge_requests) { table(:merge_requests) }
+ let(:projects) { table(:projects) }
+
+ before do
+ stub_const("#{described_class.name}::BATCH_SIZE", 1)
+
+ projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
+
+ merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
+
+ merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
+ merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
+ merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
+ merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
+ end
+
+ it 'correctly schedules background migrations' do
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
+ expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
+ expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
+ expect(BackgroundMigrationWorker.jobs.size).to eq 3
+ end
+ end
+ end
+
+ it 'migrates the data' do
+ Sidekiq::Testing.inline! do
+ non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
+
+ expect(merge_request_diffs.where(non_empty).count).to eq 3
+
+ migrate!
+
+ expect(merge_request_diffs.where(non_empty).count).to eq 0
+ end
+ end
+end