summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@gitlab.com>2017-09-26 15:09:37 +0100
committerSean McGivern <sean@gitlab.com>2017-09-29 11:56:08 +0100
commit1507ff8ab7d6f59b58978ad7385cb74603002409 (patch)
tree83db5d4f422480092192938b97df581862af109f
parent917194153f066556e0ebff92f9c1486144948535 (diff)
downloadgitlab-ce-1507ff8ab7d6f59b58978ad7385cb74603002409.tar.gz
Make MR diff background migration less likely to time out
This version does not use transactions, but individual statements. As we have unique constraints on the target tables for the inserts, we can just ignore uniqueness violations there (as long as we always insert the same batch size, in the same order). This means the spec now must use truncation, not a transaction, as the uniqueness violation means that the whole transaction for that spec would be invalid, which isn't what we'd want. In real-world use, this isn't run in a transaction anyway. This commit also wraps unhandled exceptions, for easier finding in Sentry, and logs with a consistent format, for easier searching.
-rw-r--r--lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb34
-rw-r--r--spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb98
2 files changed, 101 insertions, 31 deletions
diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
index 6f1fd0a333d..8e5c95f2287 100644
--- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
+++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
@@ -3,6 +3,12 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows
+ class Error < StandardError
+ def backtrace
+ cause.backtrace
+ end
+ end
+
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
@@ -34,6 +40,10 @@ module Gitlab
end
flush_buffers!
+ rescue => e
+ Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
+
+ raise Error.new(e.inspect)
end
private
@@ -46,22 +56,28 @@ module Gitlab
def flush_buffers!
if diff_ids.any?
- MergeRequestDiff.transaction do
- commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
- Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice)
- end
-
- file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
- Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
- end
+ commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
+ bulk_insert('merge_request_diff_commits', commit_rows_slice)
+ end
- MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
+ file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
+ bulk_insert('merge_request_diff_files', file_rows_slice)
end
+
+ MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end
reset_buffers!
end
+ def bulk_insert(table, rows)
+ Gitlab::Database.bulk_insert(table, rows)
+ rescue ActiveRecord::RecordNotUnique
+ ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
+
+ Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
+ end
+
def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue []
diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
index 117a6f8a5bb..d2e7243ee05 100644
--- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
+++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
@@ -1,9 +1,9 @@
require 'spec_helper'
-describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
+describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do
- set(:merge_request) { create(:merge_request) }
- set(:merge_request_diff) { merge_request.merge_request_diff }
+ let(:merge_request) { create(:merge_request) }
+ let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
@@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end
- it 'updates and continues' do
- expect(described_class::MergeRequestDiff).to receive(:transaction).twice
-
- subject.perform(start_id, stop_id)
- end
-
it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20)
@@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do
- expect(described_class::MergeRequestDiff).to receive(:transaction).once
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_commits', anything)
+ .once
+ .and_call_original
+
+ expect(Gitlab::Database).to receive(:bulk_insert)
+ .with('merge_request_diff_files', anything)
+ .once
+ .and_call_original
subject.perform(start_id, stop_id)
end
end
- end
- context 'when the merge request diff update fails' do
- before do
- allow(described_class::MergeRequestDiff)
- .to receive(:update_all).and_raise(ActiveRecord::Rollback)
- end
+ context 'when some rows were already inserted due to a previous failure' do
+ before do
+ subject.perform(start_id, stop_id)
- it 'does not add any diff commits' do
- expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
- .not_to change { MergeRequestDiffCommit.count }
+ convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
+ convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
+ end
+
+ it 'does not raise' do
+ expect { subject.perform(start_id, stop_id) }.not_to raise_exception
+ end
+
+ it 'logs a message' do
+ expect(Rails.logger).to receive(:info)
+ .with(
+ a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
+ )
+ .twice
+
+ subject.perform(start_id, stop_id)
+ end
+
+ it 'ends up with the correct rows' do
+ expect(updated_merge_request_diff.commits.count).to eq(29)
+ expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
+ end
end
- it 'does not add any diff files' do
- expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
- .not_to change { MergeRequestDiffFile.count }
+ context 'when the merge request diff update fails' do
+ let(:exception) { ActiveRecord::RecordNotFound }
+
+ let(:perform_ignoring_exceptions) do
+ begin
+ subject.perform(start_id, stop_id)
+ rescue described_class::Error
+ end
+ end
+
+ before do
+ allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
+ .to receive(:update_all).and_raise(exception)
+ end
+
+ it 'raises an error' do
+ expect { subject.perform(start_id, stop_id) }
+ .to raise_exception(described_class::Error)
+ end
+
+ it 'logs the error' do
+ expect(Rails.logger).to receive(:info).with(
+ a_string_matching(described_class.name)
+ .and(matching([start_id, stop_id].inspect))
+ .and(matching(exception.name))
+ )
+
+ perform_ignoring_exceptions
+ end
+
+ it 'still adds diff commits' do
+ expect { perform_ignoring_exceptions }
+ .to change { MergeRequestDiffCommit.count }
+ end
+
+ it 'still adds diff files' do
+ expect { perform_ignoring_exceptions }
+ .to change { MergeRequestDiffFile.count }
+ end
end
end