From 1507ff8ab7d6f59b58978ad7385cb74603002409 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Tue, 26 Sep 2017 15:09:37 +0100 Subject: Make MR diff background migration less likely to time out This version does not use transactions, but individual statements. As we have unique constraints on the target tables for the inserts, we can just ignore uniqueness violations there (as long as we always insert the same batch size, in the same order). This means the spec now must use truncation, not a transaction, as the uniqueness violation means that the whole transaction for that spec would be invalid, which isn't what we'd want. In real-world use, this isn't run in a transaction anyway. This commit also wraps unhandled exceptions, for easier finding in Sentry, and logs with a consistent format, for easier searching. --- .../deserialize_merge_request_diffs_and_commits.rb | 34 ++++++-- ...rialize_merge_request_diffs_and_commits_spec.rb | 98 +++++++++++++++++----- 2 files changed, 101 insertions(+), 31 deletions(-) diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb index 6f1fd0a333d..8e5c95f2287 100644 --- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb +++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb @@ -3,6 +3,12 @@ module Gitlab class DeserializeMergeRequestDiffsAndCommits attr_reader :diff_ids, :commit_rows, :file_rows + class Error < StandardError + def backtrace + cause.backtrace + end + end + class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' end @@ -34,6 +40,10 @@ module Gitlab end flush_buffers! + rescue => e + Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}") + + raise Error.new(e.inspect) end private @@ -46,22 +56,28 @@ module Gitlab def flush_buffers! if diff_ids.any? - MergeRequestDiff.transaction do - commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice| - Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice) - end - - file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice| - Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice) - end + commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice| + bulk_insert('merge_request_diff_commits', commit_rows_slice) + end - MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) + file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice| + bulk_insert('merge_request_diff_files', file_rows_slice) end + + MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) end reset_buffers! end + def bulk_insert(table, rows) + Gitlab::Database.bulk_insert(table, rows) + rescue ActiveRecord::RecordNotUnique + ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort + + Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}") + end + def single_diff_rows(merge_request_diff) sha_attribute = Gitlab::Database::ShaAttribute.new commits = YAML.load(merge_request_diff.st_commits) rescue [] diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb index 117a6f8a5bb..d2e7243ee05 100644 --- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb +++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb @@ -1,9 +1,9 @@ require 'spec_helper' -describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do +describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do describe '#perform' do - set(:merge_request) { create(:merge_request) } - set(:merge_request_diff) { merge_request.merge_request_diff } + let(:merge_request) { create(:merge_request) } + let(:merge_request_diff) { merge_request.merge_request_diff } let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) } def diffs_to_hashes(diffs) @@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do allow(Gitlab::Database).to receive(:bulk_insert).and_call_original end - it 'updates and continues' do - expect(described_class::MergeRequestDiff).to receive(:transaction).twice - - subject.perform(start_id, stop_id) - end - it 'inserts commit rows in chunks of BUFFER_ROWS' do # There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9. stub_const("#{described_class}::BUFFER_ROWS", 20) @@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do context 'when BUFFER_ROWS is not exceeded' do it 'only updates once' do - expect(described_class::MergeRequestDiff).to receive(:transaction).once + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_commits', anything) + .once + .and_call_original + + expect(Gitlab::Database).to receive(:bulk_insert) + .with('merge_request_diff_files', anything) + .once + .and_call_original subject.perform(start_id, stop_id) end end - end - context 'when the merge request diff update fails' do - before do - allow(described_class::MergeRequestDiff) - .to receive(:update_all).and_raise(ActiveRecord::Rollback) - end + context 'when some rows were already inserted due to a previous failure' do + before do + subject.perform(start_id, stop_id) - it 'does not add any diff commits' do - expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } - .not_to change { MergeRequestDiffCommit.count } + convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files)) + convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files)) + end + + it 'does not raise' do + expect { subject.perform(start_id, stop_id) }.not_to raise_exception + end + + it 'logs a message' do + expect(Rails.logger).to receive(:info) + .with( + a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect)) + ) + .twice + + subject.perform(start_id, stop_id) + end + + it 'ends up with the correct rows' do + expect(updated_merge_request_diff.commits.count).to eq(29) + expect(updated_merge_request_diff.raw_diffs.count).to eq(20) + end end - it 'does not add any diff files' do - expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } - .not_to change { MergeRequestDiffFile.count } + context 'when the merge request diff update fails' do + let(:exception) { ActiveRecord::RecordNotFound } + + let(:perform_ignoring_exceptions) do + begin + subject.perform(start_id, stop_id) + rescue described_class::Error + end + end + + before do + allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation) + .to receive(:update_all).and_raise(exception) + end + + it 'raises an error' do + expect { subject.perform(start_id, stop_id) } + .to raise_exception(described_class::Error) + end + + it 'logs the error' do + expect(Rails.logger).to receive(:info).with( + a_string_matching(described_class.name) + .and(matching([start_id, stop_id].inspect)) + .and(matching(exception.name)) + ) + + perform_ignoring_exceptions + end + + it 'still adds diff commits' do + expect { perform_ignoring_exceptions } + .to change { MergeRequestDiffCommit.count } + end + + it 'still adds diff files' do + expect { perform_ignoring_exceptions } + .to change { MergeRequestDiffFile.count } + end end end -- cgit v1.2.1