From ab130a21b9506fd43670b2547703b75c43801964 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Mon, 18 Dec 2017 10:45:28 +0000 Subject: Don't migrate MR commits or diffs over 100 MB On the GitLab.com staging environment, we saw this migration fail when run on a smaller node, because there was a merge request with diff text over 700 MB in size. The production environment had already processed this just fine, but to give the clean-up migration a chance to finish, any MR diff rows with commits or diffs greater than 100 MB (in their serialised Yaml form) will not be migrated. This is data loss, but the risk is hopefully small: 1. Many instances will have already completed the background migration (like GitLab.com). 2. Most other instances should not have rows matching these criteria - this seems like an extreme case. We also reduce the number of rows we process at once, to further reduce the memory required for processing a batch. --- ...lean_up_from_merge_request_diffs_and_commits.rb | 2 +- .../deserialize_merge_request_diffs_and_commits.rb | 3 +++ ...rialize_merge_request_diffs_and_commits_spec.rb | 23 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb b/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb index 30cf08b29fc..f7453958a16 100644 --- a/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb +++ b/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb @@ -24,7 +24,7 @@ class CleanUpFromMergeRequestDiffsAndCommits < ActiveRecord::Migration (st_diffs IS NOT NULL AND st_diffs != #{literal_prefix}'--- []\n') ".squish - MergeRequestDiff.where(non_empty).each_batch(of: 500) do |relation, index| + MergeRequestDiff.where(non_empty).each_batch(of: 100) do |relation, index| range = relation.pluck('MIN(id)', 'MAX(id)').first Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits.new.perform(*range) diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb index fd5cbf76e47..4f0a9b85deb 100644 --- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb +++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb @@ -21,11 +21,14 @@ module Gitlab BUFFER_ROWS = 1000 DIFF_FILE_BUFFER_ROWS = 100 + MAX_ROW_SIZE = 100.megabytes def perform(start_id, stop_id) merge_request_diffs = MergeRequestDiff .select(:id, :st_commits, :st_diffs) .where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') + .where("length(coalesce(st_commits, '')) <= #{MAX_ROW_SIZE}") + .where("length(coalesce(st_diffs, '')) <= #{MAX_ROW_SIZE}") .where(id: start_id..stop_id) reset_buffers! diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb index 84d9e635810..6a8802f5173 100644 --- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb +++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb @@ -3,6 +3,7 @@ require 'spec_helper' describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate, :migration, schema: 20171114162227 do let(:merge_request_diffs) { table(:merge_request_diffs) } let(:merge_requests) { table(:merge_requests) } + let(:too_large_string) { 'a' * described_class::MAX_ROW_SIZE.succ } describe '#perform' do let(:project) { create(:project, :repository) } @@ -67,6 +68,28 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t end end + context 'when the merge request commits are over 100 MB' do + before do + merge_request_diff.update(st_commits: too_large_string) + end + + it 'does not process any rows' do + expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } + .not_to change { merge_request_diff.reload.st_commits } + end + end + + context 'when the merge request diffs are over 100 MB' do + before do + merge_request_diff.update(st_diffs: too_large_string) + end + + it 'does not process any rows' do + expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } + .not_to change { merge_request_diff.reload.st_diffs } + end + end + context 'processing multiple merge request diffs' do let(:start_id) { described_class::MergeRequestDiff.minimum(:id) } let(:stop_id) { described_class::MergeRequestDiff.maximum(:id) } -- cgit v1.2.1