summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@gitlab.com>2017-12-18 10:45:28 +0000
committerSean McGivern <sean@gitlab.com>2017-12-18 11:23:22 +0000
commitab130a21b9506fd43670b2547703b75c43801964 (patch)
tree8d0866bb4cc514eeb0d6d5f337a744711cbc1e4a
parent627a96875ee68e37b45192af3121f09032ea4bbf (diff)
downloadgitlab-ce-set-max-size-for-mr-background-migration.tar.gz
Don't migrate MR commits or diffs over 100 MBset-max-size-for-mr-background-migration
On the GitLab.com staging environment, we saw this migration fail when run on a smaller node, because there was a merge request with diff text over 700 MB in size. The production environment had already processed this just fine, but to give the clean-up migration a chance to finish, any MR diff rows with commits or diffs greater than 100 MB (in their serialised Yaml form) will not be migrated. This is data loss, but the risk is hopefully small: 1. Many instances will have already completed the background migration (like GitLab.com). 2. Most other instances should not have rows matching these criteria - this seems like an extreme case. We also reduce the number of rows we process at once, to further reduce the memory required for processing a batch.
-rw-r--r--db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb2
-rw-r--r--lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb3
-rw-r--r--spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb23
3 files changed, 27 insertions, 1 deletions
diff --git a/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb b/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb
index 30cf08b29fc..f7453958a16 100644
--- a/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb
+++ b/db/migrate/20171121135738_clean_up_from_merge_request_diffs_and_commits.rb
@@ -24,7 +24,7 @@ class CleanUpFromMergeRequestDiffsAndCommits < ActiveRecord::Migration
(st_diffs IS NOT NULL AND st_diffs != #{literal_prefix}'--- []\n')
".squish
- MergeRequestDiff.where(non_empty).each_batch(of: 500) do |relation, index|
+ MergeRequestDiff.where(non_empty).each_batch(of: 100) do |relation, index|
range = relation.pluck('MIN(id)', 'MAX(id)').first
Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits.new.perform(*range)
diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
index fd5cbf76e47..4f0a9b85deb 100644
--- a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
+++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
@@ -21,11 +21,14 @@ module Gitlab
BUFFER_ROWS = 1000
DIFF_FILE_BUFFER_ROWS = 100
+ MAX_ROW_SIZE = 100.megabytes
def perform(start_id, stop_id)
merge_request_diffs = MergeRequestDiff
.select(:id, :st_commits, :st_diffs)
.where('st_commits IS NOT NULL OR st_diffs IS NOT NULL')
+ .where("length(coalesce(st_commits, '')) <= #{MAX_ROW_SIZE}")
+ .where("length(coalesce(st_diffs, '')) <= #{MAX_ROW_SIZE}")
.where(id: start_id..stop_id)
reset_buffers!
diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
index 84d9e635810..6a8802f5173 100644
--- a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
+++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
@@ -3,6 +3,7 @@ require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate, :migration, schema: 20171114162227 do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
+ let(:too_large_string) { 'a' * described_class::MAX_ROW_SIZE.succ }
describe '#perform' do
let(:project) { create(:project, :repository) }
@@ -67,6 +68,28 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
end
end
+ context 'when the merge request commits are over 100 MB' do
+ before do
+ merge_request_diff.update(st_commits: too_large_string)
+ end
+
+ it 'does not process any rows' do
+ expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
+ .not_to change { merge_request_diff.reload.st_commits }
+ end
+ end
+
+ context 'when the merge request diffs are over 100 MB' do
+ before do
+ merge_request_diff.update(st_diffs: too_large_string)
+ end
+
+ it 'does not process any rows' do
+ expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
+ .not_to change { merge_request_diff.reload.st_diffs }
+ end
+ end
+
context 'processing multiple merge request diffs' do
let(:start_id) { described_class::MergeRequestDiff.minimum(:id) }
let(:stop_id) { described_class::MergeRequestDiff.maximum(:id) }