summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@gitlab.com>2017-07-03 15:48:59 +0100
committerSean McGivern <sean@gitlab.com>2017-07-05 13:12:09 +0100
commit3729adfe3d5e0184f8bd43451fc78221101dfb56 (patch)
treee6399b9169ce0cbf697349d67396d69bb8e053d2
parent657bf16b959beefff58acdc546d5afde60708e6e (diff)
downloadgitlab-ce-merge-request-commits-table.tar.gz
Migrate MR commits and diffs to new tablesmerge-request-commits-table
Previously, we stored these as serialised fields - `st_{commits,diffs}` - on the `merge_request_diffs` table. These now have their own tables - `merge_request_diff_{commits,diffs}` - with a column for each attribute of the serialised data. Add a background migration to go through the existing MR diffs and migrate them to the new format. Ignore any contents that cannot be displayed.
-rw-r--r--app/models/merge_request_diff.rb6
-rw-r--r--db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb24
-rw-r--r--db/schema.rb2
-rw-r--r--lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb53
-rw-r--r--spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb117
5 files changed, 196 insertions, 6 deletions
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index 53f35818146..3ad3952d86a 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -85,11 +85,7 @@ class MergeRequestDiff < ActiveRecord::Base
def raw_diffs(options = {})
if options[:ignore_whitespace_change]
- @diffs_no_whitespace ||=
- Gitlab::Git::Compare.new(
- repository.raw_repository,
- safe_start_commit_sha,
- head_commit_sha).diffs(options)
+ @diffs_no_whitespace ||= compare.diffs(options)
else
@raw_diffs ||= {}
@raw_diffs[options] ||= load_diffs(options)
diff --git a/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb
new file mode 100644
index 00000000000..ae53e3e1ab3
--- /dev/null
+++ b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb
@@ -0,0 +1,24 @@
+class ScheduleMergeRequestDiffMigrations < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+ end
+
+ def up
+ MergeRequestDiff.all.in_batches do |relation|
+ jobs = relation.pluck(:id).map do |id|
+ ['DeserializeMergeRequestDiffsAndCommits', [id]]
+ end
+
+ BackgroundMigrationWorker.perform_bulk(*jobs)
+ end
+ end
+
+ def down
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 4937f290d3e..b51e4942750 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20170623080805) do
+ActiveRecord::Schema.define(version: 20170703130158) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
new file mode 100644
index 00000000000..be9ee0cbf40
--- /dev/null
+++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb
@@ -0,0 +1,53 @@
+class Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits
+ class MergeRequestDiff < ActiveRecord::Base
+ self.table_name = 'merge_request_diffs'
+ end
+
+ def perform(merge_request_diff_id)
+ merge_request_diff = MergeRequestDiff
+ .select(:st_commits, :st_diffs)
+ .where('st_commits IS NOT NULL OR st_diffs IS NOT NULL')
+ .find_by(id: merge_request_diff_id)
+
+ return unless merge_request_diff
+
+ sha_attribute = Gitlab::Database::ShaAttribute.new
+ commits = YAML.load(merge_request_diff.st_commits)
+
+ commit_rows = commits.map.with_index do |commit, index|
+ commit_hash = commit.to_hash.except(:parent_ids)
+ sha = commit_hash.delete(:id)
+
+ commit_hash.merge(
+ merge_request_diff_id: merge_request_diff_id,
+ relative_order: index,
+ sha: sha_attribute.type_cast_for_database(sha)
+ )
+ end
+
+ diffs = YAML.load(merge_request_diff.st_diffs)
+ diffs = [] unless valid_raw_diffs?(diffs)
+
+ diff_rows = diffs.map.with_index do |diff, index|
+ diff.merge(
+ merge_request_diff_id: merge_request_diff_id,
+ relative_order: index
+ )
+ end
+
+ Gitlab::Database.bulk_insert('merge_request_diff_files', diff_rows)
+ Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows)
+
+ merge_request_diff.update(st_commits: nil, st_diffs: nil)
+ end
+
+ private
+
+ # Unlike MergeRequestDiff#valid_raw_diff?, don't count Rugged objects as
+ # valid, because we don't render them usefully anyway.
+ def valid_raw_diffs?(diffs)
+ return false unless diffs.respond_to?(:each)
+
+ diffs.all? { |diff| diff.is_a?(Hash) }
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
new file mode 100644
index 00000000000..4a78e03a2ae
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb
@@ -0,0 +1,117 @@
+require 'spec_helper'
+
+describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
+ describe '#perform' do
+ set(:merge_request) { create(:merge_request) }
+ set(:merge_request_diff) { merge_request.merge_request_diff }
+
+ def clear_instance_variables(object)
+ [:@commits, :@raw_diffs, :@diffs_from_database].each do |ivar|
+ if object.instance_variable_defined?(ivar)
+ object.remove_instance_variable(ivar)
+ end
+ end
+ end
+
+ def diffs_to_hashes(diffs)
+ diffs.as_json(only: Gitlab::Git::Diff::SERIALIZE_KEYS).map(&:with_indifferent_access)
+ end
+
+ def quote_yaml(value)
+ MergeRequestDiff.connection.quote(YAML.dump(value))
+ end
+
+ before do
+ clear_instance_variables(merge_request_diff)
+ end
+
+ shared_examples 'updated MR diff' do
+ before do
+ MergeRequestDiff.where(id: merge_request_diff.id).update_all(
+ "st_commits = #{quote_yaml(commits)}, st_diffs = #{quote_yaml(diffs)}"
+ )
+
+ MergeRequestDiffCommit.delete_all
+ MergeRequestDiffFile.delete_all
+
+ subject.perform(merge_request_diff.id)
+
+ merge_request_diff.merge_request_diff_commits.reload
+ clear_instance_variables(merge_request_diff)
+ end
+
+ it 'creates correct entries in the merge_request_diff_commits table' do
+ expect(merge_request_diff.merge_request_diff_commits.count).to eq(commits.count)
+ expect(merge_request_diff.commits.map(&:to_hash)).to eq(commits)
+ end
+
+ it 'creates correct entries in the merge_request_diff_files table' do
+ expect(merge_request_diff.merge_request_diff_files.count).to eq(expected_diffs.count)
+ expect(diffs_to_hashes(merge_request_diff.raw_diffs)).to eq(expected_diffs)
+ end
+
+ it 'sets the st_commits and st_diffs columns to nil' do
+ expect(merge_request_diff.st_commits_before_type_cast).to be_nil
+ expect(merge_request_diff.st_diffs_before_type_cast).to be_nil
+ end
+ end
+
+ context 'when the diff ID passed does not exist' do
+ it 'returns nil' do
+ expect(subject.perform(0)).to be_nil
+ end
+ end
+
+ context 'when the merge request diff has no serialised commits or diffs' do
+ before do
+ merge_request_diff.update(st_commits: nil, st_diffs: nil)
+ end
+
+ it 'returns nil' do
+ expect(subject.perform(merge_request_diff.id)).to be_nil
+ end
+ end
+
+ context 'when the merge request diff has valid commits and diffs' do
+ let(:commits) { merge_request_diff.commits.map(&:to_hash) }
+ let(:diffs) { diffs_to_hashes(merge_request_diff.merge_request_diff_files) }
+ let(:expected_diffs) { diffs }
+
+ include_examples 'updated MR diff'
+ end
+
+ context 'when the merge request diff has commits, but no diffs' do
+ let(:commits) { merge_request_diff.commits.map(&:to_hash) }
+ let(:diffs) { [] }
+ let(:expected_diffs) { diffs }
+
+ include_examples 'updated MR diff'
+ end
+
+ context 'when the merge request diffs have invalid content' do
+ let(:commits) { merge_request_diff.commits.map(&:to_hash) }
+ let(:diffs) { ['--broken-diff'] }
+ let(:expected_diffs) { [] }
+
+ include_examples 'updated MR diff'
+ end
+
+ context 'when the merge request diffs are Rugged::Patch instances' do
+ let(:commits) { merge_request_diff.commits.map(&:to_hash) }
+ let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
+ let(:diffs) { first_commit.diff_from_parent.patches }
+ let(:expected_diffs) { [] }
+
+ include_examples 'updated MR diff'
+ end
+
+ context 'when the merge request diffs are Rugged::Diff::Delta instances' do
+ let(:commits) { merge_request_diff.commits.map(&:to_hash) }
+ let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
+ let(:diffs) { first_commit.diff_from_parent.deltas }
+ let(:expected_diffs) { [] }
+
+ include_examples 'updated MR diff'
+ end
+ end
+end