diff options
author | Sean McGivern <sean@gitlab.com> | 2017-07-03 15:48:59 +0100 |
---|---|---|
committer | Sean McGivern <sean@gitlab.com> | 2017-07-05 13:12:09 +0100 |
commit | 3729adfe3d5e0184f8bd43451fc78221101dfb56 (patch) | |
tree | e6399b9169ce0cbf697349d67396d69bb8e053d2 | |
parent | 657bf16b959beefff58acdc546d5afde60708e6e (diff) | |
download | gitlab-ce-merge-request-commits-table.tar.gz |
Migrate MR commits and diffs to new tablesmerge-request-commits-table
Previously, we stored these as serialised fields - `st_{commits,diffs}` - on the
`merge_request_diffs` table. These now have their own tables -
`merge_request_diff_{commits,diffs}` - with a column for each attribute of the
serialised data.
Add a background migration to go through the existing MR diffs and migrate them
to the new format. Ignore any contents that cannot be displayed.
5 files changed, 196 insertions, 6 deletions
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index 53f35818146..3ad3952d86a 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -85,11 +85,7 @@ class MergeRequestDiff < ActiveRecord::Base def raw_diffs(options = {}) if options[:ignore_whitespace_change] - @diffs_no_whitespace ||= - Gitlab::Git::Compare.new( - repository.raw_repository, - safe_start_commit_sha, - head_commit_sha).diffs(options) + @diffs_no_whitespace ||= compare.diffs(options) else @raw_diffs ||= {} @raw_diffs[options] ||= load_diffs(options) diff --git a/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb new file mode 100644 index 00000000000..ae53e3e1ab3 --- /dev/null +++ b/db/post_migrate/20170703130158_schedule_merge_request_diff_migrations.rb @@ -0,0 +1,24 @@ +class ScheduleMergeRequestDiffMigrations < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + end + + def up + MergeRequestDiff.all.in_batches do |relation| + jobs = relation.pluck(:id).map do |id| + ['DeserializeMergeRequestDiffsAndCommits', [id]] + end + + BackgroundMigrationWorker.perform_bulk(*jobs) + end + end + + def down + end +end diff --git a/db/schema.rb b/db/schema.rb index 4937f290d3e..b51e4942750 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170623080805) do +ActiveRecord::Schema.define(version: 20170703130158) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" diff --git a/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb new file mode 100644 index 00000000000..be9ee0cbf40 --- /dev/null +++ b/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits.rb @@ -0,0 +1,53 @@ +class Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits + class MergeRequestDiff < ActiveRecord::Base + self.table_name = 'merge_request_diffs' + end + + def perform(merge_request_diff_id) + merge_request_diff = MergeRequestDiff + .select(:st_commits, :st_diffs) + .where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') + .find_by(id: merge_request_diff_id) + + return unless merge_request_diff + + sha_attribute = Gitlab::Database::ShaAttribute.new + commits = YAML.load(merge_request_diff.st_commits) + + commit_rows = commits.map.with_index do |commit, index| + commit_hash = commit.to_hash.except(:parent_ids) + sha = commit_hash.delete(:id) + + commit_hash.merge( + merge_request_diff_id: merge_request_diff_id, + relative_order: index, + sha: sha_attribute.type_cast_for_database(sha) + ) + end + + diffs = YAML.load(merge_request_diff.st_diffs) + diffs = [] unless valid_raw_diffs?(diffs) + + diff_rows = diffs.map.with_index do |diff, index| + diff.merge( + merge_request_diff_id: merge_request_diff_id, + relative_order: index + ) + end + + Gitlab::Database.bulk_insert('merge_request_diff_files', diff_rows) + Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows) + + merge_request_diff.update(st_commits: nil, st_diffs: nil) + end + + private + + # Unlike MergeRequestDiff#valid_raw_diff?, don't count Rugged objects as + # valid, because we don't render them usefully anyway. + def valid_raw_diffs?(diffs) + return false unless diffs.respond_to?(:each) + + diffs.all? { |diff| diff.is_a?(Hash) } + end +end diff --git a/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb new file mode 100644 index 00000000000..4a78e03a2ae --- /dev/null +++ b/spec/lib/gitlab/background_migration/deserialize_merge_request_diffs_and_commits_spec.rb @@ -0,0 +1,117 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do + describe '#perform' do + set(:merge_request) { create(:merge_request) } + set(:merge_request_diff) { merge_request.merge_request_diff } + + def clear_instance_variables(object) + [:@commits, :@raw_diffs, :@diffs_from_database].each do |ivar| + if object.instance_variable_defined?(ivar) + object.remove_instance_variable(ivar) + end + end + end + + def diffs_to_hashes(diffs) + diffs.as_json(only: Gitlab::Git::Diff::SERIALIZE_KEYS).map(&:with_indifferent_access) + end + + def quote_yaml(value) + MergeRequestDiff.connection.quote(YAML.dump(value)) + end + + before do + clear_instance_variables(merge_request_diff) + end + + shared_examples 'updated MR diff' do + before do + MergeRequestDiff.where(id: merge_request_diff.id).update_all( + "st_commits = #{quote_yaml(commits)}, st_diffs = #{quote_yaml(diffs)}" + ) + + MergeRequestDiffCommit.delete_all + MergeRequestDiffFile.delete_all + + subject.perform(merge_request_diff.id) + + merge_request_diff.merge_request_diff_commits.reload + clear_instance_variables(merge_request_diff) + end + + it 'creates correct entries in the merge_request_diff_commits table' do + expect(merge_request_diff.merge_request_diff_commits.count).to eq(commits.count) + expect(merge_request_diff.commits.map(&:to_hash)).to eq(commits) + end + + it 'creates correct entries in the merge_request_diff_files table' do + expect(merge_request_diff.merge_request_diff_files.count).to eq(expected_diffs.count) + expect(diffs_to_hashes(merge_request_diff.raw_diffs)).to eq(expected_diffs) + end + + it 'sets the st_commits and st_diffs columns to nil' do + expect(merge_request_diff.st_commits_before_type_cast).to be_nil + expect(merge_request_diff.st_diffs_before_type_cast).to be_nil + end + end + + context 'when the diff ID passed does not exist' do + it 'returns nil' do + expect(subject.perform(0)).to be_nil + end + end + + context 'when the merge request diff has no serialised commits or diffs' do + before do + merge_request_diff.update(st_commits: nil, st_diffs: nil) + end + + it 'returns nil' do + expect(subject.perform(merge_request_diff.id)).to be_nil + end + end + + context 'when the merge request diff has valid commits and diffs' do + let(:commits) { merge_request_diff.commits.map(&:to_hash) } + let(:diffs) { diffs_to_hashes(merge_request_diff.merge_request_diff_files) } + let(:expected_diffs) { diffs } + + include_examples 'updated MR diff' + end + + context 'when the merge request diff has commits, but no diffs' do + let(:commits) { merge_request_diff.commits.map(&:to_hash) } + let(:diffs) { [] } + let(:expected_diffs) { diffs } + + include_examples 'updated MR diff' + end + + context 'when the merge request diffs have invalid content' do + let(:commits) { merge_request_diff.commits.map(&:to_hash) } + let(:diffs) { ['--broken-diff'] } + let(:expected_diffs) { [] } + + include_examples 'updated MR diff' + end + + context 'when the merge request diffs are Rugged::Patch instances' do + let(:commits) { merge_request_diff.commits.map(&:to_hash) } + let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) } + let(:diffs) { first_commit.diff_from_parent.patches } + let(:expected_diffs) { [] } + + include_examples 'updated MR diff' + end + + context 'when the merge request diffs are Rugged::Diff::Delta instances' do + let(:commits) { merge_request_diff.commits.map(&:to_hash) } + let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) } + let(:diffs) { first_commit.diff_from_parent.deltas } + let(:expected_diffs) { [] } + + include_examples 'updated MR diff' + end + end +end |