summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2018-06-04 16:20:01 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2018-06-04 18:33:56 +0200
commit71ed7987d3a7ab16677cb2b87721f391e8daaf13 (patch)
treeadf08481293e0542eb1ccc7e648dd6281ac6cdcc
parent9c2961947826442e780285cb551583b09cf6dae9 (diff)
downloadgitlab-ce-gh-importer-transactions.tar.gz
Perform pull request IO work outside a transactiongh-importer-transactions
When importing a GitHub pull request we would perform all work in a single database transaction. This is less than ideal, because we perform various slow Git operations when creating a merge request. This in turn can lead to many DB connections being used, while just waiting for an IO operation to complete. To work around this, we now move most of the heavy lifting out of the database transaction. Some extra error handling is added to ensure we can resume importing a partially imported pull request, instead of just throwing an error. This commit also changes the specs for IssueImporter so they don't rely on deprecated RSpec methods.
-rw-r--r--changelogs/unreleased/gh-importer-transactions.yml5
-rw-r--r--lib/gitlab/github_import/importer/pull_request_importer.rb56
-rw-r--r--spec/lib/gitlab/github_import/importer/issue_importer_spec.rb4
-rw-r--r--spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb85
4 files changed, 121 insertions, 29 deletions
diff --git a/changelogs/unreleased/gh-importer-transactions.yml b/changelogs/unreleased/gh-importer-transactions.yml
new file mode 100644
index 00000000000..1489d60a3fb
--- /dev/null
+++ b/changelogs/unreleased/gh-importer-transactions.yml
@@ -0,0 +1,5 @@
+---
+title: Move PR IO operations out of a transaction
+merge_request:
+author:
+type: performance
diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb
index 49d859f9624..b2f6cb7ad19 100644
--- a/lib/gitlab/github_import/importer/pull_request_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_request_importer.rb
@@ -22,15 +22,22 @@ module Gitlab
end
def execute
- if (mr_id = create_merge_request)
- issuable_finder.cache_database_id(mr_id)
+ mr, already_exists = create_merge_request
+
+ if mr
+ insert_git_data(mr, already_exists)
+ issuable_finder.cache_database_id(mr.id)
end
end
# Creates the merge request and returns its ID.
#
# This method will return `nil` if the merge request could not be
- # created.
+ # created, otherwise it will return an Array containing the following
+ # values:
+ #
+ # 1. A MergeRequest instance.
+ # 2. A boolean indicating if the MR already exists.
def create_merge_request
author_id, author_found = user_finder.author_id_for(pull_request)
@@ -69,21 +76,42 @@ module Gitlab
merge_request_id = GithubImport
.insert_and_return_id(attributes, project.merge_requests)
- merge_request = project.merge_requests.find(merge_request_id)
-
- # These fields are set so we can create the correct merge request
- # diffs.
- merge_request.source_branch_sha = pull_request.source_branch_sha
- merge_request.target_branch_sha = pull_request.target_branch_sha
-
- merge_request.keep_around_commit
- merge_request.merge_request_diffs.create
-
- merge_request.id
+ [project.merge_requests.find(merge_request_id), false]
end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the merge request.
+ []
+ rescue ActiveRecord::RecordNotUnique
+ # It's possible we previously created the MR, but failed when updating
+ # the Git data. In this case we'll just continue working on the
+ # existing row.
+ [project.merge_requests.find_by(iid: pull_request.iid), true]
+ end
+
+ def insert_git_data(merge_request, already_exists = false)
+ # These fields are set so we can create the correct merge request
+ # diffs.
+ merge_request.source_branch_sha = pull_request.source_branch_sha
+ merge_request.target_branch_sha = pull_request.target_branch_sha
+
+ merge_request.keep_around_commit
+
+ # MR diffs normally use an "after_save" hook to pull data from Git.
+ # All of this happens in the transaction started by calling
+ # create/save/etc. This in turn can lead to these transactions being
+ # held open for much longer than necessary. To work around this we
+ # first save the diff, then populate it.
+ diff =
+ if already_exists
+ merge_request.merge_request_diffs.take
+ else
+ merge_request.merge_request_diffs.build
+ end
+
+ diff.importing = true
+ diff.save
+ diff.save_git_content
end
end
end
diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
index d34ca0b76b8..81fe97c1e49 100644
--- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
@@ -180,12 +180,12 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
allow(importer.user_finder)
.to receive(:user_id_for)
- .ordered.with(issue.assignees[0])
+ .with(issue.assignees[0])
.and_return(4)
allow(importer.user_finder)
.to receive(:user_id_for)
- .ordered.with(issue.assignees[1])
+ .with(issue.assignees[1])
.and_return(5)
expect(Gitlab::Database)
diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
index 35f3fdf8304..6686b7ce0b5 100644
--- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
@@ -40,13 +40,19 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
describe '#execute' do
it 'imports the pull request' do
+ mr = double(:merge_request, id: 10)
+
expect(importer)
.to receive(:create_merge_request)
- .and_return(10)
+ .and_return([mr, false])
+
+ expect(importer)
+ .to receive(:insert_git_data)
+ .with(mr, false)
expect_any_instance_of(Gitlab::GithubImport::IssuableFinder)
.to receive(:cache_database_id)
- .with(10)
+ .with(mr.id)
importer.execute
end
@@ -99,18 +105,11 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
importer.create_merge_request
end
- it 'returns the ID of the created merge request' do
- id = importer.create_merge_request
-
- expect(id).to be_a_kind_of(Numeric)
- end
-
- it 'creates the merge request diffs' do
- importer.create_merge_request
-
- mr = project.merge_requests.take
+ it 'returns the created merge request' do
+ mr, exists = importer.create_merge_request
- expect(mr.merge_request_diffs.exists?).to eq(true)
+ expect(mr).to be_instance_of(MergeRequest)
+ expect(exists).to eq(false)
end
end
@@ -217,5 +216,65 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
expect { importer.create_merge_request }.not_to raise_error
end
end
+
+ context 'when the merge request already exists' do
+ before do
+ allow(importer.user_finder)
+ .to receive(:author_id_for)
+ .with(pull_request)
+ .and_return([user.id, true])
+
+ allow(importer.user_finder)
+ .to receive(:assignee_id_for)
+ .with(pull_request)
+ .and_return(user.id)
+ end
+
+ it 'returns the existing merge request' do
+ mr1, exists1 = importer.create_merge_request
+ mr2, exists2 = importer.create_merge_request
+
+ expect(mr2).to eq(mr1)
+ expect(exists1).to eq(false)
+ expect(exists2).to eq(true)
+ end
+ end
+ end
+
+ describe '#insert_git_data' do
+ before do
+ allow(importer.milestone_finder)
+ .to receive(:id_for)
+ .with(pull_request)
+ .and_return(milestone.id)
+
+ allow(importer.user_finder)
+ .to receive(:author_id_for)
+ .with(pull_request)
+ .and_return([user.id, true])
+
+ allow(importer.user_finder)
+ .to receive(:assignee_id_for)
+ .with(pull_request)
+ .and_return(user.id)
+ end
+
+ it 'creates the merge request diffs' do
+ mr, exists = importer.create_merge_request
+
+ importer.insert_git_data(mr, exists)
+
+ expect(mr.merge_request_diffs.exists?).to eq(true)
+ end
+
+ it 'creates the merge request diff commits' do
+ mr, exists = importer.create_merge_request
+
+ importer.insert_git_data(mr, exists)
+
+ diff = mr.merge_request_diffs.take
+
+ expect(diff.merge_request_diff_commits.exists?).to eq(true)
+ end
end
end