summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-08-13 15:36:15 -0700
committerStan Hu <stanhu@gmail.com>2018-08-17 16:43:58 -0700
commit5ffa7220130b57251187d9b953e6946754589503 (patch)
tree84e4a3fb4e1323691cb990b831ba10a7fff82a83
parent74eec89ebe0b118f8aba6051f53d5dbe5e68c2d1 (diff)
downloadgitlab-ce-sh-insert-git-data-in-separate-transaction.tar.gz
Bitbucket Server importer: Eliminate most idle-in-transaction issuessh-insert-git-data-in-separate-transaction
Just like with the GitHub importer, the Bitbucket Server importer can hit the default 60 s idle-in-transaction timeouts if it takes too long to create the merge request. We solve this by using the same approach as the GitHub importer: 1. Bypass all validation and hooks in creating a merge request 2. Insert the Git data in a separate transaction Part of #50021
-rw-r--r--changelogs/unreleased/sh-insert-git-data-in-separate-transaction.yml5
-rw-r--r--lib/gitlab/bitbucket_server_import/importer.rb16
-rw-r--r--lib/gitlab/github_import.rb18
-rw-r--r--lib/gitlab/github_import/importer/issue_importer.rb4
-rw-r--r--lib/gitlab/github_import/importer/pull_request_importer.rb90
-rw-r--r--lib/gitlab/import/database_helpers.rb25
-rw-r--r--lib/gitlab/import/merge_request_creator.rb38
-rw-r--r--lib/gitlab/import/merge_request_helpers.rb64
-rw-r--r--spec/lib/gitlab/github_import/importer/issue_importer_spec.rb6
-rw-r--r--spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb8
-rw-r--r--spec/lib/gitlab/github_import_spec.rb33
-rw-r--r--spec/lib/gitlab/import/database_helpers_spec.rb44
-rw-r--r--spec/lib/gitlab/import/merge_request_creator_spec.rb43
13 files changed, 259 insertions, 135 deletions
diff --git a/changelogs/unreleased/sh-insert-git-data-in-separate-transaction.yml b/changelogs/unreleased/sh-insert-git-data-in-separate-transaction.yml
new file mode 100644
index 00000000000..116929b2f53
--- /dev/null
+++ b/changelogs/unreleased/sh-insert-git-data-in-separate-transaction.yml
@@ -0,0 +1,5 @@
+---
+title: 'Bitbucket Server importer: Eliminate most idle-in-transaction issues'
+merge_request:
+author:
+type: performance
diff --git a/lib/gitlab/bitbucket_server_import/importer.rb b/lib/gitlab/bitbucket_server_import/importer.rb
index 268d21a77d1..b97ee882349 100644
--- a/lib/gitlab/bitbucket_server_import/importer.rb
+++ b/lib/gitlab/bitbucket_server_import/importer.rb
@@ -2,6 +2,7 @@ module Gitlab
module BitbucketServerImport
class Importer
include Gitlab::ShellAdapter
+
attr_reader :recover_missing_commits
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users
@@ -175,21 +176,18 @@ module Gitlab
description = ''
description += @formatter.author_line(pull_request.author) unless find_user_id(pull_request.author_email)
description += pull_request.description if pull_request.description
-
- source_branch_sha = pull_request.source_branch_sha
- target_branch_sha = pull_request.target_branch_sha
author_id = gitlab_user_id(pull_request.author_email)
attributes = {
iid: pull_request.iid,
title: pull_request.title,
description: description,
- source_project: project,
+ source_project_id: project.id,
source_branch: Gitlab::Git.ref_name(pull_request.source_branch_name),
- source_branch_sha: source_branch_sha,
- target_project: project,
+ source_branch_sha: pull_request.source_branch_sha,
+ target_project_id: project.id,
target_branch: Gitlab::Git.ref_name(pull_request.target_branch_name),
- target_branch_sha: target_branch_sha,
+ target_branch_sha: pull_request.target_branch_sha,
state: pull_request.state,
author_id: author_id,
assignee_id: nil,
@@ -197,7 +195,9 @@ module Gitlab
updated_at: pull_request.updated_at
}
- merge_request = project.merge_requests.create!(attributes)
+ creator = Gitlab::Import::MergeRequestCreator.new(project)
+ merge_request = creator.execute(attributes)
+
import_pull_request_comments(pull_request, merge_request) if merge_request.persisted?
end
diff --git a/lib/gitlab/github_import.rb b/lib/gitlab/github_import.rb
index 65b5e30c70f..d40b06f969f 100644
--- a/lib/gitlab/github_import.rb
+++ b/lib/gitlab/github_import.rb
@@ -10,24 +10,6 @@ module Gitlab
Client.new(token_to_use, parallel: parallel)
end
- # Inserts a raw row and returns the ID of the inserted row.
- #
- # attributes - The attributes/columns to set.
- # relation - An ActiveRecord::Relation to use for finding the ID of the row
- # when using MySQL.
- def self.insert_and_return_id(attributes, relation)
- # We use bulk_insert here so we can bypass any queries executed by
- # callbacks or validation rules, as doing this wouldn't scale when
- # importing very large projects.
- result = Gitlab::Database
- .bulk_insert(relation.table_name, [attributes], return_ids: true)
-
- # MySQL doesn't support returning the IDs of a bulk insert in a way that
- # is not a pain, so in this case we'll issue an extra query instead.
- result.first ||
- relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first
- end
-
# Returns the ID of the ghost user.
def self.ghost_user_id
key = 'github-import/ghost-user-id'
diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb
index 31fefebf787..fe10bec395d 100644
--- a/lib/gitlab/github_import/importer/issue_importer.rb
+++ b/lib/gitlab/github_import/importer/issue_importer.rb
@@ -4,6 +4,8 @@ module Gitlab
module GithubImport
module Importer
class IssueImporter
+ include Gitlab::Import::DatabaseHelpers
+
attr_reader :project, :issue, :client, :user_finder, :milestone_finder,
:issuable_finder
@@ -55,7 +57,7 @@ module Gitlab
updated_at: issue.updated_at
}
- GithubImport.insert_and_return_id(attributes, project.issues)
+ insert_and_return_id(attributes, project.issues)
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the issue.
diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb
index 6b3688c4381..74fc64f3c6b 100644
--- a/lib/gitlab/github_import/importer/pull_request_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_request_importer.rb
@@ -4,6 +4,8 @@ module Gitlab
module GithubImport
module Importer
class PullRequestImporter
+ include Gitlab::Import::MergeRequestHelpers
+
attr_reader :pull_request, :project, :client, :user_finder,
:milestone_finder, :issuable_finder
@@ -30,6 +32,10 @@ module Gitlab
end
end
+ def insert_git_data(merge_request, already_exists)
+ insert_or_replace_git_data(merge_request, pull_request.source_branch_sha, pull_request.target_branch_sha, already_exists)
+ end
+
# Creates the merge request and returns its ID.
#
# This method will return `nil` if the merge request could not be
@@ -44,75 +50,23 @@ module Gitlab
description = MarkdownText
.format(pull_request.description, pull_request.author, author_found)
- # This work must be wrapped in a transaction as otherwise we can leave
- # behind incomplete data in the event of an error. This can then lead
- # to duplicate key errors when jobs are retried.
- MergeRequest.transaction do
- attributes = {
- iid: pull_request.iid,
- title: pull_request.truncated_title,
- description: description,
- source_project_id: project.id,
- target_project_id: project.id,
- source_branch: pull_request.formatted_source_branch,
- target_branch: pull_request.target_branch,
- state: pull_request.state,
- milestone_id: milestone_finder.id_for(pull_request),
- author_id: author_id,
- assignee_id: user_finder.assignee_id_for(pull_request),
- created_at: pull_request.created_at,
- updated_at: pull_request.updated_at
- }
-
- # When creating merge requests there are a lot of hooks that may
- # run, for many different reasons. Many of these hooks (e.g. the
- # ones used for rendering Markdown) are completely unnecessary and
- # may even lead to transaction timeouts.
- #
- # To ensure importing pull requests has a minimal impact and can
- # complete in a reasonable time we bypass all the hooks by inserting
- # the row and then retrieving it. We then only perform the
- # additional work that is strictly necessary.
- merge_request_id = GithubImport
- .insert_and_return_id(attributes, project.merge_requests)
-
- [project.merge_requests.find(merge_request_id), false]
- end
- rescue ActiveRecord::InvalidForeignKey
- # It's possible the project has been deleted since scheduling this
- # job. In this case we'll just skip creating the merge request.
- []
- rescue ActiveRecord::RecordNotUnique
- # It's possible we previously created the MR, but failed when updating
- # the Git data. In this case we'll just continue working on the
- # existing row.
- [project.merge_requests.find_by(iid: pull_request.iid), true]
- end
-
- def insert_git_data(merge_request, already_exists = false)
- # These fields are set so we can create the correct merge request
- # diffs.
- merge_request.source_branch_sha = pull_request.source_branch_sha
- merge_request.target_branch_sha = pull_request.target_branch_sha
-
- merge_request.keep_around_commit
-
- # MR diffs normally use an "after_save" hook to pull data from Git.
- # All of this happens in the transaction started by calling
- # create/save/etc. This in turn can lead to these transactions being
- # held open for much longer than necessary. To work around this we
- # first save the diff, then populate it.
- diff =
- if already_exists
- merge_request.merge_request_diffs.take ||
- merge_request.merge_request_diffs.build
- else
- merge_request.merge_request_diffs.build
- end
+ attributes = {
+ iid: pull_request.iid,
+ title: pull_request.truncated_title,
+ description: description,
+ source_project_id: project.id,
+ target_project_id: project.id,
+ source_branch: pull_request.formatted_source_branch,
+ target_branch: pull_request.target_branch,
+ state: pull_request.state,
+ milestone_id: milestone_finder.id_for(pull_request),
+ author_id: author_id,
+ assignee_id: user_finder.assignee_id_for(pull_request),
+ created_at: pull_request.created_at,
+ updated_at: pull_request.updated_at
+ }
- diff.importing = true
- diff.save
- diff.save_git_content
+ create_merge_request_without_hooks(project, attributes, pull_request.iid)
end
end
end
diff --git a/lib/gitlab/import/database_helpers.rb b/lib/gitlab/import/database_helpers.rb
new file mode 100644
index 00000000000..80857061933
--- /dev/null
+++ b/lib/gitlab/import/database_helpers.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Import
+ module DatabaseHelpers
+ # Inserts a raw row and returns the ID of the inserted row.
+ #
+ # attributes - The attributes/columns to set.
+ # relation - An ActiveRecord::Relation to use for finding the ID of the row
+ # when using MySQL.
+ def insert_and_return_id(attributes, relation)
+ # We use bulk_insert here so we can bypass any queries executed by
+ # callbacks or validation rules, as doing this wouldn't scale when
+ # importing very large projects.
+ result = Gitlab::Database
+ .bulk_insert(relation.table_name, [attributes], return_ids: true)
+
+ # MySQL doesn't support returning the IDs of a bulk insert in a way that
+ # is not a pain, so in this case we'll issue an extra query instead.
+ result.first ||
+ relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/import/merge_request_creator.rb b/lib/gitlab/import/merge_request_creator.rb
new file mode 100644
index 00000000000..327abdd4183
--- /dev/null
+++ b/lib/gitlab/import/merge_request_creator.rb
@@ -0,0 +1,38 @@
+# This module is designed for importers that need to create many merge
+# requests quickly. When creating merge requests there are a lot of hooks
+# that may run, for many different reasons. Many of these hooks (e.g. the ones
+# used for rendering Markdown) are completely unnecessary and may even lead to
+# transaction timeouts.
+#
+# To ensure importing merge requests requests has a minimal impact and can
+# complete in a reasonable time we bypass all the hooks by inserting the row
+# and then retrieving it. We then only perform the additional work that is
+# strictly necessary.
+module Gitlab
+ module Import
+ class MergeRequestCreator
+ include ::Gitlab::Import::DatabaseHelpers
+ include ::Gitlab::Import::MergeRequestHelpers
+
+ attr_accessor :project
+
+ def initialize(project)
+ @project = project
+ end
+
+ def execute(attributes)
+ source_branch_sha = attributes.delete(:source_branch_sha)
+ target_branch_sha = attributes.delete(:target_branch_sha)
+ iid = attributes[:iid]
+
+ merge_request, already_exists = create_merge_request_without_hooks(project, attributes, iid)
+
+ if merge_request
+ insert_or_replace_git_data(merge_request, source_branch_sha, target_branch_sha, already_exists)
+ end
+
+ merge_request
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/import/merge_request_helpers.rb b/lib/gitlab/import/merge_request_helpers.rb
new file mode 100644
index 00000000000..89a312574aa
--- /dev/null
+++ b/lib/gitlab/import/merge_request_helpers.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Import
+ module MergeRequestHelpers
+ include DatabaseHelpers
+
+ def create_merge_request_without_hooks(project, attributes, iid)
+ # This work must be wrapped in a transaction as otherwise we can leave
+ # behind incomplete data in the event of an error. This can then lead
+ # to duplicate key errors when jobs are retried.
+ MergeRequest.transaction do
+ # When creating merge requests there are a lot of hooks that may
+ # run, for many different reasons. Many of these hooks (e.g. the
+ # ones used for rendering Markdown) are completely unnecessary and
+ # may even lead to transaction timeouts.
+ #
+ # To ensure importing pull requests has a minimal impact and can
+ # complete in a reasonable time we bypass all the hooks by inserting
+ # the row and then retrieving it. We then only perform the
+ # additional work that is strictly necessary.
+ merge_request_id = insert_and_return_id(attributes, project.merge_requests)
+
+ [project.merge_requests.find(merge_request_id), false]
+ end
+ rescue ActiveRecord::InvalidForeignKey
+ # It's possible the project has been deleted since scheduling this
+ # job. In this case we'll just skip creating the merge request.
+ []
+ rescue ActiveRecord::RecordNotUnique
+ # It's possible we previously created the MR, but failed when updating
+ # the Git data. In this case we'll just continue working on the
+ # existing row.
+ [project.merge_requests.find_by(iid: iid), true]
+ end
+
+ def insert_or_replace_git_data(merge_request, source_branch_sha, target_branch_sha, already_exists = false)
+ # These fields are set so we can create the correct merge request
+ # diffs.
+ merge_request.source_branch_sha = source_branch_sha
+ merge_request.target_branch_sha = target_branch_sha
+
+ merge_request.keep_around_commit
+
+ # MR diffs normally use an "after_save" hook to pull data from Git.
+ # All of this happens in the transaction started by calling
+ # create/save/etc. This in turn can lead to these transactions being
+ # held open for much longer than necessary. To work around this we
+ # first save the diff, then populate it.
+ diff =
+ if already_exists
+ merge_request.merge_request_diffs.take ||
+ merge_request.merge_request_diffs.build
+ else
+ merge_request.merge_request_diffs.build
+ end
+
+ diff.importing = true
+ diff.save
+ diff.save_git_content
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
index 81fe97c1e49..7901ae005d9 100644
--- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
@@ -87,7 +87,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.with(issue)
.and_return([user.id, true])
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.with(
{
@@ -116,7 +116,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.with(issue)
.and_return([project.creator_id, false])
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.with(
{
@@ -145,7 +145,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.with(issue)
.and_return([user.id, true])
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.and_raise(ActiveRecord::InvalidForeignKey, 'invalid foreign key')
diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
index 3422a1e82fc..0f21b8843b6 100644
--- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
@@ -80,7 +80,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
end
it 'imports the pull request with the pull request author as the merge request author' do
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.with(
{
@@ -125,7 +125,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
.with(pull_request)
.and_return(user.id)
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.with(
{
@@ -171,7 +171,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
.to receive(:source_branch)
.and_return('master')
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.with(
{
@@ -209,7 +209,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
.with(pull_request)
.and_return(user.id)
- expect(Gitlab::GithubImport)
+ expect(importer)
.to receive(:insert_and_return_id)
.and_raise(ActiveRecord::InvalidForeignKey, 'invalid foreign key')
diff --git a/spec/lib/gitlab/github_import_spec.rb b/spec/lib/gitlab/github_import_spec.rb
index 51414800e8c..496244c91bf 100644
--- a/spec/lib/gitlab/github_import_spec.rb
+++ b/spec/lib/gitlab/github_import_spec.rb
@@ -27,39 +27,6 @@ describe Gitlab::GithubImport do
end
end
- describe '.insert_and_return_id' do
- let(:attributes) { { iid: 1, title: 'foo' } }
- let(:project) { create(:project) }
-
- context 'on PostgreSQL' do
- it 'returns the ID returned by the query' do
- expect(Gitlab::Database)
- .to receive(:bulk_insert)
- .with(Issue.table_name, [attributes], return_ids: true)
- .and_return([10])
-
- id = described_class.insert_and_return_id(attributes, project.issues)
-
- expect(id).to eq(10)
- end
- end
-
- context 'on MySQL' do
- it 'uses a separate query to retrieve the ID' do
- issue = create(:issue, project: project, iid: attributes[:iid])
-
- expect(Gitlab::Database)
- .to receive(:bulk_insert)
- .with(Issue.table_name, [attributes], return_ids: true)
- .and_return([])
-
- id = described_class.insert_and_return_id(attributes, project.issues)
-
- expect(id).to eq(issue.id)
- end
- end
- end
-
describe '.ghost_user_id', :clean_gitlab_redis_cache do
it 'returns the ID of the ghost user' do
expect(described_class.ghost_user_id).to eq(User.ghost.id)
diff --git a/spec/lib/gitlab/import/database_helpers_spec.rb b/spec/lib/gitlab/import/database_helpers_spec.rb
new file mode 100644
index 00000000000..ec7d5710b97
--- /dev/null
+++ b/spec/lib/gitlab/import/database_helpers_spec.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+class DatabaseHelpersTest
+ include Gitlab::Import::DatabaseHelpers
+end
+
+describe Gitlab::Import::DatabaseHelpers do
+ subject { DatabaseHelpersTest.new }
+
+ describe '.insert_and_return_id' do
+ let(:attributes) { { iid: 1, title: 'foo' } }
+ let(:project) { create(:project) }
+
+ context 'on PostgreSQL' do
+ it 'returns the ID returned by the query' do
+ expect(Gitlab::Database)
+ .to receive(:bulk_insert)
+ .with(Issue.table_name, [attributes], return_ids: true)
+ .and_return([10])
+
+ id = subject.insert_and_return_id(attributes, project.issues)
+
+ expect(id).to eq(10)
+ end
+ end
+
+ context 'on MySQL' do
+ it 'uses a separate query to retrieve the ID' do
+ issue = create(:issue, project: project, iid: attributes[:iid])
+
+ expect(Gitlab::Database)
+ .to receive(:bulk_insert)
+ .with(Issue.table_name, [attributes], return_ids: true)
+ .and_return([])
+
+ id = subject.insert_and_return_id(attributes, project.issues)
+
+ expect(id).to eq(issue.id)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/import/merge_request_creator_spec.rb b/spec/lib/gitlab/import/merge_request_creator_spec.rb
new file mode 100644
index 00000000000..3683558f59f
--- /dev/null
+++ b/spec/lib/gitlab/import/merge_request_creator_spec.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Gitlab::Import::MergeRequestCreator do
+ let(:project) { create(:project, :repository) }
+
+ subject { described_class.new(project) }
+
+ describe '#execute' do
+ context 'merge request already exists' do
+ let(:merge_request) { create(:merge_request, target_project: project, source_project: project) }
+ let(:commits) { merge_request.merge_request_diffs.first.commits }
+ let(:attributes) { HashWithIndifferentAccess.new(merge_request.attributes) }
+
+ it 'updates the data' do
+ commits_count = commits.count
+ merge_request.merge_request_diffs.destroy_all
+
+ expect(merge_request.merge_request_diffs.count).to eq(0)
+
+ subject.execute(attributes)
+
+ expect(merge_request.reload.merge_request_diffs.count).to eq(1)
+ expect(merge_request.reload.merge_request_diffs.first.commits.count).to eq(commits_count)
+ end
+ end
+
+ context 'new merge request' do
+ let(:merge_request) { build(:merge_request, target_project: project, source_project: project) }
+ let(:attributes) { HashWithIndifferentAccess.new(merge_request.attributes) }
+
+ it 'creates a new merge request' do
+ attributes.delete(:id)
+
+ expect { subject.execute(attributes) }.to change { MergeRequest.count }.by(1)
+
+ new_mr = MergeRequest.last
+ expect(new_mr.merge_request_diffs.count).to eq(1)
+ end
+ end
+ end
+end