summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2017-10-05 14:36:38 +0000
committerSean McGivern <sean@mcgivern.me.uk>2017-10-05 14:36:38 +0000
commit6ddd1275b3dbe209a633b922347f66b5f4f1e439 (patch)
treee8bb3218d0918f805298b4ca1b0818d579995e43
parent5725e347ebdf04481a4eb7f82ce5db8c32b209b6 (diff)
parent6e48cae27d7c9c9280dabce013f4bcb936b6e38c (diff)
downloadgitlab-ce-6ddd1275b3dbe209a633b922347f66b5f4f1e439.tar.gz
Merge branch 'rc/fix/gh-import-branches-performance' into 'master'
Improve GitHub import performance Closes #36288, #36292, and #38467 See merge request gitlab-org/gitlab-ce!14445
-rw-r--r--app/models/concerns/repository_mirroring.rb25
-rw-r--r--app/models/merge_request.rb3
-rw-r--r--app/models/merge_request_diff.rb1
-rw-r--r--changelogs/unreleased/rc-fix-gh-import-branches-performance.yml5
-rw-r--r--lib/github/import.rb46
-rw-r--r--lib/github/representation/branch.rb20
-rw-r--r--lib/github/representation/issuable.rb12
-rw-r--r--lib/github/representation/issue.rb20
-rw-r--r--lib/github/representation/pull_request.rb75
-rw-r--r--lib/tasks/import.rake27
10 files changed, 113 insertions, 121 deletions
diff --git a/app/models/concerns/repository_mirroring.rb b/app/models/concerns/repository_mirroring.rb
index fed336c29d6..f6aba91bc4c 100644
--- a/app/models/concerns/repository_mirroring.rb
+++ b/app/models/concerns/repository_mirroring.rb
@@ -1,11 +1,26 @@
module RepositoryMirroring
- def set_remote_as_mirror(name)
- config = raw_repository.rugged.config
+ IMPORT_HEAD_REFS = '+refs/heads/*:refs/heads/*'.freeze
+ IMPORT_TAG_REFS = '+refs/tags/*:refs/tags/*'.freeze
+ def set_remote_as_mirror(name)
# This is used to define repository as equivalent as "git clone --mirror"
- config["remote.#{name}.fetch"] = 'refs/*:refs/*'
- config["remote.#{name}.mirror"] = true
- config["remote.#{name}.prune"] = true
+ raw_repository.rugged.config["remote.#{name}.fetch"] = 'refs/*:refs/*'
+ raw_repository.rugged.config["remote.#{name}.mirror"] = true
+ raw_repository.rugged.config["remote.#{name}.prune"] = true
+ end
+
+ def set_import_remote_as_mirror(remote_name)
+ # Add first fetch with Rugged so it does not create its own.
+ raw_repository.rugged.config["remote.#{remote_name}.fetch"] = IMPORT_HEAD_REFS
+
+ add_remote_fetch_config(remote_name, IMPORT_TAG_REFS)
+
+ raw_repository.rugged.config["remote.#{remote_name}.mirror"] = true
+ raw_repository.rugged.config["remote.#{remote_name}.prune"] = true
+ end
+
+ def add_remote_fetch_config(remote_name, refspec)
+ run_git(%W[config --add remote.#{remote_name}.fetch #{refspec}])
end
def fetch_mirror(remote, url)
diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb
index 0ba00d447e8..086226618e6 100644
--- a/app/models/merge_request.rb
+++ b/app/models/merge_request.rb
@@ -415,6 +415,8 @@ class MergeRequest < ActiveRecord::Base
end
def create_merge_request_diff
+ fetch_ref
+
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37435
Gitlab::GitalyClient.allow_n_plus_1_calls do
merge_request_diffs.create
@@ -462,6 +464,7 @@ class MergeRequest < ActiveRecord::Base
return unless open?
old_diff_refs = self.diff_refs
+
create_merge_request_diff
MergeRequests::MergeRequestDiffCacheService.new.execute(self)
new_diff_refs = self.diff_refs
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index 58050e1f438..faf0b95f842 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -55,7 +55,6 @@ class MergeRequestDiff < ActiveRecord::Base
end
def ensure_commit_shas
- merge_request.fetch_ref
self.start_commit_sha ||= merge_request.target_branch_sha
self.head_commit_sha ||= merge_request.source_branch_sha
self.base_commit_sha ||= find_base_sha
diff --git a/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml b/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml
new file mode 100644
index 00000000000..af359ce96b4
--- /dev/null
+++ b/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml
@@ -0,0 +1,5 @@
+---
+title: Improve GitHub import performance
+merge_request: 14445
+author:
+type: other
diff --git a/lib/github/import.rb b/lib/github/import.rb
index c0cd8382875..55f8387f27a 100644
--- a/lib/github/import.rb
+++ b/lib/github/import.rb
@@ -9,7 +9,7 @@ module Github
include Gitlab::ShellAdapter
attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
- :options, :errors, :cached, :verbose
+ :options, :errors, :cached, :verbose, :last_fetched_at
def initialize(project, options = {})
@project = project
@@ -21,12 +21,13 @@ module Github
@verbose = options.fetch(:verbose, false)
@cached = Hash.new { |hash, key| hash[key] = Hash.new }
@errors = []
+ @last_fetched_at = nil
end
# rubocop: disable Rails/Output
def execute
puts 'Fetching repository...'.color(:aqua) if verbose
- fetch_repository
+ setup_and_fetch_repository
puts 'Fetching labels...'.color(:aqua) if verbose
fetch_labels
puts 'Fetching milestones...'.color(:aqua) if verbose
@@ -42,7 +43,7 @@ module Github
puts 'Expiring repository cache...'.color(:aqua) if verbose
expire_repository_cache
- true
+ errors.empty?
rescue Github::RepositoryFetchError
expire_repository_cache
false
@@ -52,18 +53,24 @@ module Github
private
- def fetch_repository
+ def setup_and_fetch_repository
begin
project.ensure_repository
project.repository.add_remote('github', repo_url)
- project.repository.set_remote_as_mirror('github')
- project.repository.fetch_remote('github', forced: true)
+ project.repository.set_import_remote_as_mirror('github')
+ project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head')
+ fetch_remote(forced: true)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
error(:project, repo_url, e.message)
raise Github::RepositoryFetchError
end
end
+ def fetch_remote(forced: false)
+ @last_fetched_at = Time.now
+ project.repository.fetch_remote('github', forced: forced)
+ end
+
def fetch_wiki_repository
return if project.wiki.repository_exists?
@@ -92,7 +99,7 @@ module Github
label.color = representation.color
end
- cached[:label_ids][label.title] = label.id
+ cached[:label_ids][representation.title] = label.id
rescue => e
error(:label, representation.url, e.message)
end
@@ -143,7 +150,9 @@ module Github
next unless merge_request.new_record? && pull_request.valid?
begin
- pull_request.restore_branches!
+ # If the PR has been created/updated after we last fetched the
+ # remote, we fetch again to get the up-to-date refs.
+ fetch_remote if pull_request.updated_at > last_fetched_at
author_id = user_id(pull_request.author, project.creator_id)
description = format_description(pull_request.description, pull_request.author)
@@ -152,6 +161,7 @@ module Github
iid: pull_request.iid,
title: pull_request.title,
description: description,
+ ref_fetched: true,
source_project: pull_request.source_project,
source_branch: pull_request.source_branch_name,
source_branch_sha: pull_request.source_branch_sha,
@@ -173,8 +183,6 @@ module Github
fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)
rescue => e
error(:pull_request, pull_request.url, e.message)
- ensure
- pull_request.remove_restored_branches!
end
end
@@ -203,11 +211,11 @@ module Github
# for both features, like manipulating assignees, labels
# and milestones, are provided within the Issues API.
if representation.pull_request?
- return unless representation.has_labels? || representation.has_comments?
+ return unless representation.labels? || representation.comments?
merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
- if representation.has_labels?
+ if representation.labels?
merge_request.update_attribute(:label_ids, label_ids(representation.labels))
end
@@ -222,14 +230,16 @@ module Github
issue.title = representation.title
issue.description = format_description(representation.description, representation.author)
issue.state = representation.state
- issue.label_ids = label_ids(representation.labels)
issue.milestone_id = milestone_id(representation.milestone)
issue.author_id = author_id
- issue.assignee_ids = [user_id(representation.assignee)]
issue.created_at = representation.created_at
issue.updated_at = representation.updated_at
issue.save!(validate: false)
+ issue.update(
+ label_ids: label_ids(representation.labels),
+ assignee_ids: assignee_ids(representation.assignees))
+
fetch_comments_conditionally(issue, representation)
end
rescue => e
@@ -238,7 +248,7 @@ module Github
end
def fetch_comments_conditionally(issuable, representation)
- if representation.has_comments?
+ if representation.comments?
comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments"
fetch_comments(issuable, :comment, comments_url)
end
@@ -302,7 +312,11 @@ module Github
end
def label_ids(labels)
- labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact
+ labels.map { |label| cached[:label_ids][label.title] }.compact
+ end
+
+ def assignee_ids(assignees)
+ assignees.map { |assignee| user_id(assignee) }.compact
end
def milestone_id(milestone)
diff --git a/lib/github/representation/branch.rb b/lib/github/representation/branch.rb
index 823e8e9a9c4..0087a3d3c4f 100644
--- a/lib/github/representation/branch.rb
+++ b/lib/github/representation/branch.rb
@@ -7,10 +7,14 @@ module Github
raw.dig('user', 'login') || 'unknown'
end
+ def repo?
+ raw['repo'].present?
+ end
+
def repo
- return @repo if defined?(@repo)
+ return unless repo?
- @repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present?
+ @repo ||= Github::Representation::Repo.new(raw['repo'])
end
def ref
@@ -25,10 +29,6 @@ module Github
Commit.truncate_sha(sha)
end
- def exists?
- @exists ||= branch_exists? && commit_exists?
- end
-
def valid?
sha.present? && ref.present?
end
@@ -47,14 +47,6 @@ module Github
private
- def branch_exists?
- repository.branch_exists?(ref)
- end
-
- def commit_exists?
- repository.branch_names_contains(sha).include?(ref)
- end
-
def repository
@repository ||= options.fetch(:repository)
end
diff --git a/lib/github/representation/issuable.rb b/lib/github/representation/issuable.rb
index 9713b82615d..768ba3b993c 100644
--- a/lib/github/representation/issuable.rb
+++ b/lib/github/representation/issuable.rb
@@ -23,14 +23,14 @@ module Github
@author ||= Github::Representation::User.new(raw['user'], options)
end
- def assignee
- return unless assigned?
-
- @assignee ||= Github::Representation::User.new(raw['assignee'], options)
+ def labels?
+ raw['labels'].any?
end
- def assigned?
- raw['assignee'].present?
+ def labels
+ @labels ||= Array(raw['labels']).map do |label|
+ Github::Representation::Label.new(label, options)
+ end
end
end
end
diff --git a/lib/github/representation/issue.rb b/lib/github/representation/issue.rb
index df3540a6e6c..4f1a02cb90f 100644
--- a/lib/github/representation/issue.rb
+++ b/lib/github/representation/issue.rb
@@ -1,25 +1,27 @@
module Github
module Representation
class Issue < Representation::Issuable
- def labels
- raw['labels']
- end
-
def state
raw['state'] == 'closed' ? 'closed' : 'opened'
end
- def has_comments?
+ def comments?
raw['comments'] > 0
end
- def has_labels?
- labels.count > 0
- end
-
def pull_request?
raw['pull_request'].present?
end
+
+ def assigned?
+ raw['assignees'].present?
+ end
+
+ def assignees
+ @assignees ||= Array(raw['assignees']).map do |user|
+ Github::Representation::User.new(user, options)
+ end
+ end
end
end
end
diff --git a/lib/github/representation/pull_request.rb b/lib/github/representation/pull_request.rb
index 55461097e8a..0171179bb0f 100644
--- a/lib/github/representation/pull_request.rb
+++ b/lib/github/representation/pull_request.rb
@@ -1,26 +1,17 @@
module Github
module Representation
class PullRequest < Representation::Issuable
- delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true
- delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true
+ delegate :sha, to: :source_branch, prefix: true
+ delegate :sha, to: :target_branch, prefix: true
def source_project
project
end
def source_branch_name
- @source_branch_name ||=
- if cross_project? || !source_branch_exists?
- source_branch_name_prefixed
- else
- source_branch_ref
- end
- end
-
- def source_branch_exists?
- return @source_branch_exists if defined?(@source_branch_exists)
-
- @source_branch_exists = !cross_project? && source_branch.exists?
+ # Mimic the "user:branch" displayed in the MR widget,
+ # i.e. "Request to merge rymai:add-external-mounts into master"
+ cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref
end
def target_project
@@ -28,11 +19,7 @@ module Github
end
def target_branch_name
- @target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
- end
-
- def target_branch_exists?
- @target_branch_exists ||= target_branch.exists?
+ target_branch.ref
end
def state
@@ -50,16 +37,14 @@ module Github
source_branch.valid? && target_branch.valid?
end
- def restore_branches!
- restore_source_branch!
- restore_target_branch!
+ def assigned?
+ raw['assignee'].present?
end
- def remove_restored_branches!
- return if opened?
+ def assignee
+ return unless assigned?
- remove_source_branch!
- remove_target_branch!
+ @assignee ||= Github::Representation::User.new(raw['assignee'], options)
end
private
@@ -72,48 +57,14 @@ module Github
@source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
end
- def source_branch_name_prefixed
- "gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
- end
-
def target_branch
@target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
end
- def target_branch_name_prefixed
- "gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
- end
-
def cross_project?
- return true if source_branch_repo.nil?
-
- source_branch_repo.id != target_branch_repo.id
- end
-
- def restore_source_branch!
- return if source_branch_exists?
-
- source_branch.restore!(source_branch_name)
- end
-
- def restore_target_branch!
- return if target_branch_exists?
-
- target_branch.restore!(target_branch_name)
- end
-
- def remove_source_branch!
- # We should remove the source/target branches only if they were
- # restored. Otherwise, we'll remove branches like 'master' that
- # target_branch_exists? returns true. In other words, we need
- # to clean up only the restored branches that (source|target)_branch_exists?
- # returns false for the first time it has been called, because of
- # this that is important to memoize these values.
- source_branch.remove!(source_branch_name) unless source_branch_exists?
- end
+ return true unless source_branch.repo?
- def remove_target_branch!
- target_branch.remove!(target_branch_name) unless target_branch_exists?
+ source_branch.repo.id != target_branch.repo.id
end
end
end
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
index 4d485108cf6..7f86fd7b45e 100644
--- a/lib/tasks/import.rake
+++ b/lib/tasks/import.rake
@@ -39,13 +39,19 @@ class GithubImport
def import!
@project.force_import_start
+ import_success = false
+
timings = Benchmark.measure do
- Github::Import.new(@project, @options).execute
+ import_success = Github::Import.new(@project, @options).execute
end
- puts "Import finished. Timings: #{timings}".color(:green)
-
- @project.import_finish
+ if import_success
+ @project.import_finish
+ puts "Import finished. Timings: #{timings}".color(:green)
+ else
+ puts "Import was not successful. Errors were as follows:"
+ puts @project.import_error
+ end
end
def new_project
@@ -53,18 +59,23 @@ class GithubImport
namespace_path, _sep, name = @project_path.rpartition('/')
namespace = find_or_create_namespace(namespace_path)
- Projects::CreateService.new(
+ project = Projects::CreateService.new(
@current_user,
name: name,
path: name,
description: @repo['description'],
namespace_id: namespace.id,
visibility_level: visibility_level,
- import_type: 'github',
- import_source: @repo['full_name'],
- import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"),
skip_wiki: @repo['has_wiki']
).execute
+
+ project.update!(
+ import_type: 'github',
+ import_source: @repo['full_name'],
+ import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@")
+ )
+
+ project
end
end