diff options
author | Sean McGivern <sean@mcgivern.me.uk> | 2017-10-05 14:36:38 +0000 |
---|---|---|
committer | Sean McGivern <sean@mcgivern.me.uk> | 2017-10-05 14:36:38 +0000 |
commit | 6ddd1275b3dbe209a633b922347f66b5f4f1e439 (patch) | |
tree | e8bb3218d0918f805298b4ca1b0818d579995e43 | |
parent | 5725e347ebdf04481a4eb7f82ce5db8c32b209b6 (diff) | |
parent | 6e48cae27d7c9c9280dabce013f4bcb936b6e38c (diff) | |
download | gitlab-ce-6ddd1275b3dbe209a633b922347f66b5f4f1e439.tar.gz |
Merge branch 'rc/fix/gh-import-branches-performance' into 'master'
Improve GitHub import performance
Closes #36288, #36292, and #38467
See merge request gitlab-org/gitlab-ce!14445
-rw-r--r-- | app/models/concerns/repository_mirroring.rb | 25 | ||||
-rw-r--r-- | app/models/merge_request.rb | 3 | ||||
-rw-r--r-- | app/models/merge_request_diff.rb | 1 | ||||
-rw-r--r-- | changelogs/unreleased/rc-fix-gh-import-branches-performance.yml | 5 | ||||
-rw-r--r-- | lib/github/import.rb | 46 | ||||
-rw-r--r-- | lib/github/representation/branch.rb | 20 | ||||
-rw-r--r-- | lib/github/representation/issuable.rb | 12 | ||||
-rw-r--r-- | lib/github/representation/issue.rb | 20 | ||||
-rw-r--r-- | lib/github/representation/pull_request.rb | 75 | ||||
-rw-r--r-- | lib/tasks/import.rake | 27 |
10 files changed, 113 insertions, 121 deletions
diff --git a/app/models/concerns/repository_mirroring.rb b/app/models/concerns/repository_mirroring.rb index fed336c29d6..f6aba91bc4c 100644 --- a/app/models/concerns/repository_mirroring.rb +++ b/app/models/concerns/repository_mirroring.rb @@ -1,11 +1,26 @@ module RepositoryMirroring - def set_remote_as_mirror(name) - config = raw_repository.rugged.config + IMPORT_HEAD_REFS = '+refs/heads/*:refs/heads/*'.freeze + IMPORT_TAG_REFS = '+refs/tags/*:refs/tags/*'.freeze + def set_remote_as_mirror(name) # This is used to define repository as equivalent as "git clone --mirror" - config["remote.#{name}.fetch"] = 'refs/*:refs/*' - config["remote.#{name}.mirror"] = true - config["remote.#{name}.prune"] = true + raw_repository.rugged.config["remote.#{name}.fetch"] = 'refs/*:refs/*' + raw_repository.rugged.config["remote.#{name}.mirror"] = true + raw_repository.rugged.config["remote.#{name}.prune"] = true + end + + def set_import_remote_as_mirror(remote_name) + # Add first fetch with Rugged so it does not create its own. + raw_repository.rugged.config["remote.#{remote_name}.fetch"] = IMPORT_HEAD_REFS + + add_remote_fetch_config(remote_name, IMPORT_TAG_REFS) + + raw_repository.rugged.config["remote.#{remote_name}.mirror"] = true + raw_repository.rugged.config["remote.#{remote_name}.prune"] = true + end + + def add_remote_fetch_config(remote_name, refspec) + run_git(%W[config --add remote.#{remote_name}.fetch #{refspec}]) end def fetch_mirror(remote, url) diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 0ba00d447e8..086226618e6 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -415,6 +415,8 @@ class MergeRequest < ActiveRecord::Base end def create_merge_request_diff + fetch_ref + # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37435 Gitlab::GitalyClient.allow_n_plus_1_calls do merge_request_diffs.create @@ -462,6 +464,7 @@ class MergeRequest < ActiveRecord::Base return unless open? old_diff_refs = self.diff_refs + create_merge_request_diff MergeRequests::MergeRequestDiffCacheService.new.execute(self) new_diff_refs = self.diff_refs diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index 58050e1f438..faf0b95f842 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -55,7 +55,6 @@ class MergeRequestDiff < ActiveRecord::Base end def ensure_commit_shas - merge_request.fetch_ref self.start_commit_sha ||= merge_request.target_branch_sha self.head_commit_sha ||= merge_request.source_branch_sha self.base_commit_sha ||= find_base_sha diff --git a/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml b/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml new file mode 100644 index 00000000000..af359ce96b4 --- /dev/null +++ b/changelogs/unreleased/rc-fix-gh-import-branches-performance.yml @@ -0,0 +1,5 @@ +--- +title: Improve GitHub import performance +merge_request: 14445 +author: +type: other diff --git a/lib/github/import.rb b/lib/github/import.rb index c0cd8382875..55f8387f27a 100644 --- a/lib/github/import.rb +++ b/lib/github/import.rb @@ -9,7 +9,7 @@ module Github include Gitlab::ShellAdapter attr_reader :project, :repository, :repo, :repo_url, :wiki_url, - :options, :errors, :cached, :verbose + :options, :errors, :cached, :verbose, :last_fetched_at def initialize(project, options = {}) @project = project @@ -21,12 +21,13 @@ module Github @verbose = options.fetch(:verbose, false) @cached = Hash.new { |hash, key| hash[key] = Hash.new } @errors = [] + @last_fetched_at = nil end # rubocop: disable Rails/Output def execute puts 'Fetching repository...'.color(:aqua) if verbose - fetch_repository + setup_and_fetch_repository puts 'Fetching labels...'.color(:aqua) if verbose fetch_labels puts 'Fetching milestones...'.color(:aqua) if verbose @@ -42,7 +43,7 @@ module Github puts 'Expiring repository cache...'.color(:aqua) if verbose expire_repository_cache - true + errors.empty? rescue Github::RepositoryFetchError expire_repository_cache false @@ -52,18 +53,24 @@ module Github private - def fetch_repository + def setup_and_fetch_repository begin project.ensure_repository project.repository.add_remote('github', repo_url) - project.repository.set_remote_as_mirror('github') - project.repository.fetch_remote('github', forced: true) + project.repository.set_import_remote_as_mirror('github') + project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head') + fetch_remote(forced: true) rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e error(:project, repo_url, e.message) raise Github::RepositoryFetchError end end + def fetch_remote(forced: false) + @last_fetched_at = Time.now + project.repository.fetch_remote('github', forced: forced) + end + def fetch_wiki_repository return if project.wiki.repository_exists? @@ -92,7 +99,7 @@ module Github label.color = representation.color end - cached[:label_ids][label.title] = label.id + cached[:label_ids][representation.title] = label.id rescue => e error(:label, representation.url, e.message) end @@ -143,7 +150,9 @@ module Github next unless merge_request.new_record? && pull_request.valid? begin - pull_request.restore_branches! + # If the PR has been created/updated after we last fetched the + # remote, we fetch again to get the up-to-date refs. + fetch_remote if pull_request.updated_at > last_fetched_at author_id = user_id(pull_request.author, project.creator_id) description = format_description(pull_request.description, pull_request.author) @@ -152,6 +161,7 @@ module Github iid: pull_request.iid, title: pull_request.title, description: description, + ref_fetched: true, source_project: pull_request.source_project, source_branch: pull_request.source_branch_name, source_branch_sha: pull_request.source_branch_sha, @@ -173,8 +183,6 @@ module Github fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote) rescue => e error(:pull_request, pull_request.url, e.message) - ensure - pull_request.remove_restored_branches! end end @@ -203,11 +211,11 @@ module Github # for both features, like manipulating assignees, labels # and milestones, are provided within the Issues API. if representation.pull_request? - return unless representation.has_labels? || representation.has_comments? + return unless representation.labels? || representation.comments? merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid) - if representation.has_labels? + if representation.labels? merge_request.update_attribute(:label_ids, label_ids(representation.labels)) end @@ -222,14 +230,16 @@ module Github issue.title = representation.title issue.description = format_description(representation.description, representation.author) issue.state = representation.state - issue.label_ids = label_ids(representation.labels) issue.milestone_id = milestone_id(representation.milestone) issue.author_id = author_id - issue.assignee_ids = [user_id(representation.assignee)] issue.created_at = representation.created_at issue.updated_at = representation.updated_at issue.save!(validate: false) + issue.update( + label_ids: label_ids(representation.labels), + assignee_ids: assignee_ids(representation.assignees)) + fetch_comments_conditionally(issue, representation) end rescue => e @@ -238,7 +248,7 @@ module Github end def fetch_comments_conditionally(issuable, representation) - if representation.has_comments? + if representation.comments? comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments" fetch_comments(issuable, :comment, comments_url) end @@ -302,7 +312,11 @@ module Github end def label_ids(labels) - labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact + labels.map { |label| cached[:label_ids][label.title] }.compact + end + + def assignee_ids(assignees) + assignees.map { |assignee| user_id(assignee) }.compact end def milestone_id(milestone) diff --git a/lib/github/representation/branch.rb b/lib/github/representation/branch.rb index 823e8e9a9c4..0087a3d3c4f 100644 --- a/lib/github/representation/branch.rb +++ b/lib/github/representation/branch.rb @@ -7,10 +7,14 @@ module Github raw.dig('user', 'login') || 'unknown' end + def repo? + raw['repo'].present? + end + def repo - return @repo if defined?(@repo) + return unless repo? - @repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present? + @repo ||= Github::Representation::Repo.new(raw['repo']) end def ref @@ -25,10 +29,6 @@ module Github Commit.truncate_sha(sha) end - def exists? - @exists ||= branch_exists? && commit_exists? - end - def valid? sha.present? && ref.present? end @@ -47,14 +47,6 @@ module Github private - def branch_exists? - repository.branch_exists?(ref) - end - - def commit_exists? - repository.branch_names_contains(sha).include?(ref) - end - def repository @repository ||= options.fetch(:repository) end diff --git a/lib/github/representation/issuable.rb b/lib/github/representation/issuable.rb index 9713b82615d..768ba3b993c 100644 --- a/lib/github/representation/issuable.rb +++ b/lib/github/representation/issuable.rb @@ -23,14 +23,14 @@ module Github @author ||= Github::Representation::User.new(raw['user'], options) end - def assignee - return unless assigned? - - @assignee ||= Github::Representation::User.new(raw['assignee'], options) + def labels? + raw['labels'].any? end - def assigned? - raw['assignee'].present? + def labels + @labels ||= Array(raw['labels']).map do |label| + Github::Representation::Label.new(label, options) + end end end end diff --git a/lib/github/representation/issue.rb b/lib/github/representation/issue.rb index df3540a6e6c..4f1a02cb90f 100644 --- a/lib/github/representation/issue.rb +++ b/lib/github/representation/issue.rb @@ -1,25 +1,27 @@ module Github module Representation class Issue < Representation::Issuable - def labels - raw['labels'] - end - def state raw['state'] == 'closed' ? 'closed' : 'opened' end - def has_comments? + def comments? raw['comments'] > 0 end - def has_labels? - labels.count > 0 - end - def pull_request? raw['pull_request'].present? end + + def assigned? + raw['assignees'].present? + end + + def assignees + @assignees ||= Array(raw['assignees']).map do |user| + Github::Representation::User.new(user, options) + end + end end end end diff --git a/lib/github/representation/pull_request.rb b/lib/github/representation/pull_request.rb index 55461097e8a..0171179bb0f 100644 --- a/lib/github/representation/pull_request.rb +++ b/lib/github/representation/pull_request.rb @@ -1,26 +1,17 @@ module Github module Representation class PullRequest < Representation::Issuable - delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true - delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true + delegate :sha, to: :source_branch, prefix: true + delegate :sha, to: :target_branch, prefix: true def source_project project end def source_branch_name - @source_branch_name ||= - if cross_project? || !source_branch_exists? - source_branch_name_prefixed - else - source_branch_ref - end - end - - def source_branch_exists? - return @source_branch_exists if defined?(@source_branch_exists) - - @source_branch_exists = !cross_project? && source_branch.exists? + # Mimic the "user:branch" displayed in the MR widget, + # i.e. "Request to merge rymai:add-external-mounts into master" + cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref end def target_project @@ -28,11 +19,7 @@ module Github end def target_branch_name - @target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed - end - - def target_branch_exists? - @target_branch_exists ||= target_branch.exists? + target_branch.ref end def state @@ -50,16 +37,14 @@ module Github source_branch.valid? && target_branch.valid? end - def restore_branches! - restore_source_branch! - restore_target_branch! + def assigned? + raw['assignee'].present? end - def remove_restored_branches! - return if opened? + def assignee + return unless assigned? - remove_source_branch! - remove_target_branch! + @assignee ||= Github::Representation::User.new(raw['assignee'], options) end private @@ -72,48 +57,14 @@ module Github @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository) end - def source_branch_name_prefixed - "gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}" - end - def target_branch @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository) end - def target_branch_name_prefixed - "gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}" - end - def cross_project? - return true if source_branch_repo.nil? - - source_branch_repo.id != target_branch_repo.id - end - - def restore_source_branch! - return if source_branch_exists? - - source_branch.restore!(source_branch_name) - end - - def restore_target_branch! - return if target_branch_exists? - - target_branch.restore!(target_branch_name) - end - - def remove_source_branch! - # We should remove the source/target branches only if they were - # restored. Otherwise, we'll remove branches like 'master' that - # target_branch_exists? returns true. In other words, we need - # to clean up only the restored branches that (source|target)_branch_exists? - # returns false for the first time it has been called, because of - # this that is important to memoize these values. - source_branch.remove!(source_branch_name) unless source_branch_exists? - end + return true unless source_branch.repo? - def remove_target_branch! - target_branch.remove!(target_branch_name) unless target_branch_exists? + source_branch.repo.id != target_branch.repo.id end end end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 4d485108cf6..7f86fd7b45e 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -39,13 +39,19 @@ class GithubImport def import! @project.force_import_start + import_success = false + timings = Benchmark.measure do - Github::Import.new(@project, @options).execute + import_success = Github::Import.new(@project, @options).execute end - puts "Import finished. Timings: #{timings}".color(:green) - - @project.import_finish + if import_success + @project.import_finish + puts "Import finished. Timings: #{timings}".color(:green) + else + puts "Import was not successful. Errors were as follows:" + puts @project.import_error + end end def new_project @@ -53,18 +59,23 @@ class GithubImport namespace_path, _sep, name = @project_path.rpartition('/') namespace = find_or_create_namespace(namespace_path) - Projects::CreateService.new( + project = Projects::CreateService.new( @current_user, name: name, path: name, description: @repo['description'], namespace_id: namespace.id, visibility_level: visibility_level, - import_type: 'github', - import_source: @repo['full_name'], - import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"), skip_wiki: @repo['has_wiki'] ).execute + + project.update!( + import_type: 'github', + import_source: @repo['full_name'], + import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@") + ) + + project end end |