From 395a9301b233da30a9abef56bb7b6fa6229f3acd Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Tue, 27 Sep 2016 19:32:47 +0200 Subject: Process each page of GitHub resources instead of concating them then processing This should avoid having large memory growth when importing GitHub repos with lots of resources. --- lib/gitlab/github_import/client.rb | 11 ++- lib/gitlab/github_import/importer.rb | 101 ++++++++++++++------------- spec/lib/gitlab/github_import/client_spec.rb | 2 +- 3 files changed, 58 insertions(+), 56 deletions(-) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 084e514492c..e33ac61f5ae 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -52,7 +52,7 @@ module Gitlab def method_missing(method, *args, &block) if api.respond_to?(method) - request { api.send(method, *args, &block) } + request(method, *args, &block) else super(method, *args, &block) end @@ -99,20 +99,19 @@ module Gitlab rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME end - def request + def request(method, *args, &block) sleep rate_limit_sleep_time if rate_limit_exceed? - data = yield + data = api.send(method, *args, &block) + yield data last_response = api.last_response while last_response.rels[:next] sleep rate_limit_sleep_time if rate_limit_exceed? last_response = last_response.rels[:next].get - data.concat(last_response.data) if last_response.data.is_a?(Array) + yield last_response.data if last_response.data.is_a?(Array) end - - data end end end diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index d35ee2a1c65..f1adec8212b 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -46,64 +46,66 @@ module Gitlab end def import_labels - labels = client.labels(repo, per_page: 100) - - labels.each do |raw| - begin - LabelFormatter.new(project, raw).create! - rescue => e - errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.labels(repo, per_page: 100) do |labels| + labels.each do |raw| + begin + LabelFormatter.new(project, raw).create! + rescue => e + errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_milestones - milestones = client.milestones(repo, state: :all, per_page: 100) - - milestones.each do |raw| - begin - MilestoneFormatter.new(project, raw).create! - rescue => e - errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.milestones(repo, state: :all, per_page: 100) do |milestones| + milestones.each do |raw| + begin + MilestoneFormatter.new(project, raw).create! + rescue => e + errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_issues - issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - - issues.each do |raw| - gh_issue = IssueFormatter.new(project, raw) - - if gh_issue.valid? - begin - issue = gh_issue.create! - apply_labels(issue) - import_comments(issue) if gh_issue.has_comments? - rescue => e - errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| + issues.each do |raw| + gh_issue = IssueFormatter.new(project, raw) + + if gh_issue.valid? + begin + issue = gh_issue.create! + apply_labels(issue) + import_comments(issue) if gh_issue.has_comments? + rescue => e + errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end end def import_pull_requests - pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?) - - pull_requests.each do |pull_request| - begin - restore_source_branch(pull_request) unless pull_request.source_branch_exists? - restore_target_branch(pull_request) unless pull_request.target_branch_exists? - - merge_request = pull_request.create! - apply_labels(merge_request) - import_comments(merge_request) - import_comments_on_diff(merge_request) - rescue => e - errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } - ensure - clean_up_restored_branches(pull_request) + client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| + pull_requests.each do |raw| + pull_request = PullRequestFormatter.new(project, raw) + next unless pull_request.valid? + + begin + restore_source_branch(pull_request) unless pull_request.source_branch_exists? + restore_target_branch(pull_request) unless pull_request.target_branch_exists? + + merge_request = pull_request.create! + apply_labels(merge_request) + import_comments(merge_request) + import_comments_on_diff(merge_request) + rescue => e + errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } + ensure + clean_up_restored_branches(pull_request) + end end end end @@ -180,13 +182,14 @@ module Gitlab end def import_releases - releases = client.releases(repo, per_page: 100) - releases.each do |raw| - begin - gh_release = ReleaseFormatter.new(project, raw) - gh_release.create! if gh_release.valid? - rescue => e - errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.releases(repo, per_page: 100) do |releases| + releases.each do |raw| + begin + gh_release = ReleaseFormatter.new(project, raw) + gh_release.create! if gh_release.valid? + rescue => e + errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end diff --git a/spec/lib/gitlab/github_import/client_spec.rb b/spec/lib/gitlab/github_import/client_spec.rb index 613c47d55f1..e829b936343 100644 --- a/spec/lib/gitlab/github_import/client_spec.rb +++ b/spec/lib/gitlab/github_import/client_spec.rb @@ -66,6 +66,6 @@ describe Gitlab::GithubImport::Client, lib: true do stub_request(:get, /api.github.com/) allow(client.api).to receive(:rate_limit!).and_raise(Octokit::NotFound) - expect { client.issues }.not_to raise_error + expect { client.issues {} }.not_to raise_error end end -- cgit v1.2.1 From dbcbbf262b731e30446167fbd10ef081d195c862 Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Tue, 27 Sep 2016 19:35:39 +0200 Subject: Speed up label-applying process for GitHub importing * No need to re-fetch issues from GH to read their labels, the labels are already there from the index request. * No need to look up labels on the database for every application, so we cache them. --- lib/gitlab/github_import/importer.rb | 18 +++++++++--------- spec/lib/gitlab/github_import/importer_spec.rb | 11 ++++++----- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index f1adec8212b..31fba36de7e 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -10,6 +10,7 @@ module Gitlab @repo = project.import_source @repo_url = project.import_url @errors = [] + @labels = {} if credentials @client = Client.new(credentials[:user]) @@ -49,7 +50,8 @@ module Gitlab client.labels(repo, per_page: 100) do |labels| labels.each do |raw| begin - LabelFormatter.new(project, raw).create! + label = LabelFormatter.new(project, raw).create! + @labels[label.title] = label.id rescue => e errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end @@ -77,7 +79,7 @@ module Gitlab if gh_issue.valid? begin issue = gh_issue.create! - apply_labels(issue) + apply_labels(issue, raw) import_comments(issue) if gh_issue.has_comments? rescue => e errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } @@ -98,7 +100,7 @@ module Gitlab restore_target_branch(pull_request) unless pull_request.target_branch_exists? merge_request = pull_request.create! - apply_labels(merge_request) + apply_labels(merge_request, raw) import_comments(merge_request) import_comments_on_diff(merge_request) rescue => e @@ -131,12 +133,10 @@ module Gitlab project.repository.after_remove_branch end - def apply_labels(issuable) - issue = client.issue(repo, issuable.iid) - - if issue.labels.count > 0 - label_ids = issue.labels - .map { |attrs| project.labels.find_by(title: attrs.name).try(:id) } + def apply_labels(issuable, raw_issuable) + if raw_issuable.labels.count > 0 + label_ids = raw_issuable.labels + .map { |attrs| @labels[attrs.name] } .compact issuable.update_attribute(:label_ids, label_ids) diff --git a/spec/lib/gitlab/github_import/importer_spec.rb b/spec/lib/gitlab/github_import/importer_spec.rb index 553c849c9b4..844c081fc7f 100644 --- a/spec/lib/gitlab/github_import/importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer_spec.rb @@ -57,7 +57,8 @@ describe Gitlab::GithubImport::Importer, lib: true do created_at: created_at, updated_at: updated_at, closed_at: nil, - url: 'https://api.github.com/repos/octocat/Hello-World/issues/1347' + url: 'https://api.github.com/repos/octocat/Hello-World/issues/1347', + labels: [double(name: 'Label #1')], ) end @@ -75,7 +76,8 @@ describe Gitlab::GithubImport::Importer, lib: true do created_at: created_at, updated_at: updated_at, closed_at: nil, - url: 'https://api.github.com/repos/octocat/Hello-World/issues/1348' + url: 'https://api.github.com/repos/octocat/Hello-World/issues/1348', + labels: [double(name: 'Label #2')], ) end @@ -94,7 +96,8 @@ describe Gitlab::GithubImport::Importer, lib: true do updated_at: updated_at, closed_at: nil, merged_at: nil, - url: 'https://api.github.com/repos/octocat/Hello-World/pulls/1347' + url: 'https://api.github.com/repos/octocat/Hello-World/pulls/1347', + labels: [double(name: 'Label #3')], ) end @@ -148,9 +151,7 @@ describe Gitlab::GithubImport::Importer, lib: true do errors: [ { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" }, { type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" }, - { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1347", errors: "Invalid Repository. Use user/repo format." }, { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" }, - { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" }, { type: :wiki, errors: "Gitlab::Shell::Error" }, { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" } -- cgit v1.2.1 From e30bfb809afdbf5596176cf0fe0ea63e976cbb9f Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Tue, 27 Sep 2016 19:41:22 +0200 Subject: Import all GitHub comments after importing issues and PRs --- lib/gitlab/github_import/importer.rb | 27 ++++++++++++++------------ spec/lib/gitlab/github_import/importer_spec.rb | 2 ++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index 31fba36de7e..1d385dba0f5 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -24,6 +24,7 @@ module Gitlab import_milestones import_issues import_pull_requests + import_comments import_wiki import_releases handle_errors @@ -80,7 +81,6 @@ module Gitlab begin issue = gh_issue.create! apply_labels(issue, raw) - import_comments(issue) if gh_issue.has_comments? rescue => e errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end @@ -101,8 +101,6 @@ module Gitlab merge_request = pull_request.create! apply_labels(merge_request, raw) - import_comments(merge_request) - import_comments_on_diff(merge_request) rescue => e errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } ensure @@ -143,21 +141,26 @@ module Gitlab end end - def import_comments(issuable) - comments = client.issue_comments(repo, issuable.iid, per_page: 100) - create_comments(issuable, comments) - end + def import_comments + client.issues_comments(repo, per_page: 100) do |comments| + create_comments(comments, :issue) + end - def import_comments_on_diff(merge_request) - comments = client.pull_request_comments(repo, merge_request.iid, per_page: 100) - create_comments(merge_request, comments) + client.pull_requests_comments(repo, per_page: 100) do |comments| + create_comments(comments, :pull_request) + end end - def create_comments(issuable, comments) + def create_comments(comments, issuable_type) ActiveRecord::Base.no_touching do comments.each do |raw| begin - comment = CommentFormatter.new(project, raw) + comment = CommentFormatter.new(project, raw) + issuable_class = issuable_type == :issue ? Issue : MergeRequest + iid = raw.send("#{issuable_type}_url").split('/').last # GH doesn't return parent ID directly + issuable = issuable_class.find_by_iid(iid) + next unless issuable + issuable.notes.create!(comment.attributes) rescue => e errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } diff --git a/spec/lib/gitlab/github_import/importer_spec.rb b/spec/lib/gitlab/github_import/importer_spec.rb index 844c081fc7f..8854c8431b5 100644 --- a/spec/lib/gitlab/github_import/importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer_spec.rb @@ -132,6 +132,8 @@ describe Gitlab::GithubImport::Importer, lib: true do allow_any_instance_of(Octokit::Client).to receive(:milestones).and_return([milestone, milestone]) allow_any_instance_of(Octokit::Client).to receive(:issues).and_return([issue1, issue2]) allow_any_instance_of(Octokit::Client).to receive(:pull_requests).and_return([pull_request, pull_request]) + allow_any_instance_of(Octokit::Client).to receive(:issues_comments).and_return([]) + allow_any_instance_of(Octokit::Client).to receive(:pull_requests_comments).and_return([]) allow_any_instance_of(Octokit::Client).to receive(:last_response).and_return(double(rels: { next: nil })) allow_any_instance_of(Octokit::Client).to receive(:releases).and_return([release1, release2]) allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error) -- cgit v1.2.1 From dca1acd6a6eaf78f31e90569ff008b1a26d63fbb Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Tue, 27 Sep 2016 19:36:58 +0200 Subject: Call after_remove_branch only once after importing all GitHub PRs --- CHANGELOG | 1 + lib/gitlab/github_import/importer.rb | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b5827cc128a..da3bb75fb43 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,7 @@ v 8.13.0 (unreleased) - Fix robots.txt disallowing access to groups starting with "s" (Matt Harrison) - Revoke button in Applications Settings underlines on hover. - Add organization field to user profile + - Optimize GitHub importing for speed and memory v 8.12.2 (unreleased) - Fix Import/Export not recognising correctly the imported services. diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index 1d385dba0f5..b8321244473 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -108,6 +108,8 @@ module Gitlab end end end + + project.repository.after_remove_branch end def restore_source_branch(pull_request) @@ -127,8 +129,6 @@ module Gitlab def clean_up_restored_branches(pull_request) remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists? remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists? - - project.repository.after_remove_branch end def apply_labels(issuable, raw_issuable) -- cgit v1.2.1