summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAhmad Sherif <me@ahmadsherif.com>2016-09-27 19:32:47 +0200
committerAhmad Sherif <me@ahmadsherif.com>2016-09-27 20:45:07 +0200
commit395a9301b233da30a9abef56bb7b6fa6229f3acd (patch)
tree952605568f0f698960c6f1deef7ef179d50a15db
parent90578f4ad091f80428a661005060af65a0c7151e (diff)
downloadgitlab-ce-395a9301b233da30a9abef56bb7b6fa6229f3acd.tar.gz
Process each page of GitHub resources instead of concating them then processing
This should avoid having large memory growth when importing GitHub repos with lots of resources.
-rw-r--r--lib/gitlab/github_import/client.rb11
-rw-r--r--lib/gitlab/github_import/importer.rb101
-rw-r--r--spec/lib/gitlab/github_import/client_spec.rb2
3 files changed, 58 insertions, 56 deletions
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 084e514492c..e33ac61f5ae 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -52,7 +52,7 @@ module Gitlab
def method_missing(method, *args, &block)
if api.respond_to?(method)
- request { api.send(method, *args, &block) }
+ request(method, *args, &block)
else
super(method, *args, &block)
end
@@ -99,20 +99,19 @@ module Gitlab
rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
end
- def request
+ def request(method, *args, &block)
sleep rate_limit_sleep_time if rate_limit_exceed?
- data = yield
+ data = api.send(method, *args, &block)
+ yield data
last_response = api.last_response
while last_response.rels[:next]
sleep rate_limit_sleep_time if rate_limit_exceed?
last_response = last_response.rels[:next].get
- data.concat(last_response.data) if last_response.data.is_a?(Array)
+ yield last_response.data if last_response.data.is_a?(Array)
end
-
- data
end
end
end
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index d35ee2a1c65..f1adec8212b 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -46,64 +46,66 @@ module Gitlab
end
def import_labels
- labels = client.labels(repo, per_page: 100)
-
- labels.each do |raw|
- begin
- LabelFormatter.new(project, raw).create!
- rescue => e
- errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.labels(repo, per_page: 100) do |labels|
+ labels.each do |raw|
+ begin
+ LabelFormatter.new(project, raw).create!
+ rescue => e
+ errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
def import_milestones
- milestones = client.milestones(repo, state: :all, per_page: 100)
-
- milestones.each do |raw|
- begin
- MilestoneFormatter.new(project, raw).create!
- rescue => e
- errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.milestones(repo, state: :all, per_page: 100) do |milestones|
+ milestones.each do |raw|
+ begin
+ MilestoneFormatter.new(project, raw).create!
+ rescue => e
+ errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
def import_issues
- issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
-
- issues.each do |raw|
- gh_issue = IssueFormatter.new(project, raw)
-
- if gh_issue.valid?
- begin
- issue = gh_issue.create!
- apply_labels(issue)
- import_comments(issue) if gh_issue.has_comments?
- rescue => e
- errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
+ issues.each do |raw|
+ gh_issue = IssueFormatter.new(project, raw)
+
+ if gh_issue.valid?
+ begin
+ issue = gh_issue.create!
+ apply_labels(issue)
+ import_comments(issue) if gh_issue.has_comments?
+ rescue => e
+ errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
end
def import_pull_requests
- pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
- pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?)
-
- pull_requests.each do |pull_request|
- begin
- restore_source_branch(pull_request) unless pull_request.source_branch_exists?
- restore_target_branch(pull_request) unless pull_request.target_branch_exists?
-
- merge_request = pull_request.create!
- apply_labels(merge_request)
- import_comments(merge_request)
- import_comments_on_diff(merge_request)
- rescue => e
- errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
- ensure
- clean_up_restored_branches(pull_request)
+ client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
+ pull_requests.each do |raw|
+ pull_request = PullRequestFormatter.new(project, raw)
+ next unless pull_request.valid?
+
+ begin
+ restore_source_branch(pull_request) unless pull_request.source_branch_exists?
+ restore_target_branch(pull_request) unless pull_request.target_branch_exists?
+
+ merge_request = pull_request.create!
+ apply_labels(merge_request)
+ import_comments(merge_request)
+ import_comments_on_diff(merge_request)
+ rescue => e
+ errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
+ ensure
+ clean_up_restored_branches(pull_request)
+ end
end
end
end
@@ -180,13 +182,14 @@ module Gitlab
end
def import_releases
- releases = client.releases(repo, per_page: 100)
- releases.each do |raw|
- begin
- gh_release = ReleaseFormatter.new(project, raw)
- gh_release.create! if gh_release.valid?
- rescue => e
- errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.releases(repo, per_page: 100) do |releases|
+ releases.each do |raw|
+ begin
+ gh_release = ReleaseFormatter.new(project, raw)
+ gh_release.create! if gh_release.valid?
+ rescue => e
+ errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
diff --git a/spec/lib/gitlab/github_import/client_spec.rb b/spec/lib/gitlab/github_import/client_spec.rb
index 613c47d55f1..e829b936343 100644
--- a/spec/lib/gitlab/github_import/client_spec.rb
+++ b/spec/lib/gitlab/github_import/client_spec.rb
@@ -66,6 +66,6 @@ describe Gitlab::GithubImport::Client, lib: true do
stub_request(:get, /api.github.com/)
allow(client.api).to receive(:rate_limit!).and_raise(Octokit::NotFound)
- expect { client.issues }.not_to raise_error
+ expect { client.issues {} }.not_to raise_error
end
end