summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAhmad Sherif <me@ahmadsherif.com>2016-09-27 19:32:47 +0200
committerAhmad Sherif <me@ahmadsherif.com>2016-09-27 20:45:07 +0200
commit395a9301b233da30a9abef56bb7b6fa6229f3acd (patch)
tree952605568f0f698960c6f1deef7ef179d50a15db /lib
parent90578f4ad091f80428a661005060af65a0c7151e (diff)
downloadgitlab-ce-395a9301b233da30a9abef56bb7b6fa6229f3acd.tar.gz
Process each page of GitHub resources instead of concating them then processing
This should avoid having large memory growth when importing GitHub repos with lots of resources.
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/github_import/client.rb11
-rw-r--r--lib/gitlab/github_import/importer.rb101
2 files changed, 57 insertions, 55 deletions
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 084e514492c..e33ac61f5ae 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -52,7 +52,7 @@ module Gitlab
def method_missing(method, *args, &block)
if api.respond_to?(method)
- request { api.send(method, *args, &block) }
+ request(method, *args, &block)
else
super(method, *args, &block)
end
@@ -99,20 +99,19 @@ module Gitlab
rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
end
- def request
+ def request(method, *args, &block)
sleep rate_limit_sleep_time if rate_limit_exceed?
- data = yield
+ data = api.send(method, *args, &block)
+ yield data
last_response = api.last_response
while last_response.rels[:next]
sleep rate_limit_sleep_time if rate_limit_exceed?
last_response = last_response.rels[:next].get
- data.concat(last_response.data) if last_response.data.is_a?(Array)
+ yield last_response.data if last_response.data.is_a?(Array)
end
-
- data
end
end
end
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index d35ee2a1c65..f1adec8212b 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -46,64 +46,66 @@ module Gitlab
end
def import_labels
- labels = client.labels(repo, per_page: 100)
-
- labels.each do |raw|
- begin
- LabelFormatter.new(project, raw).create!
- rescue => e
- errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.labels(repo, per_page: 100) do |labels|
+ labels.each do |raw|
+ begin
+ LabelFormatter.new(project, raw).create!
+ rescue => e
+ errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
def import_milestones
- milestones = client.milestones(repo, state: :all, per_page: 100)
-
- milestones.each do |raw|
- begin
- MilestoneFormatter.new(project, raw).create!
- rescue => e
- errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.milestones(repo, state: :all, per_page: 100) do |milestones|
+ milestones.each do |raw|
+ begin
+ MilestoneFormatter.new(project, raw).create!
+ rescue => e
+ errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
def import_issues
- issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
-
- issues.each do |raw|
- gh_issue = IssueFormatter.new(project, raw)
-
- if gh_issue.valid?
- begin
- issue = gh_issue.create!
- apply_labels(issue)
- import_comments(issue) if gh_issue.has_comments?
- rescue => e
- errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
+ issues.each do |raw|
+ gh_issue = IssueFormatter.new(project, raw)
+
+ if gh_issue.valid?
+ begin
+ issue = gh_issue.create!
+ apply_labels(issue)
+ import_comments(issue) if gh_issue.has_comments?
+ rescue => e
+ errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end
end
def import_pull_requests
- pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
- pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?)
-
- pull_requests.each do |pull_request|
- begin
- restore_source_branch(pull_request) unless pull_request.source_branch_exists?
- restore_target_branch(pull_request) unless pull_request.target_branch_exists?
-
- merge_request = pull_request.create!
- apply_labels(merge_request)
- import_comments(merge_request)
- import_comments_on_diff(merge_request)
- rescue => e
- errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
- ensure
- clean_up_restored_branches(pull_request)
+ client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
+ pull_requests.each do |raw|
+ pull_request = PullRequestFormatter.new(project, raw)
+ next unless pull_request.valid?
+
+ begin
+ restore_source_branch(pull_request) unless pull_request.source_branch_exists?
+ restore_target_branch(pull_request) unless pull_request.target_branch_exists?
+
+ merge_request = pull_request.create!
+ apply_labels(merge_request)
+ import_comments(merge_request)
+ import_comments_on_diff(merge_request)
+ rescue => e
+ errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
+ ensure
+ clean_up_restored_branches(pull_request)
+ end
end
end
end
@@ -180,13 +182,14 @@ module Gitlab
end
def import_releases
- releases = client.releases(repo, per_page: 100)
- releases.each do |raw|
- begin
- gh_release = ReleaseFormatter.new(project, raw)
- gh_release.create! if gh_release.valid?
- rescue => e
- errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ client.releases(repo, per_page: 100) do |releases|
+ releases.each do |raw|
+ begin
+ gh_release = ReleaseFormatter.new(project, raw)
+ gh_release.create! if gh_release.valid?
+ rescue => e
+ errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+ end
end
end
end