diff options
author | Ahmad Sherif <me@ahmadsherif.com> | 2016-09-27 19:32:47 +0200 |
---|---|---|
committer | Ahmad Sherif <me@ahmadsherif.com> | 2016-09-27 20:45:07 +0200 |
commit | 395a9301b233da30a9abef56bb7b6fa6229f3acd (patch) | |
tree | 952605568f0f698960c6f1deef7ef179d50a15db /lib | |
parent | 90578f4ad091f80428a661005060af65a0c7151e (diff) | |
download | gitlab-ce-395a9301b233da30a9abef56bb7b6fa6229f3acd.tar.gz |
Process each page of GitHub resources instead of concating them then processing
This should avoid having large memory growth when importing GitHub repos
with lots of resources.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/github_import/client.rb | 11 | ||||
-rw-r--r-- | lib/gitlab/github_import/importer.rb | 101 |
2 files changed, 57 insertions, 55 deletions
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 084e514492c..e33ac61f5ae 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -52,7 +52,7 @@ module Gitlab def method_missing(method, *args, &block) if api.respond_to?(method) - request { api.send(method, *args, &block) } + request(method, *args, &block) else super(method, *args, &block) end @@ -99,20 +99,19 @@ module Gitlab rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME end - def request + def request(method, *args, &block) sleep rate_limit_sleep_time if rate_limit_exceed? - data = yield + data = api.send(method, *args, &block) + yield data last_response = api.last_response while last_response.rels[:next] sleep rate_limit_sleep_time if rate_limit_exceed? last_response = last_response.rels[:next].get - data.concat(last_response.data) if last_response.data.is_a?(Array) + yield last_response.data if last_response.data.is_a?(Array) end - - data end end end diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index d35ee2a1c65..f1adec8212b 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -46,64 +46,66 @@ module Gitlab end def import_labels - labels = client.labels(repo, per_page: 100) - - labels.each do |raw| - begin - LabelFormatter.new(project, raw).create! - rescue => e - errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.labels(repo, per_page: 100) do |labels| + labels.each do |raw| + begin + LabelFormatter.new(project, raw).create! + rescue => e + errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_milestones - milestones = client.milestones(repo, state: :all, per_page: 100) - - milestones.each do |raw| - begin - MilestoneFormatter.new(project, raw).create! - rescue => e - errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.milestones(repo, state: :all, per_page: 100) do |milestones| + milestones.each do |raw| + begin + MilestoneFormatter.new(project, raw).create! + rescue => e + errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_issues - issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - - issues.each do |raw| - gh_issue = IssueFormatter.new(project, raw) - - if gh_issue.valid? - begin - issue = gh_issue.create! - apply_labels(issue) - import_comments(issue) if gh_issue.has_comments? - rescue => e - errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| + issues.each do |raw| + gh_issue = IssueFormatter.new(project, raw) + + if gh_issue.valid? + begin + issue = gh_issue.create! + apply_labels(issue) + import_comments(issue) if gh_issue.has_comments? + rescue => e + errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end end def import_pull_requests - pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?) - - pull_requests.each do |pull_request| - begin - restore_source_branch(pull_request) unless pull_request.source_branch_exists? - restore_target_branch(pull_request) unless pull_request.target_branch_exists? - - merge_request = pull_request.create! - apply_labels(merge_request) - import_comments(merge_request) - import_comments_on_diff(merge_request) - rescue => e - errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } - ensure - clean_up_restored_branches(pull_request) + client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| + pull_requests.each do |raw| + pull_request = PullRequestFormatter.new(project, raw) + next unless pull_request.valid? + + begin + restore_source_branch(pull_request) unless pull_request.source_branch_exists? + restore_target_branch(pull_request) unless pull_request.target_branch_exists? + + merge_request = pull_request.create! + apply_labels(merge_request) + import_comments(merge_request) + import_comments_on_diff(merge_request) + rescue => e + errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } + ensure + clean_up_restored_branches(pull_request) + end end end end @@ -180,13 +182,14 @@ module Gitlab end def import_releases - releases = client.releases(repo, per_page: 100) - releases.each do |raw| - begin - gh_release = ReleaseFormatter.new(project, raw) - gh_release.create! if gh_release.valid? - rescue => e - errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.releases(repo, per_page: 100) do |releases| + releases.each do |raw| + begin + gh_release = ReleaseFormatter.new(project, raw) + gh_release.create! if gh_release.valid? + rescue => e + errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end |