From 395a9301b233da30a9abef56bb7b6fa6229f3acd Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Tue, 27 Sep 2016 19:32:47 +0200 Subject: Process each page of GitHub resources instead of concating them then processing This should avoid having large memory growth when importing GitHub repos with lots of resources. --- lib/gitlab/github_import/client.rb | 11 ++-- lib/gitlab/github_import/importer.rb | 101 ++++++++++++++++++----------------- 2 files changed, 57 insertions(+), 55 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 084e514492c..e33ac61f5ae 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -52,7 +52,7 @@ module Gitlab def method_missing(method, *args, &block) if api.respond_to?(method) - request { api.send(method, *args, &block) } + request(method, *args, &block) else super(method, *args, &block) end @@ -99,20 +99,19 @@ module Gitlab rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME end - def request + def request(method, *args, &block) sleep rate_limit_sleep_time if rate_limit_exceed? - data = yield + data = api.send(method, *args, &block) + yield data last_response = api.last_response while last_response.rels[:next] sleep rate_limit_sleep_time if rate_limit_exceed? last_response = last_response.rels[:next].get - data.concat(last_response.data) if last_response.data.is_a?(Array) + yield last_response.data if last_response.data.is_a?(Array) end - - data end end end diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index d35ee2a1c65..f1adec8212b 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -46,64 +46,66 @@ module Gitlab end def import_labels - labels = client.labels(repo, per_page: 100) - - labels.each do |raw| - begin - LabelFormatter.new(project, raw).create! - rescue => e - errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.labels(repo, per_page: 100) do |labels| + labels.each do |raw| + begin + LabelFormatter.new(project, raw).create! + rescue => e + errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_milestones - milestones = client.milestones(repo, state: :all, per_page: 100) - - milestones.each do |raw| - begin - MilestoneFormatter.new(project, raw).create! - rescue => e - errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.milestones(repo, state: :all, per_page: 100) do |milestones| + milestones.each do |raw| + begin + MilestoneFormatter.new(project, raw).create! + rescue => e + errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end def import_issues - issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - - issues.each do |raw| - gh_issue = IssueFormatter.new(project, raw) - - if gh_issue.valid? - begin - issue = gh_issue.create! - apply_labels(issue) - import_comments(issue) if gh_issue.has_comments? - rescue => e - errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| + issues.each do |raw| + gh_issue = IssueFormatter.new(project, raw) + + if gh_issue.valid? + begin + issue = gh_issue.create! + apply_labels(issue) + import_comments(issue) if gh_issue.has_comments? + rescue => e + errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end end def import_pull_requests - pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) - pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?) - - pull_requests.each do |pull_request| - begin - restore_source_branch(pull_request) unless pull_request.source_branch_exists? - restore_target_branch(pull_request) unless pull_request.target_branch_exists? - - merge_request = pull_request.create! - apply_labels(merge_request) - import_comments(merge_request) - import_comments_on_diff(merge_request) - rescue => e - errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } - ensure - clean_up_restored_branches(pull_request) + client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| + pull_requests.each do |raw| + pull_request = PullRequestFormatter.new(project, raw) + next unless pull_request.valid? + + begin + restore_source_branch(pull_request) unless pull_request.source_branch_exists? + restore_target_branch(pull_request) unless pull_request.target_branch_exists? + + merge_request = pull_request.create! + apply_labels(merge_request) + import_comments(merge_request) + import_comments_on_diff(merge_request) + rescue => e + errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } + ensure + clean_up_restored_branches(pull_request) + end end end end @@ -180,13 +182,14 @@ module Gitlab end def import_releases - releases = client.releases(repo, per_page: 100) - releases.each do |raw| - begin - gh_release = ReleaseFormatter.new(project, raw) - gh_release.create! if gh_release.valid? - rescue => e - errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + client.releases(repo, per_page: 100) do |releases| + releases.each do |raw| + begin + gh_release = ReleaseFormatter.new(project, raw) + gh_release.create! if gh_release.valid? + rescue => e + errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } + end end end end -- cgit v1.2.1