From 6e590af1484c4b98414e0128b58e94342d57b07e Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Wed, 19 Oct 2016 14:21:27 +0200 Subject: Check if repository already exists before trying to re-import it --- app/services/projects/import_service.rb | 2 +- spec/services/projects/import_service_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb index e466ffa60eb..d7221fe993c 100644 --- a/app/services/projects/import_service.rb +++ b/app/services/projects/import_service.rb @@ -29,7 +29,7 @@ module Projects if unknown_url? # In this case, we only want to import issues, not a repository. create_repository - else + elsif !project.repository_exists? import_repository end end diff --git a/spec/services/projects/import_service_spec.rb b/spec/services/projects/import_service_spec.rb index ed1384798ab..ab6e8f537ba 100644 --- a/spec/services/projects/import_service_spec.rb +++ b/spec/services/projects/import_service_spec.rb @@ -110,7 +110,7 @@ describe Projects::ImportService, services: true do end it 'expires existence cache after error' do - allow_any_instance_of(Project).to receive(:repository_exists?).and_return(true) + allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true) expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original -- cgit v1.2.1 From 14fbd25d06248ee268434278e0fe31fec262d23b Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Wed, 19 Oct 2016 20:42:31 +0200 Subject: Modify GitHub importer to be retryable --- CHANGELOG.md | 1 + lib/gitlab/github_import/base_formatter.rb | 4 +- lib/gitlab/github_import/importer.rb | 115 ++++++++++++++++++--- lib/gitlab/github_import/issue_formatter.rb | 8 +- lib/gitlab/github_import/label_formatter.rb | 4 +- lib/gitlab/github_import/milestone_formatter.rb | 8 +- lib/gitlab/github_import/pull_request_formatter.rb | 8 +- lib/gitlab/github_import/release_formatter.rb | 8 +- spec/lib/gitlab/github_import/importer_spec.rb | 7 +- 9 files changed, 136 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56d9d4e2809..ba35d57afb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Please view this file on the master branch, on stable branches it's out of date. - Fix documents and comments on Build API `scope` - Fix applying labels for GitHub-imported MRs - Fix importing MR comments from GitHub + - Modify GitHub importer to be retryable - Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov) - Shortened merge request modal to let clipboard button not overlap diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/github_import/base_formatter.rb index 8cacf4f4925..d07bb43044b 100644 --- a/lib/gitlab/github_import/base_formatter.rb +++ b/lib/gitlab/github_import/base_formatter.rb @@ -10,7 +10,9 @@ module Gitlab end def create! - self.klass.create!(self.attributes) + project.send(project_association).find_or_create_by!(find_condition) do |record| + record.attributes = attributes + end end private diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index 27946dff608..7e13f731002 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -20,13 +20,13 @@ module Gitlab end def execute - import_labels - import_milestones - import_issues - import_pull_requests + import_labels unless imported?(:labels) + import_milestones unless imported?(:milestones) + import_issues unless imported?(:issues) + import_pull_requests unless imported?(:pull_requests) import_comments import_wiki - import_releases + import_releases unless imported?(:releases) handle_errors true @@ -48,7 +48,7 @@ module Gitlab end def import_labels - client.labels(repo, per_page: 100) do |labels| + client.labels(repo, page: current_page(:labels), per_page: 100) do |labels| labels.each do |raw| begin label = LabelFormatter.new(project, raw).create! @@ -57,11 +57,15 @@ module Gitlab errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end + + increment_page(:labels) end + + imported!(:labels) end def import_milestones - client.milestones(repo, state: :all, per_page: 100) do |milestones| + client.milestones(repo, state: :all, page: current_page(:milestones), per_page: 100) do |milestones| milestones.each do |raw| begin MilestoneFormatter.new(project, raw).create! @@ -69,11 +73,15 @@ module Gitlab errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end + + increment_page(:milestones) end + + imported!(:milestones) end def import_issues - client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| + client.issues(repo, state: :all, sort: :created, direction: :asc, page: current_page(:issues), per_page: 100) do |issues| issues.each do |raw| gh_issue = IssueFormatter.new(project, raw) @@ -86,11 +94,15 @@ module Gitlab end end end + + increment_page(:issues) end + + imported!(:issues) end def import_pull_requests - client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| + client.pull_requests(repo, state: :all, sort: :created, direction: :asc, page: current_page(:pull_requests), per_page: 100) do |pull_requests| pull_requests.each do |raw| pull_request = PullRequestFormatter.new(project, raw) next unless pull_request.valid? @@ -107,9 +119,13 @@ module Gitlab clean_up_restored_branches(pull_request) end end + + increment_page(:pull_requests) end project.repository.after_remove_branch + + imported!(:pull_requests) end def restore_source_branch(pull_request) @@ -149,13 +165,34 @@ module Gitlab end def import_comments - client.issues_comments(repo, per_page: 100) do |comments| + # We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note, + # compare it against every comment in the current imported page until we find match, and that's where start importing + last_note = Note.where(noteable_type: 'Issue').last + + client.issues_comments(repo, page: current_page(:issue_comments), per_page: 100) do |comments| + if last_note + discard_inserted_comments(comments, last_note) + last_note = nil + end + create_comments(comments) - end + increment_page(:issue_comments) + end unless imported?(:issue_comments) + + imported!(:issue_comments) + + last_note = Note.where(noteable_type: 'MergeRequest').last + client.pull_requests_comments(repo, page: current_page(:pull_request_comments), per_page: 100) do |comments| + if last_note + discard_inserted_comments(comments, last_note) + last_note = nil + end - client.pull_requests_comments(repo, per_page: 100) do |comments| create_comments(comments) - end + increment_page(:pull_request_comments) + end unless imported?(:pull_request_comments) + + imported!(:pull_request_comments) end def create_comments(comments) @@ -177,6 +214,24 @@ module Gitlab end end + def discard_inserted_comments(comments, last_note) + last_note_attrs = nil + + cut_off_index = comments.find_index do |raw| + comment = CommentFormatter.new(project, raw) + comment_attrs = comment.attributes + last_note_attrs ||= last_note.slice(*comment_attrs.keys) + + comment_attrs.with_indifferent_access == last_note_attrs + end + + # No matching resource in the collection, which means we got halted right on the end of the last page, so all good + return unless cut_off_index + + # Otherwise, remove the resouces we've already inserted + comments.shift(cut_off_index + 1) + end + def import_wiki unless project.wiki.repository_exists? wiki = WikiFormatter.new(project) @@ -192,7 +247,7 @@ module Gitlab end def import_releases - client.releases(repo, per_page: 100) do |releases| + client.releases(repo, page: current_page(:releases), per_page: 100) do |releases| releases.each do |raw| begin gh_release = ReleaseFormatter.new(project, raw) @@ -201,7 +256,39 @@ module Gitlab errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end + + increment_page(:releases) end + + imported!(:releases) + end + + def imported?(resource_type) + Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported") + end + + def imported!(resource_type) + Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day) + end + + def increment_page(resource_type) + key = "#{cache_key_prefix}:#{resource_type}:current-page" + + # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is + # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around + page = Rails.cache.read(key) + page += 1 + Rails.cache.write(key, page) + + page + end + + def current_page(resource_type) + Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 } + end + + def cache_key_prefix + @cache_key_prefix ||= "github-import:#{project.id}" end end end diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/github_import/issue_formatter.rb index 77621de9f4c..8c32ac59fc5 100644 --- a/lib/gitlab/github_import/issue_formatter.rb +++ b/lib/gitlab/github_import/issue_formatter.rb @@ -20,8 +20,12 @@ module Gitlab raw_data.comments > 0 end - def klass - Issue + def project_association + :issues + end + + def find_condition + { iid: number } end def number diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/github_import/label_formatter.rb index 942dfb3312b..002494739e9 100644 --- a/lib/gitlab/github_import/label_formatter.rb +++ b/lib/gitlab/github_import/label_formatter.rb @@ -9,8 +9,8 @@ module Gitlab } end - def klass - Label + def project_association + :labels end def create! diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/github_import/milestone_formatter.rb index b2fa524cf5b..401dd962521 100644 --- a/lib/gitlab/github_import/milestone_formatter.rb +++ b/lib/gitlab/github_import/milestone_formatter.rb @@ -14,8 +14,12 @@ module Gitlab } end - def klass - Milestone + def project_association + :milestones + end + + def find_condition + { iid: raw_data.number } end private diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb index 1408683100f..b9a227fb11a 100644 --- a/lib/gitlab/github_import/pull_request_formatter.rb +++ b/lib/gitlab/github_import/pull_request_formatter.rb @@ -24,8 +24,12 @@ module Gitlab } end - def klass - MergeRequest + def project_association + :merge_requests + end + + def find_condition + { iid: number } end def number diff --git a/lib/gitlab/github_import/release_formatter.rb b/lib/gitlab/github_import/release_formatter.rb index 73d643b00ad..1ad702a6058 100644 --- a/lib/gitlab/github_import/release_formatter.rb +++ b/lib/gitlab/github_import/release_formatter.rb @@ -11,8 +11,12 @@ module Gitlab } end - def klass - Release + def project_association + :releases + end + + def find_condition + { tag: raw_data.tag_name } end def valid? diff --git a/spec/lib/gitlab/github_import/importer_spec.rb b/spec/lib/gitlab/github_import/importer_spec.rb index 84f280ceaae..7478f86bd28 100644 --- a/spec/lib/gitlab/github_import/importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer_spec.rb @@ -2,6 +2,10 @@ require 'spec_helper' describe Gitlab::GithubImport::Importer, lib: true do describe '#execute' do + before do + allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new) + end + context 'when an error occurs' do let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) } let(:octocat) { double(id: 123456, login: 'octocat') } @@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do message: 'The remote data could not be fully imported.', errors: [ { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" }, - { type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" }, { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." }, - { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" }, + { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." }, { type: :wiki, errors: "Gitlab::Shell::Error" }, { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" } ] -- cgit v1.2.1 From bc6302b942e1eb9e5d7148004c7e6d0d081e13ef Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Thu, 27 Oct 2016 14:54:51 +0200 Subject: Use public_send instead of send --- lib/gitlab/github_import/base_formatter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/github_import/base_formatter.rb index d07bb43044b..6dbae64a9fe 100644 --- a/lib/gitlab/github_import/base_formatter.rb +++ b/lib/gitlab/github_import/base_formatter.rb @@ -10,7 +10,7 @@ module Gitlab end def create! - project.send(project_association).find_or_create_by!(find_condition) do |record| + project.public_send(project_association).find_or_create_by!(find_condition) do |record| record.attributes = attributes end end -- cgit v1.2.1 From 07275dd79ccfe57c495ccd64ff5632b4fb003b9b Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Thu, 27 Oct 2016 15:00:21 +0200 Subject: Abstract the use of imported[!?] and {current,increment}_page in GitHub importer --- lib/gitlab/github_import/importer.rb | 84 +++++++++++++++--------------------- 1 file changed, 34 insertions(+), 50 deletions(-) diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index 7e13f731002..6c4f5acccb4 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -20,13 +20,14 @@ module Gitlab end def execute - import_labels unless imported?(:labels) - import_milestones unless imported?(:milestones) - import_issues unless imported?(:issues) - import_pull_requests unless imported?(:pull_requests) - import_comments + import_labels + import_milestones + import_issues + import_pull_requests + import_comments(:issues) + import_comments(:pull_requests) import_wiki - import_releases unless imported?(:releases) + import_releases handle_errors true @@ -48,7 +49,7 @@ module Gitlab end def import_labels - client.labels(repo, page: current_page(:labels), per_page: 100) do |labels| + fetch_resources(:labels, repo, per_page: 100) do |labels| labels.each do |raw| begin label = LabelFormatter.new(project, raw).create! @@ -57,15 +58,11 @@ module Gitlab errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end - - increment_page(:labels) end - - imported!(:labels) end def import_milestones - client.milestones(repo, state: :all, page: current_page(:milestones), per_page: 100) do |milestones| + fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones| milestones.each do |raw| begin MilestoneFormatter.new(project, raw).create! @@ -73,15 +70,11 @@ module Gitlab errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end - - increment_page(:milestones) end - - imported!(:milestones) end def import_issues - client.issues(repo, state: :all, sort: :created, direction: :asc, page: current_page(:issues), per_page: 100) do |issues| + fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| issues.each do |raw| gh_issue = IssueFormatter.new(project, raw) @@ -94,15 +87,11 @@ module Gitlab end end end - - increment_page(:issues) end - - imported!(:issues) end def import_pull_requests - client.pull_requests(repo, state: :all, sort: :created, direction: :asc, page: current_page(:pull_requests), per_page: 100) do |pull_requests| + fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| pull_requests.each do |raw| pull_request = PullRequestFormatter.new(project, raw) next unless pull_request.valid? @@ -119,13 +108,9 @@ module Gitlab clean_up_restored_branches(pull_request) end end - - increment_page(:pull_requests) end project.repository.after_remove_branch - - imported!(:pull_requests) end def restore_source_branch(pull_request) @@ -164,35 +149,25 @@ module Gitlab end end - def import_comments - # We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note, - # compare it against every comment in the current imported page until we find match, and that's where start importing - last_note = Note.where(noteable_type: 'Issue').last + def import_comments(issuable_type) + resource_type = "#{issuable_type}_comments".to_sym - client.issues_comments(repo, page: current_page(:issue_comments), per_page: 100) do |comments| - if last_note - discard_inserted_comments(comments, last_note) - last_note = nil - end - - create_comments(comments) - increment_page(:issue_comments) - end unless imported?(:issue_comments) - - imported!(:issue_comments) + # Two notes here: + # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note, + # compare it against every comment in the current imported page until we find match, and that's where start importing + # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns + # only comments on diffs, so select last note not based on noteable_type but on line_code + line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL' + last_note = project.notes.where("line_code IS #{line_code_is}").last - last_note = Note.where(noteable_type: 'MergeRequest').last - client.pull_requests_comments(repo, page: current_page(:pull_request_comments), per_page: 100) do |comments| + fetch_resources(resource_type, repo, per_page: 100) do |comments| if last_note discard_inserted_comments(comments, last_note) last_note = nil end create_comments(comments) - increment_page(:pull_request_comments) - end unless imported?(:pull_request_comments) - - imported!(:pull_request_comments) + end end def create_comments(comments) @@ -247,7 +222,7 @@ module Gitlab end def import_releases - client.releases(repo, page: current_page(:releases), per_page: 100) do |releases| + fetch_resources(:releases, repo, per_page: 100) do |releases| releases.each do |raw| begin gh_release = ReleaseFormatter.new(project, raw) @@ -256,11 +231,20 @@ module Gitlab errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end end + end + end + + def fetch_resources(resource_type, *opts) + return if imported?(resource_type) + + opts.last.merge!(page: current_page(resource_type)) - increment_page(:releases) + client.public_send(resource_type, *opts) do |resources| + yield resources + increment_page(resource_type) end - imported!(:releases) + imported!(resource_type) end def imported?(resource_type) -- cgit v1.2.1 From f68d9261b0fa09c03ef8b92d8941d3b6577d9a2a Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Thu, 27 Oct 2016 15:00:31 +0200 Subject: Fix typos --- lib/gitlab/github_import/client.rb | 2 +- lib/gitlab/github_import/importer.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 348005d5659..85df6547a67 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -110,7 +110,7 @@ module Gitlab if block_given? yield data # api.last_response could change while we're yielding (e.g. fetching labels for each PR) - # so we cache our own last request + # so we cache our own last response each_response_page(last_response, &block) else each_response_page(last_response) { |page| data.concat(page) } diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index 6c4f5acccb4..ecc28799737 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -203,7 +203,7 @@ module Gitlab # No matching resource in the collection, which means we got halted right on the end of the last page, so all good return unless cut_off_index - # Otherwise, remove the resouces we've already inserted + # Otherwise, remove the resources we've already inserted comments.shift(cut_off_index + 1) end -- cgit v1.2.1