diff options
Diffstat (limited to 'lib/gitlab/github_import/importer.rb')
-rw-r--r-- | lib/gitlab/github_import/importer.rb | 329 |
1 files changed, 0 insertions, 329 deletions
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb deleted file mode 100644 index b8c07460ebb..00000000000 --- a/lib/gitlab/github_import/importer.rb +++ /dev/null @@ -1,329 +0,0 @@ -module Gitlab - module GithubImport - class Importer - include Gitlab::ShellAdapter - - attr_reader :errors, :project, :repo, :repo_url - - def initialize(project) - @project = project - @repo = project.import_source - @repo_url = project.import_url - @errors = [] - @labels = {} - end - - def client - return @client if defined?(@client) - unless credentials - raise Projects::ImportService::Error, - "Unable to find project import data credentials for project ID: #{@project.id}" - end - - opts = {} - # Gitea plan to be GitHub compliant - if project.gitea_import? - uri = URI.parse(project.import_url) - host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '') - opts = { - host: host, - api_version: 'v1' - } - end - - @client = Client.new(credentials[:user], opts) - end - - def execute - # The ordering of importing is important here due to the way GitHub structures their data - # 1. Labels are required by other items while not having a dependency on anything else - # so need to be first - # 2. Pull requests must come before issues. Every pull request is also an issue but not - # all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab - # doesn't structure data like this so we need to make sure that we've created the MRs - # before we attempt to add the labels defined in the GitHub issue for the related, already - # imported, pull request - import_labels - import_milestones - import_pull_requests - import_issues - import_comments(:issues) - import_comments(:pull_requests) - import_wiki - - # Gitea doesn't have a Release API yet - # See https://github.com/go-gitea/gitea/issues/330 - unless project.gitea_import? - import_releases - end - - handle_errors - - true - end - - private - - def credentials - return @credentials if defined?(@credentials) - - @credentials = project.import_data ? project.import_data.credentials : nil - end - - def handle_errors - return unless errors.any? - - project.update_column(:import_error, { - message: 'The remote data could not be fully imported.', - errors: errors - }.to_json) - end - - def import_labels - fetch_resources(:labels, repo, per_page: 100) do |labels| - labels.each do |raw| - begin - gh_label = LabelFormatter.new(project, raw) - gh_label.create! - rescue => e - errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message } - end - end - end - - cache_labels! - end - - def import_milestones - fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones| - milestones.each do |raw| - begin - gh_milestone = MilestoneFormatter.new(project, raw) - gh_milestone.create! - rescue => e - errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message } - end - end - end - end - - def import_issues - fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| - issues.each do |raw| - gh_issue = IssueFormatter.new(project, raw, client) - - begin - issuable = - if gh_issue.pull_request? - MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number) - else - gh_issue.create! - end - - apply_labels(issuable, raw) - rescue => e - errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message } - end - end - end - end - - def import_pull_requests - fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| - pull_requests.each do |raw| - gh_pull_request = PullRequestFormatter.new(project, raw, client) - - next unless gh_pull_request.valid? - - begin - restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists? - restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists? - - merge_request = gh_pull_request.create! - - # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage - if project.gitea_import? - apply_labels(merge_request, raw) - end - rescue => e - errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message } - ensure - clean_up_restored_branches(gh_pull_request) - end - end - end - - project.repository.after_remove_branch - end - - def restore_source_branch(pull_request) - project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha) - end - - def restore_target_branch(pull_request) - project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha) - end - - def remove_branch(name) - project.repository.delete_branch(name) - rescue Gitlab::Git::Repository::DeleteBranchFailed - errors << { type: :remove_branch, name: name } - end - - def clean_up_restored_branches(pull_request) - return if pull_request.opened? - - remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists? - remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists? - end - - def apply_labels(issuable, raw) - return unless raw.labels.count > 0 - - label_ids = raw.labels - .map { |attrs| @labels[attrs.name] } - .compact - - issuable.update_attribute(:label_ids, label_ids) - end - - def import_comments(issuable_type) - resource_type = "#{issuable_type}_comments".to_sym - - # Two notes here: - # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note, - # compare it against every comment in the current imported page until we find match, and that's where start importing - # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns - # only comments on diffs, so select last note not based on noteable_type but on line_code - line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL' - last_note = project.notes.where("line_code IS #{line_code_is}").last - - fetch_resources(resource_type, repo, per_page: 100) do |comments| - if last_note - discard_inserted_comments(comments, last_note) - last_note = nil - end - - create_comments(comments) - end - end - - def create_comments(comments) - ActiveRecord::Base.no_touching do - comments.each do |raw| - begin - comment = CommentFormatter.new(project, raw, client) - - # GH does not return info about comment's parent, so we guess it by checking its URL! - *_, parent, iid = URI(raw.html_url).path.split('/') - - issuable = if parent == 'issues' - Issue.find_by(project_id: project.id, iid: iid) - else - MergeRequest.find_by(target_project_id: project.id, iid: iid) - end - - next unless issuable - - issuable.notes.create!(comment.attributes) - rescue => e - errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } - end - end - end - end - - def discard_inserted_comments(comments, last_note) - last_note_attrs = nil - - cut_off_index = comments.find_index do |raw| - comment = CommentFormatter.new(project, raw) - comment_attrs = comment.attributes - last_note_attrs ||= last_note.slice(*comment_attrs.keys) - - comment_attrs.with_indifferent_access == last_note_attrs - end - - # No matching resource in the collection, which means we got halted right on the end of the last page, so all good - return unless cut_off_index - - # Otherwise, remove the resources we've already inserted - comments.shift(cut_off_index + 1) - end - - def import_wiki - unless project.wiki.repository_exists? - wiki = WikiFormatter.new(project) - gitlab_shell.import_repository(project.repository_storage_path, wiki.disk_path, wiki.import_url) - end - rescue Gitlab::Shell::Error => e - # GitHub error message when the wiki repo has not been created, - # this means that repo has wiki enabled, but have no pages. So, - # we can skip the import. - if e.message !~ /repository not exported/ - errors << { type: :wiki, errors: e.message } - end - end - - def import_releases - fetch_resources(:releases, repo, per_page: 100) do |releases| - releases.each do |raw| - begin - gh_release = ReleaseFormatter.new(project, raw) - gh_release.create! if gh_release.valid? - rescue => e - errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message } - end - end - end - end - - def cache_labels! - project.labels.select(:id, :title).find_each do |label| - @labels[label.title] = label.id - end - end - - def fetch_resources(resource_type, *opts) - return if imported?(resource_type) - - opts.last[:page] = current_page(resource_type) - - client.public_send(resource_type, *opts) do |resources| # rubocop:disable GitlabSecurity/PublicSend - yield resources - increment_page(resource_type) - end - - imported!(resource_type) - end - - def imported?(resource_type) - Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported") - end - - def imported!(resource_type) - Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day) - end - - def increment_page(resource_type) - key = "#{cache_key_prefix}:#{resource_type}:current-page" - - # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is - # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around - page = Rails.cache.read(key) - page += 1 - Rails.cache.write(key, page) - - page - end - - def current_page(resource_type) - Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 } - end - - def cache_key_prefix - @cache_key_prefix ||= "github-import:#{project.id}" - end - end - end -end |