diff options
Diffstat (limited to 'lib')
79 files changed, 2486 insertions, 1071 deletions
diff --git a/lib/feature.rb b/lib/feature.rb index 4bd29aed687..ac3bc65c0d5 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -5,6 +5,10 @@ class Feature class FlipperFeature < Flipper::Adapters::ActiveRecord::Feature # Using `self.table_name` won't work. ActiveRecord bug? superclass.table_name = 'features' + + def self.feature_names + pluck(:key) + end end class FlipperGate < Flipper::Adapters::ActiveRecord::Gate @@ -22,11 +26,19 @@ class Feature flipper.feature(key) end + def persisted_names + if RequestStore.active? + RequestStore[:flipper_persisted_names] ||= FlipperFeature.feature_names + else + FlipperFeature.feature_names + end + end + def persisted?(feature) # Flipper creates on-memory features when asked for a not-yet-created one. # If we want to check if a feature has been actually set, we look for it # on the persisted features list. - all.map(&:name).include?(feature.name) + persisted_names.include?(feature.name) end def enabled?(key, thing = nil) diff --git a/lib/github/client.rb b/lib/github/client.rb deleted file mode 100644 index 29bd9c1f39e..00000000000 --- a/lib/github/client.rb +++ /dev/null @@ -1,54 +0,0 @@ -module Github - class Client - TIMEOUT = 60 - DEFAULT_PER_PAGE = 100 - - attr_reader :connection, :rate_limit - - def initialize(options) - @connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday| - faraday.options.open_timeout = options.fetch(:timeout, TIMEOUT) - faraday.options.timeout = options.fetch(:timeout, TIMEOUT) - faraday.authorization 'token', options.fetch(:token) - faraday.adapter :net_http - faraday.ssl.verify = verify_ssl - end - - @rate_limit = RateLimit.new(connection) - end - - def get(url, query = {}) - exceed, reset_in = rate_limit.get - sleep reset_in if exceed - - Github::Response.new(connection.get(url, { per_page: DEFAULT_PER_PAGE }.merge(query))) - end - - private - - def root_endpoint - custom_endpoint || github_endpoint - end - - def custom_endpoint - github_omniauth_provider.dig('args', 'client_options', 'site') - end - - def verify_ssl - # If there is no config, we're connecting to github.com - # and we should verify ssl. - github_omniauth_provider.fetch('verify_ssl', true) - end - - def github_endpoint - OmniAuth::Strategies::GitHub.default_options[:client_options][:site] - end - - def github_omniauth_provider - @github_omniauth_provider ||= - Gitlab.config.omniauth.providers - .find { |provider| provider.name == 'github' } - .to_h - end - end -end diff --git a/lib/github/collection.rb b/lib/github/collection.rb deleted file mode 100644 index 014b2038c4b..00000000000 --- a/lib/github/collection.rb +++ /dev/null @@ -1,29 +0,0 @@ -module Github - class Collection - attr_reader :options - - def initialize(options) - @options = options - end - - def fetch(url, query = {}) - return [] if url.blank? - - Enumerator.new do |yielder| - loop do - response = client.get(url, query) - response.body.each { |item| yielder << item } - - raise StopIteration unless response.rels.key?(:next) - url = response.rels[:next] - end - end.lazy - end - - private - - def client - @client ||= Github::Client.new(options) - end - end -end diff --git a/lib/github/error.rb b/lib/github/error.rb deleted file mode 100644 index 66d7afaa787..00000000000 --- a/lib/github/error.rb +++ /dev/null @@ -1,3 +0,0 @@ -module Github - RepositoryFetchError = Class.new(StandardError) -end diff --git a/lib/github/import.rb b/lib/github/import.rb deleted file mode 100644 index fef63dd7168..00000000000 --- a/lib/github/import.rb +++ /dev/null @@ -1,377 +0,0 @@ -require_relative 'error' -require_relative 'import/issue' -require_relative 'import/legacy_diff_note' -require_relative 'import/merge_request' -require_relative 'import/note' - -module Github - class Import - include Gitlab::ShellAdapter - - attr_reader :project, :repository, :repo, :repo_url, :wiki_url, - :options, :errors, :cached, :verbose, :last_fetched_at - - def initialize(project, options = {}) - @project = project - @repository = project.repository - @repo = project.import_source - @repo_url = project.import_url - @wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git') - @options = options.reverse_merge(token: project.import_data&.credentials&.fetch(:user)) - @verbose = options.fetch(:verbose, false) - @cached = Hash.new { |hash, key| hash[key] = Hash.new } - @errors = [] - @last_fetched_at = nil - end - - # rubocop: disable Rails/Output - def execute - puts 'Fetching repository...'.color(:aqua) if verbose - setup_and_fetch_repository - puts 'Fetching labels...'.color(:aqua) if verbose - fetch_labels - puts 'Fetching milestones...'.color(:aqua) if verbose - fetch_milestones - puts 'Fetching pull requests...'.color(:aqua) if verbose - fetch_pull_requests - puts 'Fetching issues...'.color(:aqua) if verbose - fetch_issues - puts 'Fetching releases...'.color(:aqua) if verbose - fetch_releases - puts 'Cloning wiki repository...'.color(:aqua) if verbose - fetch_wiki_repository - puts 'Expiring repository cache...'.color(:aqua) if verbose - expire_repository_cache - - errors.empty? - rescue Github::RepositoryFetchError - expire_repository_cache - false - ensure - keep_track_of_errors - end - - private - - def setup_and_fetch_repository - begin - project.ensure_repository - project.repository.add_remote('github', repo_url) - project.repository.set_import_remote_as_mirror('github') - project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head') - fetch_remote(forced: true) - rescue Gitlab::Git::Repository::NoRepository, - Gitlab::Git::RepositoryMirroring::RemoteError, - Gitlab::Shell::Error => e - error(:project, repo_url, e.message) - raise Github::RepositoryFetchError - end - end - - def fetch_remote(forced: false) - @last_fetched_at = Time.now - project.repository.fetch_remote('github', forced: forced) - end - - def fetch_wiki_repository - return if project.wiki.repository_exists? - - wiki_path = project.wiki.disk_path - gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url) - rescue Gitlab::Shell::Error => e - # GitHub error message when the wiki repo has not been created, - # this means that repo has wiki enabled, but have no pages. So, - # we can skip the import. - if e.message !~ /repository not exported/ - error(:wiki, wiki_url, e.message) - end - end - - def fetch_labels - url = "/repos/#{repo}/labels" - - while url - response = Github::Client.new(options).get(url) - - response.body.each do |raw| - begin - representation = Github::Representation::Label.new(raw) - - label = project.labels.find_or_create_by!(title: representation.title) do |label| - label.color = representation.color - end - - cached[:label_ids][representation.title] = label.id - rescue => e - error(:label, representation.url, e.message) - end - end - - url = response.rels[:next] - end - end - - def fetch_milestones - url = "/repos/#{repo}/milestones" - - while url - response = Github::Client.new(options).get(url, state: :all) - - response.body.each do |raw| - begin - milestone = Github::Representation::Milestone.new(raw) - next if project.milestones.where(iid: milestone.iid).exists? - - project.milestones.create!( - iid: milestone.iid, - title: milestone.title, - description: milestone.description, - due_date: milestone.due_date, - state: milestone.state, - created_at: milestone.created_at, - updated_at: milestone.updated_at - ) - rescue => e - error(:milestone, milestone.url, e.message) - end - end - - url = response.rels[:next] - end - end - - def fetch_pull_requests - url = "/repos/#{repo}/pulls" - - while url - response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc) - - response.body.each do |raw| - pull_request = Github::Representation::PullRequest.new(raw, options.merge(project: project)) - merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id) - next unless merge_request.new_record? && pull_request.valid? - - begin - # If the PR has been created/updated after we last fetched the - # remote, we fetch again to get the up-to-date refs. - fetch_remote if pull_request.updated_at > last_fetched_at - - author_id = user_id(pull_request.author, project.creator_id) - description = format_description(pull_request.description, pull_request.author) - - merge_request.attributes = { - iid: pull_request.iid, - title: pull_request.title, - description: description, - source_project: pull_request.source_project, - source_branch: pull_request.source_branch_name, - source_branch_sha: pull_request.source_branch_sha, - target_project: pull_request.target_project, - target_branch: pull_request.target_branch_name, - target_branch_sha: pull_request.target_branch_sha, - state: pull_request.state, - milestone_id: milestone_id(pull_request.milestone), - author_id: author_id, - assignee_id: user_id(pull_request.assignee), - created_at: pull_request.created_at, - updated_at: pull_request.updated_at - } - - merge_request.save!(validate: false) - merge_request.merge_request_diffs.create - - review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments" - fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote) - rescue => e - error(:pull_request, pull_request.url, e.message) - end - end - - url = response.rels[:next] - end - end - - def fetch_issues - url = "/repos/#{repo}/issues" - - while url - response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc) - - response.body.each { |raw| populate_issue(raw) } - - url = response.rels[:next] - end - end - - def populate_issue(raw) - representation = Github::Representation::Issue.new(raw, options) - - begin - # Every pull request is an issue, but not every issue - # is a pull request. For this reason, "shared" actions - # for both features, like manipulating assignees, labels - # and milestones, are provided within the Issues API. - if representation.pull_request? - return unless representation.labels? || representation.comments? - - merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid) - - if representation.labels? - merge_request.update_attribute(:label_ids, label_ids(representation.labels)) - end - - fetch_comments_conditionally(merge_request, representation) - else - return if Issue.exists?(iid: representation.iid, project_id: project.id) - - author_id = user_id(representation.author, project.creator_id) - issue = Issue.new - issue.iid = representation.iid - issue.project_id = project.id - issue.title = representation.title - issue.description = format_description(representation.description, representation.author) - issue.state = representation.state - issue.milestone_id = milestone_id(representation.milestone) - issue.author_id = author_id - issue.created_at = representation.created_at - issue.updated_at = representation.updated_at - issue.save!(validate: false) - - issue.update( - label_ids: label_ids(representation.labels), - assignee_ids: assignee_ids(representation.assignees)) - - fetch_comments_conditionally(issue, representation) - end - rescue => e - error(:issue, representation.url, e.message) - end - end - - def fetch_comments_conditionally(issuable, representation) - if representation.comments? - comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments" - fetch_comments(issuable, :comment, comments_url) - end - end - - def fetch_comments(noteable, type, url, klass = Note) - while url - comments = Github::Client.new(options).get(url) - - ActiveRecord::Base.no_touching do - comments.body.each do |raw| - begin - representation = Github::Representation::Comment.new(raw, options) - author_id = user_id(representation.author, project.creator_id) - - note = klass.new - note.project_id = project.id - note.noteable = noteable - note.note = format_description(representation.note, representation.author) - note.commit_id = representation.commit_id - note.line_code = representation.line_code - note.author_id = author_id - note.created_at = representation.created_at - note.updated_at = representation.updated_at - note.save!(validate: false) - rescue => e - error(type, representation.url, e.message) - end - end - end - - url = comments.rels[:next] - end - end - - def fetch_releases - url = "/repos/#{repo}/releases" - - while url - response = Github::Client.new(options).get(url) - - response.body.each do |raw| - representation = Github::Representation::Release.new(raw) - next unless representation.valid? - - release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag) - next unless release.new_record? - - begin - release.description = representation.description - release.created_at = representation.created_at - release.updated_at = representation.updated_at - release.save!(validate: false) - rescue => e - error(:release, representation.url, e.message) - end - end - - url = response.rels[:next] - end - end - - def label_ids(labels) - labels.map { |label| cached[:label_ids][label.title] }.compact - end - - def assignee_ids(assignees) - assignees.map { |assignee| user_id(assignee) }.compact - end - - def milestone_id(milestone) - return unless milestone.present? - - project.milestones.select(:id).find_by(iid: milestone.iid)&.id - end - - def user_id(user, fallback_id = nil) - return unless user.present? - return cached[:user_ids][user.id] if cached[:user_ids][user.id].present? - - gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email) - - cached[:gitlab_user_ids][user.id] = gitlab_user_id.present? - cached[:user_ids][user.id] = gitlab_user_id || fallback_id - end - - def user_id_by_email(email) - return nil unless email - - ::User.find_by_any_email(email)&.id - end - - def user_id_by_external_uid(id) - return nil unless id - - ::User.select(:id) - .joins(:identities) - .merge(::Identity.where(provider: :github, extern_uid: id)) - .first&.id - end - - def format_description(body, author) - return body if cached[:gitlab_user_ids][author.id] - - "*Created by: #{author.username}*\n\n#{body}" - end - - def expire_repository_cache - repository.expire_content_cache if project.repository_exists? - end - - def keep_track_of_errors - return unless errors.any? - - project.update_column(:import_error, { - message: 'The remote data could not be fully imported.', - errors: errors - }.to_json) - end - - def error(type, url, message) - errors << { type: type, url: Gitlab::UrlSanitizer.sanitize(url), error: message } - end - end -end diff --git a/lib/github/import/issue.rb b/lib/github/import/issue.rb deleted file mode 100644 index 171f0872666..00000000000 --- a/lib/github/import/issue.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Github - class Import - class Issue < ::Issue - self.table_name = 'issues' - - self.reset_callbacks :save - self.reset_callbacks :create - self.reset_callbacks :commit - self.reset_callbacks :update - self.reset_callbacks :validate - end - end -end diff --git a/lib/github/import/legacy_diff_note.rb b/lib/github/import/legacy_diff_note.rb deleted file mode 100644 index 18adff560b6..00000000000 --- a/lib/github/import/legacy_diff_note.rb +++ /dev/null @@ -1,12 +0,0 @@ -module Github - class Import - class LegacyDiffNote < ::LegacyDiffNote - self.table_name = 'notes' - self.store_full_sti_class = false - - self.reset_callbacks :commit - self.reset_callbacks :update - self.reset_callbacks :validate - end - end -end diff --git a/lib/github/import/merge_request.rb b/lib/github/import/merge_request.rb deleted file mode 100644 index c258e5d5e0e..00000000000 --- a/lib/github/import/merge_request.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Github - class Import - class MergeRequest < ::MergeRequest - self.table_name = 'merge_requests' - - self.reset_callbacks :create - self.reset_callbacks :save - self.reset_callbacks :commit - self.reset_callbacks :update - self.reset_callbacks :validate - end - end -end diff --git a/lib/github/import/note.rb b/lib/github/import/note.rb deleted file mode 100644 index 8cf4f30e6b7..00000000000 --- a/lib/github/import/note.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Github - class Import - class Note < ::Note - self.table_name = 'notes' - self.store_full_sti_class = false - - self.reset_callbacks :save - self.reset_callbacks :commit - self.reset_callbacks :update - self.reset_callbacks :validate - end - end -end diff --git a/lib/github/rate_limit.rb b/lib/github/rate_limit.rb deleted file mode 100644 index 884693d093c..00000000000 --- a/lib/github/rate_limit.rb +++ /dev/null @@ -1,27 +0,0 @@ -module Github - class RateLimit - SAFE_REMAINING_REQUESTS = 100 - SAFE_RESET_TIME = 500 - RATE_LIMIT_URL = '/rate_limit'.freeze - - attr_reader :connection - - def initialize(connection) - @connection = connection - end - - def get - response = connection.get(RATE_LIMIT_URL) - - # GitHub Rate Limit API returns 404 when the rate limit is disabled - return false unless response.status != 404 - - body = Oj.load(response.body, class_cache: false, mode: :compat) - remaining = body.dig('rate', 'remaining').to_i - reset_in = body.dig('rate', 'reset').to_i - exceed = remaining <= SAFE_REMAINING_REQUESTS - - [exceed, reset_in] - end - end -end diff --git a/lib/github/repositories.rb b/lib/github/repositories.rb deleted file mode 100644 index c1c9448f305..00000000000 --- a/lib/github/repositories.rb +++ /dev/null @@ -1,19 +0,0 @@ -module Github - class Repositories - attr_reader :options - - def initialize(options) - @options = options - end - - def fetch - Collection.new(options).fetch(repos_url) - end - - private - - def repos_url - '/user/repos' - end - end -end diff --git a/lib/github/representation/base.rb b/lib/github/representation/base.rb deleted file mode 100644 index f26bdbdd546..00000000000 --- a/lib/github/representation/base.rb +++ /dev/null @@ -1,30 +0,0 @@ -module Github - module Representation - class Base - def initialize(raw, options = {}) - @raw = raw - @options = options - end - - def id - raw['id'] - end - - def url - raw['url'] - end - - def created_at - raw['created_at'] - end - - def updated_at - raw['updated_at'] - end - - private - - attr_reader :raw, :options - end - end -end diff --git a/lib/github/representation/branch.rb b/lib/github/representation/branch.rb deleted file mode 100644 index 0087a3d3c4f..00000000000 --- a/lib/github/representation/branch.rb +++ /dev/null @@ -1,55 +0,0 @@ -module Github - module Representation - class Branch < Representation::Base - attr_reader :repository - - def user - raw.dig('user', 'login') || 'unknown' - end - - def repo? - raw['repo'].present? - end - - def repo - return unless repo? - - @repo ||= Github::Representation::Repo.new(raw['repo']) - end - - def ref - raw['ref'] - end - - def sha - raw['sha'] - end - - def short_sha - Commit.truncate_sha(sha) - end - - def valid? - sha.present? && ref.present? - end - - def restore!(name) - repository.create_branch(name, sha) - rescue Gitlab::Git::Repository::InvalidRef => e - Rails.logger.error("#{self.class.name}: Could not restore branch #{name}: #{e}") - end - - def remove!(name) - repository.delete_branch(name) - rescue Gitlab::Git::Repository::DeleteBranchError => e - Rails.logger.error("#{self.class.name}: Could not remove branch #{name}: #{e}") - end - - private - - def repository - @repository ||= options.fetch(:repository) - end - end - end -end diff --git a/lib/github/representation/comment.rb b/lib/github/representation/comment.rb deleted file mode 100644 index 83bf0b5310d..00000000000 --- a/lib/github/representation/comment.rb +++ /dev/null @@ -1,42 +0,0 @@ -module Github - module Representation - class Comment < Representation::Base - def note - raw['body'] || '' - end - - def author - @author ||= Github::Representation::User.new(raw['user'], options) - end - - def commit_id - raw['commit_id'] - end - - def line_code - return unless on_diff? - - parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines) - generate_line_code(parsed_lines.to_a.last) - end - - private - - def generate_line_code(line) - Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos) - end - - def on_diff? - diff_hunk.present? - end - - def diff_hunk - raw['diff_hunk'] - end - - def file_path - raw['path'] - end - end - end -end diff --git a/lib/github/representation/issuable.rb b/lib/github/representation/issuable.rb deleted file mode 100644 index 768ba3b993c..00000000000 --- a/lib/github/representation/issuable.rb +++ /dev/null @@ -1,37 +0,0 @@ -module Github - module Representation - class Issuable < Representation::Base - def iid - raw['number'] - end - - def title - raw['title'] - end - - def description - raw['body'] || '' - end - - def milestone - return unless raw['milestone'].present? - - @milestone ||= Github::Representation::Milestone.new(raw['milestone']) - end - - def author - @author ||= Github::Representation::User.new(raw['user'], options) - end - - def labels? - raw['labels'].any? - end - - def labels - @labels ||= Array(raw['labels']).map do |label| - Github::Representation::Label.new(label, options) - end - end - end - end -end diff --git a/lib/github/representation/issue.rb b/lib/github/representation/issue.rb deleted file mode 100644 index 4f1a02cb90f..00000000000 --- a/lib/github/representation/issue.rb +++ /dev/null @@ -1,27 +0,0 @@ -module Github - module Representation - class Issue < Representation::Issuable - def state - raw['state'] == 'closed' ? 'closed' : 'opened' - end - - def comments? - raw['comments'] > 0 - end - - def pull_request? - raw['pull_request'].present? - end - - def assigned? - raw['assignees'].present? - end - - def assignees - @assignees ||= Array(raw['assignees']).map do |user| - Github::Representation::User.new(user, options) - end - end - end - end -end diff --git a/lib/github/representation/label.rb b/lib/github/representation/label.rb deleted file mode 100644 index 60aa51f9569..00000000000 --- a/lib/github/representation/label.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Github - module Representation - class Label < Representation::Base - def color - "##{raw['color']}" - end - - def title - raw['name'] - end - end - end -end diff --git a/lib/github/representation/milestone.rb b/lib/github/representation/milestone.rb deleted file mode 100644 index 917e6394ad4..00000000000 --- a/lib/github/representation/milestone.rb +++ /dev/null @@ -1,25 +0,0 @@ -module Github - module Representation - class Milestone < Representation::Base - def iid - raw['number'] - end - - def title - raw['title'] - end - - def description - raw['description'] - end - - def due_date - raw['due_on'] - end - - def state - raw['state'] == 'closed' ? 'closed' : 'active' - end - end - end -end diff --git a/lib/github/representation/pull_request.rb b/lib/github/representation/pull_request.rb deleted file mode 100644 index 0171179bb0f..00000000000 --- a/lib/github/representation/pull_request.rb +++ /dev/null @@ -1,71 +0,0 @@ -module Github - module Representation - class PullRequest < Representation::Issuable - delegate :sha, to: :source_branch, prefix: true - delegate :sha, to: :target_branch, prefix: true - - def source_project - project - end - - def source_branch_name - # Mimic the "user:branch" displayed in the MR widget, - # i.e. "Request to merge rymai:add-external-mounts into master" - cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref - end - - def target_project - project - end - - def target_branch_name - target_branch.ref - end - - def state - return 'merged' if raw['state'] == 'closed' && raw['merged_at'].present? - return 'closed' if raw['state'] == 'closed' - - 'opened' - end - - def opened? - state == 'opened' - end - - def valid? - source_branch.valid? && target_branch.valid? - end - - def assigned? - raw['assignee'].present? - end - - def assignee - return unless assigned? - - @assignee ||= Github::Representation::User.new(raw['assignee'], options) - end - - private - - def project - @project ||= options.fetch(:project) - end - - def source_branch - @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository) - end - - def target_branch - @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository) - end - - def cross_project? - return true unless source_branch.repo? - - source_branch.repo.id != target_branch.repo.id - end - end - end -end diff --git a/lib/github/representation/release.rb b/lib/github/representation/release.rb deleted file mode 100644 index e7e4b428c1a..00000000000 --- a/lib/github/representation/release.rb +++ /dev/null @@ -1,17 +0,0 @@ -module Github - module Representation - class Release < Representation::Base - def description - raw['body'] - end - - def tag - raw['tag_name'] - end - - def valid? - !raw['draft'] - end - end - end -end diff --git a/lib/github/representation/repo.rb b/lib/github/representation/repo.rb deleted file mode 100644 index 6938aa7db05..00000000000 --- a/lib/github/representation/repo.rb +++ /dev/null @@ -1,6 +0,0 @@ -module Github - module Representation - class Repo < Representation::Base - end - end -end diff --git a/lib/github/representation/user.rb b/lib/github/representation/user.rb deleted file mode 100644 index 18591380e25..00000000000 --- a/lib/github/representation/user.rb +++ /dev/null @@ -1,15 +0,0 @@ -module Github - module Representation - class User < Representation::Base - def email - return @email if defined?(@email) - - @email = Github::User.new(username, options).get.fetch('email', nil) - end - - def username - raw['login'] - end - end - end -end diff --git a/lib/github/response.rb b/lib/github/response.rb deleted file mode 100644 index 761c524b553..00000000000 --- a/lib/github/response.rb +++ /dev/null @@ -1,25 +0,0 @@ -module Github - class Response - attr_reader :raw, :headers, :status - - def initialize(response) - @raw = response - @headers = response.headers - @status = response.status - end - - def body - Oj.load(raw.body, class_cache: false, mode: :compat) - end - - def rels - links = headers['Link'].to_s.split(', ').map do |link| - href, name = link.match(/<(.*?)>; rel="(\w+)"/).captures - - [name.to_sym, href] - end - - Hash[*links.flatten] - end - end -end diff --git a/lib/github/user.rb b/lib/github/user.rb deleted file mode 100644 index f88a29e590b..00000000000 --- a/lib/github/user.rb +++ /dev/null @@ -1,24 +0,0 @@ -module Github - class User - attr_reader :username, :options - - def initialize(username, options) - @username = username - @options = options - end - - def get - client.get(user_url).body - end - - private - - def client - @client ||= Github::Client.new(options) - end - - def user_url - "/users/#{username}" - end - end -end diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index 43a00d6cedb..cd7b4c043da 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -108,20 +108,41 @@ module Gitlab end end - def self.bulk_insert(table, rows) + # Bulk inserts a number of rows into a table, optionally returning their + # IDs. + # + # table - The name of the table to insert the rows into. + # rows - An Array of Hash instances, each mapping the columns to their + # values. + # return_ids - When set to true the return value will be an Array of IDs of + # the inserted rows, this only works on PostgreSQL. + def self.bulk_insert(table, rows, return_ids: false) return if rows.empty? keys = rows.first.keys columns = keys.map { |key| connection.quote_column_name(key) } + return_ids = false if mysql? tuples = rows.map do |row| row.values_at(*keys).map { |value| connection.quote(value) } end - connection.execute <<-EOF + sql = <<-EOF INSERT INTO #{table} (#{columns.join(', ')}) VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')} EOF + + if return_ids + sql << 'RETURNING id' + end + + result = connection.execute(sql) + + if return_ids + result.values.map { |tuple| tuple[0].to_i } + else + [] + end end def self.sanitize_timestamp(timestamp) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index df4ad586e12..d236e1b03e6 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -920,6 +920,11 @@ module Gitlab false end + # Returns true if a remote exists. + def remote_exists?(name) + rugged.remotes[name].present? + end + # Update the specified remote using the values in the +options+ hash # # Example diff --git a/lib/gitlab/github_import.rb b/lib/gitlab/github_import.rb new file mode 100644 index 00000000000..d2ae4c1255e --- /dev/null +++ b/lib/gitlab/github_import.rb @@ -0,0 +1,34 @@ +module Gitlab + module GithubImport + def self.new_client_for(project, token: nil, parallel: true) + token_to_use = token || project.import_data&.credentials&.fetch(:user) + + Client.new(token_to_use, parallel: parallel) + end + + # Inserts a raw row and returns the ID of the inserted row. + # + # attributes - The attributes/columns to set. + # relation - An ActiveRecord::Relation to use for finding the ID of the row + # when using MySQL. + def self.insert_and_return_id(attributes, relation) + # We use bulk_insert here so we can bypass any queries executed by + # callbacks or validation rules, as doing this wouldn't scale when + # importing very large projects. + result = Gitlab::Database + .bulk_insert(relation.table_name, [attributes], return_ids: true) + + # MySQL doesn't support returning the IDs of a bulk insert in a way that + # is not a pain, so in this case we'll issue an extra query instead. + result.first || + relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first + end + + # Returns the ID of the ghost user. + def self.ghost_user_id + key = 'github-import/ghost-user-id' + + Caching.read_integer(key) || Caching.write(key, User.select(:id).ghost.id) + end + end +end diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb new file mode 100644 index 00000000000..147597289cf --- /dev/null +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module BulkImporting + # Builds and returns an Array of objects to bulk insert into the + # database. + # + # enum - An Enumerable that returns the objects to turn into database + # rows. + def build_database_rows(enum) + enum.each_with_object([]) do |(object, _), rows| + rows << build(object) unless already_imported?(object) + end + end + + # Bulk inserts the given rows into the database. + def bulk_insert(model, rows, batch_size: 100) + rows.each_slice(batch_size) do |slice| + Gitlab::Database.bulk_insert(model.table_name, slice) + end + end + end + end +end diff --git a/lib/gitlab/github_import/caching.rb b/lib/gitlab/github_import/caching.rb new file mode 100644 index 00000000000..b08f133794f --- /dev/null +++ b/lib/gitlab/github_import/caching.rb @@ -0,0 +1,151 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Caching + # The default timeout of the cache keys. + TIMEOUT = 24.hours.to_i + + WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze + local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2] + local existing = tonumber(redis.call("get", key)) + + if existing == nil or value > existing then + redis.call("set", key, value) + redis.call("expire", key, ttl) + return true + else + return false + end + EOF + + # Reads a cache key. + # + # If the key exists and has a non-empty value its TTL is refreshed + # automatically. + # + # raw_key - The cache key to read. + # timeout - The new timeout of the key if the key is to be refreshed. + def self.read(raw_key, timeout: TIMEOUT) + key = cache_key_for(raw_key) + value = Redis::Cache.with { |redis| redis.get(key) } + + if value.present? + # We refresh the expiration time so frequently used keys stick + # around, removing the need for querying the database as much as + # possible. + # + # A key may be empty when we looked up a GitHub user (for example) but + # did not find a matching GitLab user. In that case we _don't_ want to + # refresh the TTL so we automatically pick up the right data when said + # user were to register themselves on the GitLab instance. + Redis::Cache.with { |redis| redis.expire(key, timeout) } + end + + value + end + + # Reads an integer from the cache, or returns nil if no value was found. + # + # See Caching.read for more information. + def self.read_integer(raw_key, timeout: TIMEOUT) + value = read(raw_key, timeout: timeout) + + value.to_i if value.present? + end + + # Sets a cache key to the given value. + # + # key - The cache key to write. + # value - The value to set. + # timeout - The time after which the cache key should expire. + def self.write(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.set(key, value, ex: timeout) + end + + value + end + + # Adds a value to a set. + # + # raw_key - The key of the set to add the value to. + # value - The value to add to the set. + # timeout - The new timeout of the key. + def self.set_add(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.multi do |m| + m.sadd(key, value) + m.expire(key, timeout) + end + end + end + + # Returns true if the given value is present in the set. + # + # raw_key - The key of the set to check. + # value - The value to check for. + def self.set_includes?(raw_key, value) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.sismember(key, value) + end + end + + # Sets multiple keys to a given value. + # + # mapping - A Hash mapping the cache keys to their values. + # timeout - The time after which the cache key should expire. + def self.write_multiple(mapping, timeout: TIMEOUT) + Redis::Cache.with do |redis| + redis.multi do |multi| + mapping.each do |raw_key, value| + multi.set(cache_key_for(raw_key), value, ex: timeout) + end + end + end + end + + # Sets the expiration time of a key. + # + # raw_key - The key for which to change the timeout. + # timeout - The new timeout. + def self.expire(raw_key, timeout) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.expire(key, timeout) + end + end + + # Sets a key to the given integer but only if the existing value is + # smaller than the given value. + # + # This method uses a Lua script to ensure the read and write are atomic. + # + # raw_key - The key to set. + # value - The new value for the key. + # timeout - The key timeout in seconds. + # + # Returns true when the key was overwritten, false otherwise. + def self.write_if_greater(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + val = Redis::Cache.with do |redis| + redis + .eval(WRITE_IF_GREATER_SCRIPT, keys: [key], argv: [value, timeout]) + end + + val ? true : false + end + + def self.cache_key_for(raw_key) + "#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}" + end + end + end +end diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 0550f9695bd..844530b1ea7 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -1,147 +1,184 @@ +# frozen_string_literal: true + module Gitlab module GithubImport + # HTTP client for interacting with the GitHub API. + # + # This class is basically a fancy wrapped around Octokit while adding some + # functionality to deal with rate limiting and parallel imports. Usage is + # mostly the same as Octokit, for example: + # + # client = GithubImport::Client.new('hunter2') + # + # client.labels.each do |label| + # puts label.name + # end class Client - GITHUB_SAFE_REMAINING_REQUESTS = 100 - GITHUB_SAFE_SLEEP_TIME = 500 + attr_reader :octokit - attr_reader :access_token, :host, :api_version + # A single page of data and the corresponding page number. + Page = Struct.new(:objects, :number) - def initialize(access_token, host: nil, api_version: 'v3') - @access_token = access_token - @host = host.to_s.sub(%r{/+\z}, '') - @api_version = api_version - @users = {} + # The minimum number of requests we want to keep available. + # + # We don't use a value of 0 as multiple threads may be using the same + # token in parallel. This could result in all of them hitting the GitHub + # rate limit at once. The threshold is put in place to not hit the limit + # in most cases. + RATE_LIMIT_THRESHOLD = 50 - if access_token - ::Octokit.auto_paginate = false - end + # token - The GitHub API token to use. + # + # per_page - The number of objects that should be displayed per page. + # + # parallel - When set to true hitting the rate limit will result in a + # dedicated error being raised. When set to `false` we will + # instead just `sleep()` until the rate limit is reset. Setting + # this value to `true` for parallel importing is crucial as + # otherwise hitting the rate limit will result in a thread + # being blocked in a `sleep()` call for up to an hour. + def initialize(token, per_page: 100, parallel: true) + @octokit = Octokit::Client.new(access_token: token, per_page: per_page) + @parallel = parallel end - def api - @api ||= ::Octokit::Client.new( - access_token: access_token, - api_endpoint: api_endpoint, - # If there is no config, we're connecting to github.com and we - # should verify ssl. - connection_options: { - ssl: { verify: config ? config['verify_ssl'] : true } - } - ) + def parallel? + @parallel end - def client - unless config - raise Projects::ImportService::Error, - 'OAuth configuration for GitHub missing.' - end - - @client ||= ::OAuth2::Client.new( - config.app_id, - config.app_secret, - github_options.merge(ssl: { verify: config['verify_ssl'] }) - ) + # Returns the details of a GitHub user. + # + # username - The username of the user. + def user(username) + with_rate_limit { octokit.user(username) } end - def authorize_url(redirect_uri) - client.auth_code.authorize_url({ - redirect_uri: redirect_uri, - scope: "repo, user, user:email" - }) + # Returns the details of a GitHub repository. + # + # name - The path (in the form `owner/repository`) of the repository. + def repository(name) + with_rate_limit { octokit.repo(name) } end - def get_token(code) - client.auth_code.get_token(code).token + def labels(*args) + each_object(:labels, *args) end - def method_missing(method, *args, &block) - if api.respond_to?(method) - request(method, *args, &block) - else - super(method, *args, &block) - end + def milestones(*args) + each_object(:milestones, *args) end - def respond_to?(method) - api.respond_to?(method) || super + def releases(*args) + each_object(:releases, *args) end - def user(login) - return nil unless login.present? - return @users[login] if @users.key?(login) + # Fetches data from the GitHub API and yields a Page object for every page + # of data, without loading all of them into memory. + # + # method - The Octokit method to use for getting the data. + # args - Arguments to pass to the Octokit method. + # + # rubocop: disable GitlabSecurity/PublicSend + def each_page(method, *args, &block) + return to_enum(__method__, method, *args) unless block_given? - @users[login] = api.user(login) - end + page = + if args.last.is_a?(Hash) && args.last[:page] + args.last[:page] + else + 1 + end - private + collection = with_rate_limit { octokit.public_send(method, *args) } + next_url = octokit.last_response.rels[:next] - def api_endpoint - if host.present? && api_version.present? - "#{host}/api/#{api_version}" - else - github_options[:site] + yield Page.new(collection, page) + + while next_url + response = with_rate_limit { next_url.get } + next_url = response.rels[:next] + + yield Page.new(response.data, page += 1) end end - def config - Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" } - end + # Iterates over all of the objects for the given method (e.g. `:labels`). + # + # method - The method to send to Octokit for querying data. + # args - Any arguments to pass to the Octokit method. + def each_object(method, *args, &block) + return to_enum(__method__, method, *args) unless block_given? - def github_options - if config - config["args"]["client_options"].deep_symbolize_keys - else - OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys + each_page(method, *args) do |page| + page.objects.each do |object| + yield object + end end end - def rate_limit - api.rate_limit! - # GitHub Rate Limit API returns 404 when the rate limit is - # disabled. In this case we just want to return gracefully - # instead of spitting out an error. - rescue Octokit::NotFound - nil - end + # Yields the supplied block, responding to any rate limit errors. + # + # The exact strategy used for handling rate limiting errors depends on + # whether we are running in parallel mode or not. For more information see + # `#rate_or_wait_for_rate_limit`. + def with_rate_limit + request_count_counter.increment - def has_rate_limit? - return @has_rate_limit if defined?(@has_rate_limit) + raise_or_wait_for_rate_limit unless requests_remaining? - @has_rate_limit = rate_limit.present? - end + begin + yield + rescue Octokit::TooManyRequests + raise_or_wait_for_rate_limit - def rate_limit_exceed? - has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS + # This retry will only happen when running in sequential mode as we'll + # raise an error in parallel mode. + retry + end end - def rate_limit_sleep_time - rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME + # Returns `true` if we're still allowed to perform API calls. + def requests_remaining? + remaining_requests > RATE_LIMIT_THRESHOLD end - def request(method, *args, &block) - sleep rate_limit_sleep_time if rate_limit_exceed? - - data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend - return data unless data.is_a?(Array) + def remaining_requests + octokit.rate_limit.remaining + end - last_response = api.last_response + def raise_or_wait_for_rate_limit + rate_limit_counter.increment - if block_given? - yield data - # api.last_response could change while we're yielding (e.g. fetching labels for each PR) - # so we cache our own last response - each_response_page(last_response, &block) + if parallel? + raise RateLimitError else - each_response_page(last_response) { |page| data.concat(page) } - data + sleep(rate_limit_resets_in) end end - def each_response_page(last_response) - while last_response.rels[:next] - sleep rate_limit_sleep_time if rate_limit_exceed? - last_response = last_response.rels[:next].get - yield last_response.data if last_response.data.is_a?(Array) - end + def rate_limit_resets_in + # We add a few seconds to the rate limit so we don't _immediately_ + # resume when the rate limit resets as this may result in us performing + # a request before GitHub has a chance to reset the limit. + octokit.rate_limit.resets_in + 5 + end + + def respond_to_missing?(method, include_private = false) + octokit.respond_to?(method, include_private) + end + + def rate_limit_counter + @rate_limit_counter ||= Gitlab::Metrics.counter( + :github_importer_rate_limit_hits, + 'The number of times we hit the GitHub rate limit when importing projects' + ) + end + + def request_count_counter + @request_counter ||= Gitlab::Metrics.counter( + :github_importer_request_count, + 'The number of GitHub API calls performed when importing projects' + ) end end end diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb new file mode 100644 index 00000000000..8274f37d358 --- /dev/null +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class DiffNoteImporter + attr_reader :note, :project, :client, :user_finder + + # note - An instance of `Gitlab::GithubImport::Representation::DiffNote`. + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(note, project, client) + @note = note + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + end + + def execute + return unless (mr_id = find_merge_request_id) + + author_id, author_found = user_finder.author_id_for(note) + + note_body = + MarkdownText.format(note.note, note.author, author_found) + + attributes = { + noteable_type: 'MergeRequest', + noteable_id: mr_id, + project_id: project.id, + author_id: author_id, + note: note_body, + system: false, + commit_id: note.commit_id, + line_code: note.line_code, + type: 'LegacyDiffNote', + created_at: note.created_at, + updated_at: note.updated_at, + st_diff: note.diff_hash.to_yaml + } + + # It's possible that during an import we'll insert tens of thousands + # of diff notes. If we were to use the Note/LegacyDiffNote model here + # we'd also have to run additional queries for both validations and + # callbacks, putting a lot of pressure on the database. + # + # To work around this we're using bulk_insert with a single row. This + # allows us to efficiently insert data (even if it's just 1 row) + # without having to use all sorts of hacks to disable callbacks. + Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [attributes]) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project and the issue have been deleted since + # scheduling this job. In this case we'll just skip creating the note. + end + + # Returns the ID of the merge request this note belongs to. + def find_merge_request_id + GithubImport::IssuableFinder.new(project, note).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/diff_notes_importer.rb b/lib/gitlab/github_import/importer/diff_notes_importer.rb new file mode 100644 index 00000000000..966f12c5c2f --- /dev/null +++ b/lib/gitlab/github_import/importer/diff_notes_importer.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class DiffNotesImporter + include ParallelScheduling + + def representation_class + Representation::DiffNote + end + + def importer_class + DiffNoteImporter + end + + def sidekiq_worker_class + ImportDiffNoteWorker + end + + def collection_method + :pull_requests_comments + end + + def id_for_already_imported_cache(note) + note.id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb new file mode 100644 index 00000000000..bad064b76c8 --- /dev/null +++ b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssueAndLabelLinksImporter + attr_reader :issue, :project, :client + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + end + + def execute + IssueImporter.import_if_issue(issue, project, client) + LabelLinksImporter.new(issue, project, client).execute + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb new file mode 100644 index 00000000000..31fefebf787 --- /dev/null +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssueImporter + attr_reader :project, :issue, :client, :user_finder, :milestone_finder, + :issuable_finder + + # Imports an issue if it's a regular issue and not a pull request. + def self.import_if_issue(issue, project, client) + new(issue, project, client).execute unless issue.pull_request? + end + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + @milestone_finder = MilestoneFinder.new(project) + @issuable_finder = GithubImport::IssuableFinder.new(project, issue) + end + + def execute + Issue.transaction do + if (issue_id = create_issue) + create_assignees(issue_id) + issuable_finder.cache_database_id(issue_id) + end + end + end + + # Creates a new GitLab issue for the current GitHub issue. + # + # Returns the ID of the created issue as an Integer. If the issue + # couldn't be created this method will return `nil` instead. + def create_issue + author_id, author_found = user_finder.author_id_for(issue) + + description = + MarkdownText.format(issue.description, issue.author, author_found) + + attributes = { + iid: issue.iid, + title: issue.truncated_title, + author_id: author_id, + project_id: project.id, + description: description, + milestone_id: milestone_finder.id_for(issue), + state: issue.state, + created_at: issue.created_at, + updated_at: issue.updated_at + } + + GithubImport.insert_and_return_id(attributes, project.issues) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project has been deleted since scheduling this + # job. In this case we'll just skip creating the issue. + end + + # Stores all issue assignees in the database. + # + # issue_id - The ID of the created issue. + def create_assignees(issue_id) + assignees = [] + + issue.assignees.each do |assignee| + if (user_id = user_finder.user_id_for(assignee)) + assignees << { issue_id: issue_id, user_id: user_id } + end + end + + Gitlab::Database.bulk_insert(IssueAssignee.table_name, assignees) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issues_importer.rb b/lib/gitlab/github_import/importer/issues_importer.rb new file mode 100644 index 00000000000..ac6d0666b3a --- /dev/null +++ b/lib/gitlab/github_import/importer/issues_importer.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssuesImporter + include ParallelScheduling + + def importer_class + IssueAndLabelLinksImporter + end + + def representation_class + Representation::Issue + end + + def sidekiq_worker_class + ImportIssueWorker + end + + def collection_method + :issues + end + + def id_for_already_imported_cache(issue) + issue.number + end + + def collection_options + { state: 'all', sort: 'created', direction: 'asc' } + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb new file mode 100644 index 00000000000..2001b7e3482 --- /dev/null +++ b/lib/gitlab/github_import/importer/label_links_importer.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LabelLinksImporter + attr_reader :issue, :project, :client, :label_finder + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + @label_finder = LabelFinder.new(project) + end + + def execute + create_labels + end + + def create_labels + time = Time.zone.now + rows = [] + target_id = find_target_id + + issue.label_names.each do |label_name| + # Although unlikely it's technically possible for an issue to be + # given a label that was created and assigned after we imported all + # the project's labels. + next unless (label_id = label_finder.id_for(label_name)) + + rows << { + label_id: label_id, + target_id: target_id, + target_type: issue.issuable_type, + created_at: time, + updated_at: time + } + end + + Gitlab::Database.bulk_insert(LabelLink.table_name, rows) + end + + def find_target_id + GithubImport::IssuableFinder.new(project, issue).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb new file mode 100644 index 00000000000..a73033d35ba --- /dev/null +++ b/lib/gitlab/github_import/importer/labels_importer.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LabelsImporter + include BulkImporting + + attr_reader :project, :client, :existing_labels + + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(project, client) + @project = project + @client = client + @existing_labels = project.labels.pluck(:title).to_set + end + + def execute + bulk_insert(Label, build_labels) + build_labels_cache + end + + def build_labels + build_database_rows(each_label) + end + + def already_imported?(label) + existing_labels.include?(label.name) + end + + def build_labels_cache + LabelFinder.new(project).build_cache + end + + def build(label) + time = Time.zone.now + + { + title: label.name, + color: '#' + label.color, + project_id: project.id, + type: 'ProjectLabel', + created_at: time, + updated_at: time + } + end + + def each_label + client.labels(project.import_source) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb new file mode 100644 index 00000000000..c53480e828a --- /dev/null +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class MilestonesImporter + include BulkImporting + + attr_reader :project, :client, :existing_milestones + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + @existing_milestones = project.milestones.pluck(:iid).to_set + end + + def execute + bulk_insert(Milestone, build_milestones) + build_milestones_cache + end + + def build_milestones + build_database_rows(each_milestone) + end + + def already_imported?(milestone) + existing_milestones.include?(milestone.number) + end + + def build_milestones_cache + MilestoneFinder.new(project).build_cache + end + + def build(milestone) + { + iid: milestone.number, + title: milestone.title, + description: milestone.description, + project_id: project.id, + state: state_for(milestone), + created_at: milestone.created_at, + updated_at: milestone.updated_at + } + end + + def state_for(milestone) + milestone.state == 'open' ? :active : :closed + end + + def each_milestone + client.milestones(project.import_source, state: 'all') + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb new file mode 100644 index 00000000000..c890f2df360 --- /dev/null +++ b/lib/gitlab/github_import/importer/note_importer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class NoteImporter + attr_reader :note, :project, :client, :user_finder + + # note - An instance of `Gitlab::GithubImport::Representation::Note`. + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(note, project, client) + @note = note + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + end + + def execute + return unless (noteable_id = find_noteable_id) + + author_id, author_found = user_finder.author_id_for(note) + + note_body = + MarkdownText.format(note.note, note.author, author_found) + + attributes = { + noteable_type: note.noteable_type, + noteable_id: noteable_id, + project_id: project.id, + author_id: author_id, + note: note_body, + system: false, + created_at: note.created_at, + updated_at: note.updated_at + } + + # We're using bulk_insert here so we can bypass any validations and + # callbacks. Running these would result in a lot of unnecessary SQL + # queries being executed when importing large projects. + Gitlab::Database.bulk_insert(Note.table_name, [attributes]) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project and the issue have been deleted since + # scheduling this job. In this case we'll just skip creating the note. + end + + # Returns the ID of the issue or merge request to create the note for. + def find_noteable_id + GithubImport::IssuableFinder.new(project, note).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/notes_importer.rb b/lib/gitlab/github_import/importer/notes_importer.rb new file mode 100644 index 00000000000..5aec760ea5f --- /dev/null +++ b/lib/gitlab/github_import/importer/notes_importer.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class NotesImporter + include ParallelScheduling + + def importer_class + NoteImporter + end + + def representation_class + Representation::Note + end + + def sidekiq_worker_class + ImportNoteWorker + end + + def collection_method + :issues_comments + end + + def id_for_already_imported_cache(note) + note.id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb new file mode 100644 index 00000000000..49d859f9624 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_request_importer.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class PullRequestImporter + attr_reader :pull_request, :project, :client, :user_finder, + :milestone_finder, :issuable_finder + + # pull_request - An instance of + # `Gitlab::GithubImport::Representation::PullRequest`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(pull_request, project, client) + @pull_request = pull_request + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + @milestone_finder = MilestoneFinder.new(project) + @issuable_finder = + GithubImport::IssuableFinder.new(project, pull_request) + end + + def execute + if (mr_id = create_merge_request) + issuable_finder.cache_database_id(mr_id) + end + end + + # Creates the merge request and returns its ID. + # + # This method will return `nil` if the merge request could not be + # created. + def create_merge_request + author_id, author_found = user_finder.author_id_for(pull_request) + + description = MarkdownText + .format(pull_request.description, pull_request.author, author_found) + + # This work must be wrapped in a transaction as otherwise we can leave + # behind incomplete data in the event of an error. This can then lead + # to duplicate key errors when jobs are retried. + MergeRequest.transaction do + attributes = { + iid: pull_request.iid, + title: pull_request.truncated_title, + description: description, + source_project_id: project.id, + target_project_id: project.id, + source_branch: pull_request.formatted_source_branch, + target_branch: pull_request.target_branch, + state: pull_request.state, + milestone_id: milestone_finder.id_for(pull_request), + author_id: author_id, + assignee_id: user_finder.assignee_id_for(pull_request), + created_at: pull_request.created_at, + updated_at: pull_request.updated_at + } + + # When creating merge requests there are a lot of hooks that may + # run, for many different reasons. Many of these hooks (e.g. the + # ones used for rendering Markdown) are completely unnecessary and + # may even lead to transaction timeouts. + # + # To ensure importing pull requests has a minimal impact and can + # complete in a reasonable time we bypass all the hooks by inserting + # the row and then retrieving it. We then only perform the + # additional work that is strictly necessary. + merge_request_id = GithubImport + .insert_and_return_id(attributes, project.merge_requests) + + merge_request = project.merge_requests.find(merge_request_id) + + # These fields are set so we can create the correct merge request + # diffs. + merge_request.source_branch_sha = pull_request.source_branch_sha + merge_request.target_branch_sha = pull_request.target_branch_sha + + merge_request.keep_around_commit + merge_request.merge_request_diffs.create + + merge_request.id + end + rescue ActiveRecord::InvalidForeignKey + # It's possible the project has been deleted since scheduling this + # job. In this case we'll just skip creating the merge request. + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb new file mode 100644 index 00000000000..5437e32e9f1 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class PullRequestsImporter + include ParallelScheduling + + def importer_class + PullRequestImporter + end + + def representation_class + Representation::PullRequest + end + + def sidekiq_worker_class + ImportPullRequestWorker + end + + def id_for_already_imported_cache(pr) + pr.number + end + + def each_object_to_import + super do |pr| + update_repository if update_repository?(pr) + yield pr + end + end + + def update_repository + # We set this column _before_ fetching the repository, and this is + # deliberate. If we were to update this column after the fetch we may + # miss out on changes pushed during the fetch or between the fetch and + # updating the timestamp. + project.update_column(:last_repository_updated_at, Time.zone.now) + + project.repository.fetch_remote('github', forced: false) + + pname = project.path_with_namespace + + Rails.logger + .info("GitHub importer finished updating repository for #{pname}") + + repository_updates_counter.increment(project: pname) + end + + def update_repository?(pr) + last_update = project.last_repository_updated_at || project.created_at + + return false if pr.updated_at < last_update + + # PRs may be updated without there actually being new commits, thus we + # check to make sure we only re-fetch if truly necessary. + !(commit_exists?(pr.head.sha) && commit_exists?(pr.base.sha)) + end + + def commit_exists?(sha) + project.repository.lookup(sha) + true + rescue Rugged::Error + false + end + + def collection_method + :pull_requests + end + + def collection_options + { state: 'all', sort: 'created', direction: 'asc' } + end + + def repository_updates_counter + @repository_updates_counter ||= Gitlab::Metrics.counter( + :github_importer_repository_updates, + 'The number of times repositories have to be updated again' + ) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb new file mode 100644 index 00000000000..100f459fdcc --- /dev/null +++ b/lib/gitlab/github_import/importer/releases_importer.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ReleasesImporter + include BulkImporting + + attr_reader :project, :client, :existing_tags + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + @existing_tags = project.releases.pluck(:tag).to_set + end + + def execute + bulk_insert(Release, build_releases) + end + + def build_releases + build_database_rows(each_release) + end + + def already_imported?(release) + existing_tags.include?(release.tag_name) + end + + def build(release) + { + tag: release.tag_name, + description: description_for(release), + created_at: release.created_at, + updated_at: release.updated_at, + project_id: project.id + } + end + + def each_release + client.releases(project.import_source) + end + + def description_for(release) + if release.body.present? + release.body + else + "Release for tag #{release.tag_name}" + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb new file mode 100644 index 00000000000..0b67fc8db73 --- /dev/null +++ b/lib/gitlab/github_import/importer/repository_importer.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class RepositoryImporter + include Gitlab::ShellAdapter + + attr_reader :project, :client + + def initialize(project, client) + @project = project + @client = client + end + + # Returns true if we should import the wiki for the project. + def import_wiki? + client.repository(project.import_source)&.has_wiki && + !project.wiki_repository_exists? + end + + # Imports the repository data. + # + # This method will return true if the data was imported successfully or + # the repository had already been imported before. + def execute + imported = + # It's possible a repository has already been imported when running + # this code, e.g. because we had to retry this job after + # `import_wiki?` raised a rate limit error. In this case we'll skip + # re-importing the main repository. + if project.repository.empty_repo? + import_repository + else + true + end + + update_clone_time if imported + + imported = import_wiki_repository if import_wiki? && imported + + imported + end + + def import_repository + project.ensure_repository + + configure_repository_remote + + project.repository.fetch_remote('github', forced: true) + + true + rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e + fail_import("Failed to import the repository: #{e.message}") + end + + def configure_repository_remote + return if project.repository.remote_exists?('github') + + project.repository.add_remote('github', project.import_url) + project.repository.set_import_remote_as_mirror('github') + + project.repository.add_remote_fetch_config( + 'github', + '+refs/pull/*/head:refs/merge-requests/*/head' + ) + end + + def import_wiki_repository + wiki_path = "#{project.disk_path}.wiki" + wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git') + storage_path = project.repository_storage_path + + gitlab_shell.import_repository(storage_path, wiki_path, wiki_url) + + true + rescue Gitlab::Shell::Error => e + if e.message !~ /repository not exported/ + fail_import("Failed to import the wiki: #{e.message}") + else + true + end + end + + def update_clone_time + project.update_column(:last_repository_updated_at, Time.zone.now) + end + + def fail_import(message) + project.mark_import_as_failed(message) + false + end + end + end + end +end diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb new file mode 100644 index 00000000000..211915f1d87 --- /dev/null +++ b/lib/gitlab/github_import/issuable_finder.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # IssuableFinder can be used for caching and retrieving database IDs for + # issuable objects such as issues and pull requests. By caching these IDs we + # remove the need for running a lot of database queries when importing + # GitHub projects. + class IssuableFinder + attr_reader :project, :object + + # The base cache key to use for storing/retrieving issuable IDs. + CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}'.freeze + + # project - An instance of `Project`. + # object - The object to look up or set a database ID for. + def initialize(project, object) + @project = project + @object = object + end + + # Returns the database ID for the object. + # + # This method will return `nil` if no ID could be found. + def database_id + val = Caching.read(cache_key) + + val.to_i if val.present? + end + + # Associates the given database ID with the current object. + # + # database_id - The ID of the corresponding database row. + def cache_database_id(database_id) + Caching.write(cache_key, database_id) + end + + private + + def cache_key + CACHE_KEY % { + project: project.id, + type: cache_key_type, + iid: cache_key_iid + } + end + + # Returns the identifier to use for cache keys. + # + # For issues and pull requests this will be "Issue" or "MergeRequest" + # respectively. For diff notes this will return "MergeRequest", for + # regular notes it will either return "Issue" or "MergeRequest" depending + # on what type of object the note belongs to. + def cache_key_type + if object.respond_to?(:issuable_type) + object.issuable_type + elsif object.respond_to?(:noteable_type) + object.noteable_type + else + raise( + TypeError, + "Instances of #{object.class} are not supported" + ) + end + end + + def cache_key_iid + if object.respond_to?(:noteable_id) + object.noteable_id + elsif object.respond_to?(:iid) + object.iid + else + raise( + TypeError, + "Instances of #{object.class} are not supported" + ) + end + end + end + end +end diff --git a/lib/gitlab/github_import/label_finder.rb b/lib/gitlab/github_import/label_finder.rb new file mode 100644 index 00000000000..9be071141db --- /dev/null +++ b/lib/gitlab/github_import/label_finder.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class LabelFinder + attr_reader :project + + # The base cache key to use for storing/retrieving label IDs. + CACHE_KEY = 'github-import/label-finder/%{project}/%{name}'.freeze + + # project - An instance of `Project`. + def initialize(project) + @project = project + end + + # Returns the label ID for the given name. + def id_for(name) + Caching.read_integer(cache_key_for(name)) + end + + def build_cache + mapping = @project + .labels + .pluck(:id, :name) + .each_with_object({}) do |(id, name), hash| + hash[cache_key_for(name)] = id + end + + Caching.write_multiple(mapping) + end + + def cache_key_for(name) + CACHE_KEY % { project: project.id, name: name } + end + end + end +end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb new file mode 100644 index 00000000000..b25c4f7becf --- /dev/null +++ b/lib/gitlab/github_import/markdown_text.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class MarkdownText + attr_reader :text, :author, :exists + + def self.format(*args) + new(*args).to_s + end + + # text - The Markdown text as a String. + # author - An instance of `Gitlab::GithubImport::Representation::User` + # exists - Boolean that indicates the user exists in the GitLab database. + def initialize(text, author, exists = false) + @text = text + @author = author + @exists = exists + end + + def to_s + if exists + text + else + "*Created by: #{author.login}*\n\n#{text}" + end + end + end + end +end diff --git a/lib/gitlab/github_import/milestone_finder.rb b/lib/gitlab/github_import/milestone_finder.rb new file mode 100644 index 00000000000..208d15dc144 --- /dev/null +++ b/lib/gitlab/github_import/milestone_finder.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class MilestoneFinder + attr_reader :project + + # The base cache key to use for storing/retrieving milestone IDs. + CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}'.freeze + + # project - An instance of `Project` + def initialize(project) + @project = project + end + + # issuable - An instance of `Gitlab::GithubImport::Representation::Issue` + # or `Gitlab::GithubImport::Representation::PullRequest`. + def id_for(issuable) + return unless issuable.milestone_number + + Caching.read_integer(cache_key_for(issuable.milestone_number)) + end + + def build_cache + mapping = @project + .milestones + .pluck(:id, :iid) + .each_with_object({}) do |(id, iid), hash| + hash[cache_key_for(iid)] = id + end + + Caching.write_multiple(mapping) + end + + def cache_key_for(iid) + CACHE_KEY % { project: project.id, iid: iid } + end + end + end +end diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/github_import/page_counter.rb new file mode 100644 index 00000000000..c3db2d0b469 --- /dev/null +++ b/lib/gitlab/github_import/page_counter.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # PageCounter can be used to keep track of the last imported page of a + # collection, allowing workers to resume where they left off in the event of + # an error. + class PageCounter + attr_reader :cache_key + + # The base cache key to use for storing the last page number. + CACHE_KEY = 'github-importer/page-counter/%{project}/%{collection}'.freeze + + def initialize(project, collection) + @cache_key = CACHE_KEY % { project: project.id, collection: collection } + end + + # Sets the page number to the given value. + # + # Returns true if the page number was overwritten, false otherwise. + def set(page) + Caching.write_if_greater(cache_key, page) + end + + # Returns the current value from the cache. + def current + Caching.read_integer(cache_key) || 1 + end + end + end +end diff --git a/lib/gitlab/github_import/parallel_importer.rb b/lib/gitlab/github_import/parallel_importer.rb new file mode 100644 index 00000000000..81739834b41 --- /dev/null +++ b/lib/gitlab/github_import/parallel_importer.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # The ParallelImporter schedules the importing of a GitHub project using + # Sidekiq. + class ParallelImporter + attr_reader :project + + def self.async? + true + end + + def initialize(project) + @project = project + end + + def execute + jid = generate_jid + + # The original import JID is the JID of the RepositoryImportWorker job, + # which will be removed once that job completes. Reusing that JID could + # result in StuckImportJobsWorker marking the job as stuck before we get + # to running Stage::ImportRepositoryWorker. + # + # We work around this by setting the JID to a custom generated one, then + # refreshing it in the various stages whenever necessary. + Gitlab::SidekiqStatus + .set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION) + + project.update_column(:import_jid, jid) + + Stage::ImportRepositoryWorker + .perform_async(project.id) + + true + end + + def generate_jid + "github-importer/#{project.id}" + end + end + end +end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb new file mode 100644 index 00000000000..d4d1357f5a3 --- /dev/null +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module ParallelScheduling + attr_reader :project, :client, :page_counter, :already_imported_cache_key + + # The base cache key to use for tracking already imported objects. + ALREADY_IMPORTED_CACHE_KEY = + 'github-importer/already-imported/%{project}/%{collection}'.freeze + + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + # parallel - When set to true the objects will be imported in parallel. + def initialize(project, client, parallel: true) + @project = project + @client = client + @parallel = parallel + @page_counter = PageCounter.new(project, collection_method) + @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY % + { project: project.id, collection: collection_method } + end + + def parallel? + @parallel + end + + def execute + retval = + if parallel? + parallel_import + else + sequential_import + end + + # Once we have completed all work we can remove our "already exists" + # cache so we don't put too much pressure on Redis. + # + # We don't immediately remove it since it's technically possible for + # other instances of this job to still run, instead we set the + # expiration time to a lower value. This prevents the other jobs from + # still scheduling duplicates while. Since all work has already been + # completed those jobs will just cycle through any remaining pages while + # not scheduling anything. + Caching.expire(already_imported_cache_key, 15.minutes.to_i) + + retval + end + + # Imports all the objects in sequence in the current thread. + def sequential_import + each_object_to_import do |object| + repr = representation_class.from_api_response(object) + + importer_class.new(repr, project, client).execute + end + end + + # Imports all objects in parallel by scheduling a Sidekiq job for every + # individual object. + def parallel_import + waiter = JobWaiter.new + + each_object_to_import do |object| + repr = representation_class.from_api_response(object) + + sidekiq_worker_class + .perform_async(project.id, repr.to_hash, waiter.key) + + waiter.jobs_remaining += 1 + end + + waiter + end + + # The method that will be called for traversing through all the objects to + # import, yielding them to the supplied block. + def each_object_to_import + repo = project.import_source + + # We inject the page number here to make sure that all importers always + # start where they left off. Simply starting over wouldn't work for + # repositories with a lot of data (e.g. tens of thousands of comments). + options = collection_options.merge(page: page_counter.current) + + client.each_page(collection_method, repo, options) do |page| + # Technically it's possible that the same work is performed multiple + # times, as Sidekiq doesn't guarantee there will ever only be one + # instance of a job. In such a scenario it's possible for one job to + # have a lower page number (e.g. 5) compared to another (e.g. 10). In + # this case we skip over all the objects until we have caught up, + # reducing the number of duplicate jobs scheduled by the provided + # block. + next unless page_counter.set(page.number) + + page.objects.each do |object| + next if already_imported?(object) + + yield object + + # We mark the object as imported immediately so we don't end up + # scheduling it multiple times. + mark_as_imported(object) + end + end + end + + # Returns true if the given object has already been imported, false + # otherwise. + # + # object - The object to check. + def already_imported?(object) + id = id_for_already_imported_cache(object) + + Caching.set_includes?(already_imported_cache_key, id) + end + + # Marks the given object as "already imported". + def mark_as_imported(object) + id = id_for_already_imported_cache(object) + + Caching.set_add(already_imported_cache_key, id) + end + + # Returns the ID to use for the cache used for checking if an object has + # already been imported or not. + # + # object - The object we may want to import. + def id_for_already_imported_cache(object) + raise NotImplementedError + end + + # The class used for converting API responses to Hashes when performing + # the import. + def representation_class + raise NotImplementedError + end + + # The class to use for importing objects when importing them sequentially. + def importer_class + raise NotImplementedError + end + + # The Sidekiq worker class used for scheduling the importing of objects in + # parallel. + def sidekiq_worker_class + raise NotImplementedError + end + + # The name of the method to call to retrieve the data to import. + def collection_method + raise NotImplementedError + end + + # Any options to be passed to the method used for retrieving the data to + # import. + def collection_options + {} + end + end + end +end diff --git a/lib/gitlab/github_import/rate_limit_error.rb b/lib/gitlab/github_import/rate_limit_error.rb new file mode 100644 index 00000000000..cc2de909c29 --- /dev/null +++ b/lib/gitlab/github_import/rate_limit_error.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # Error that will be raised when we're about to reach (or have reached) the + # GitHub API's rate limit. + RateLimitError = Class.new(StandardError) + end +end diff --git a/lib/gitlab/github_import/representation.rb b/lib/gitlab/github_import/representation.rb new file mode 100644 index 00000000000..639477ef2a2 --- /dev/null +++ b/lib/gitlab/github_import/representation.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + TIMESTAMP_KEYS = %i[created_at updated_at merged_at].freeze + + # Converts a Hash with String based keys to one that can be used by the + # various Representation classes. + # + # Example: + # + # Representation.symbolize_hash('number' => 10) # => { number: 10 } + def self.symbolize_hash(raw_hash = nil) + hash = raw_hash.deep_symbolize_keys + + TIMESTAMP_KEYS.each do |key| + hash[key] = Time.parse(hash[key]) if hash[key].is_a?(String) + end + + hash + end + end + end +end diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb new file mode 100644 index 00000000000..bb7439a0641 --- /dev/null +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class DiffNote + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :noteable_type, :noteable_id, :commit_id, :file_path, + :diff_hunk, :author, :note, :created_at, :updated_at, + :github_id + + NOTEABLE_ID_REGEX = /\/pull\/(?<iid>\d+)/i + + # Builds a diff note from a GitHub API response. + # + # note - An instance of `Sawyer::Resource` containing the note details. + def self.from_api_response(note) + matches = note.html_url.match(NOTEABLE_ID_REGEX) + + unless matches + raise( + ArgumentError, + "The note URL #{note.html_url.inspect} is not supported" + ) + end + + user = Representation::User.from_api_response(note.user) if note.user + hash = { + noteable_type: 'MergeRequest', + noteable_id: matches[:iid].to_i, + file_path: note.path, + commit_id: note.commit_id, + diff_hunk: note.diff_hunk, + author: user, + note: note.body, + created_at: note.created_at, + updated_at: note.updated_at, + github_id: note.id + } + + new(hash) + end + + # Builds a new note using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A Hash containing the raw note details. The keys of this + # Hash must be Symbols. + def initialize(attributes) + @attributes = attributes + end + + def line_code + diff_line = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a.last + + Gitlab::Git + .diff_line_code(file_path, diff_line.new_pos, diff_line.old_pos) + end + + # Returns a Hash that can be used to populate `notes.st_diff`, removing + # the need for requesting Git data for every diff note. + def diff_hash + { + diff: diff_hunk, + new_path: file_path, + old_path: file_path, + + # These fields are not displayed for LegacyDiffNote notes, so it + # doesn't really matter what we set them to. + a_mode: '100644', + b_mode: '100644', + new_file: false + } + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/expose_attribute.rb b/lib/gitlab/github_import/representation/expose_attribute.rb new file mode 100644 index 00000000000..c3405759631 --- /dev/null +++ b/lib/gitlab/github_import/representation/expose_attribute.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + module ExposeAttribute + extend ActiveSupport::Concern + + module ClassMethods + # Defines getter methods for the given attribute names. + # + # Example: + # + # expose_attribute :iid, :title + def expose_attribute(*names) + names.each do |name| + name = name.to_sym + + define_method(name) { attributes[name] } + end + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/issue.rb b/lib/gitlab/github_import/representation/issue.rb new file mode 100644 index 00000000000..f3071b3e2b3 --- /dev/null +++ b/lib/gitlab/github_import/representation/issue.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class Issue + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :iid, :title, :description, :milestone_number, + :created_at, :updated_at, :state, :assignees, + :label_names, :author + + # Builds an issue from a GitHub API response. + # + # issue - An instance of `Sawyer::Resource` containing the issue + # details. + def self.from_api_response(issue) + user = + if issue.user + Representation::User.from_api_response(issue.user) + end + + hash = { + iid: issue.number, + title: issue.title, + description: issue.body, + milestone_number: issue.milestone&.number, + state: issue.state == 'open' ? :opened : :closed, + assignees: issue.assignees.map do |u| + Representation::User.from_api_response(u) + end, + label_names: issue.labels.map(&:name), + author: user, + created_at: issue.created_at, + updated_at: issue.updated_at, + pull_request: issue.pull_request ? true : false + } + + new(hash) + end + + # Builds a new issue using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:state] = hash[:state].to_sym + hash[:assignees].map! { |u| Representation::User.from_json_hash(u) } + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A hash containing the raw issue details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def truncated_title + title.truncate(255) + end + + def labels? + label_names && label_names.any? + end + + def pull_request? + attributes[:pull_request] + end + + def issuable_type + pull_request? ? 'MergeRequest' : 'Issue' + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/note.rb b/lib/gitlab/github_import/representation/note.rb new file mode 100644 index 00000000000..a68bc4c002f --- /dev/null +++ b/lib/gitlab/github_import/representation/note.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class Note + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :noteable_id, :noteable_type, :author, :note, + :created_at, :updated_at, :github_id + + NOTEABLE_TYPE_REGEX = /\/(?<type>(pull|issues))\/(?<iid>\d+)/i + + # Builds a note from a GitHub API response. + # + # note - An instance of `Sawyer::Resource` containing the note details. + def self.from_api_response(note) + matches = note.html_url.match(NOTEABLE_TYPE_REGEX) + + if !matches || !matches[:type] + raise( + ArgumentError, + "The note URL #{note.html_url.inspect} is not supported" + ) + end + + noteable_type = + if matches[:type] == 'pull' + 'MergeRequest' + else + 'Issue' + end + + user = Representation::User.from_api_response(note.user) if note.user + hash = { + noteable_type: noteable_type, + noteable_id: matches[:iid].to_i, + author: user, + note: note.body, + created_at: note.created_at, + updated_at: note.updated_at, + github_id: note.id + } + + new(hash) + end + + # Builds a new note using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A Hash containing the raw note details. The keys of this + # Hash must be Symbols. + def initialize(attributes) + @attributes = attributes + end + + alias_method :issuable_type, :noteable_type + end + end + end +end diff --git a/lib/gitlab/github_import/representation/pull_request.rb b/lib/gitlab/github_import/representation/pull_request.rb new file mode 100644 index 00000000000..593b491a837 --- /dev/null +++ b/lib/gitlab/github_import/representation/pull_request.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class PullRequest + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :iid, :title, :description, :source_branch, + :source_branch_sha, :target_branch, :target_branch_sha, + :milestone_number, :author, :assignee, :created_at, + :updated_at, :merged_at, :source_repository_id, + :target_repository_id, :source_repository_owner + + # Builds a PR from a GitHub API response. + # + # issue - An instance of `Sawyer::Resource` containing the PR details. + def self.from_api_response(pr) + assignee = + if pr.assignee + Representation::User.from_api_response(pr.assignee) + end + + user = Representation::User.from_api_response(pr.user) if pr.user + hash = { + iid: pr.number, + title: pr.title, + description: pr.body, + source_branch: pr.head.ref, + target_branch: pr.base.ref, + source_branch_sha: pr.head.sha, + target_branch_sha: pr.base.sha, + source_repository_id: pr.head&.repo&.id, + target_repository_id: pr.base&.repo&.id, + source_repository_owner: pr.head&.user&.login, + state: pr.state == 'open' ? :opened : :closed, + milestone_number: pr.milestone&.number, + author: user, + assignee: assignee, + created_at: pr.created_at, + updated_at: pr.updated_at, + merged_at: pr.merged_at + } + + new(hash) + end + + # Builds a new PR using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:state] = hash[:state].to_sym + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + # Assignees are optional so we only convert it from a Hash if one was + # set. + hash[:assignee] &&= Representation::User + .from_json_hash(hash[:assignee]) + + new(hash) + end + + # attributes - A Hash containing the raw PR details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def truncated_title + title.truncate(255) + end + + # Returns a formatted source branch. + # + # For cross-project pull requests the branch name will be in the format + # `owner-name:branch-name`. + def formatted_source_branch + if cross_project? && source_repository_owner + "#{source_repository_owner}:#{source_branch}" + elsif source_branch == target_branch + # Sometimes the source and target branch are the same, but GitLab + # doesn't support this. This can happen when both the user and + # source repository have been deleted, and the PR was submitted from + # the fork's master branch. + "#{source_branch}-#{iid}" + else + source_branch + end + end + + def state + if merged_at + :merged + else + attributes[:state] + end + end + + def cross_project? + return true unless source_repository_id + + source_repository_id != target_repository_id + end + + def issuable_type + 'MergeRequest' + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/to_hash.rb b/lib/gitlab/github_import/representation/to_hash.rb new file mode 100644 index 00000000000..4a0f36ab8f0 --- /dev/null +++ b/lib/gitlab/github_import/representation/to_hash.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + module ToHash + # Converts the current representation to a Hash. The keys of this Hash + # will be Symbols. + def to_hash + hash = {} + + attributes.each do |key, value| + hash[key] = convert_value_for_to_hash(value) + end + + hash + end + + def convert_value_for_to_hash(value) + if value.is_a?(Array) + value.map { |v| convert_value_for_to_hash(v) } + elsif value.respond_to?(:to_hash) + value.to_hash + else + value + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/user.rb b/lib/gitlab/github_import/representation/user.rb new file mode 100644 index 00000000000..e00dcfca33d --- /dev/null +++ b/lib/gitlab/github_import/representation/user.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class User + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :id, :login + + # Builds a user from a GitHub API response. + # + # user - An instance of `Sawyer::Resource` containing the user details. + def self.from_api_response(user) + new(id: user.id, login: user.login) + end + + # Builds a user using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + new(Representation.symbolize_hash(raw_hash)) + end + + # attributes - A Hash containing the user details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + end + end + end +end diff --git a/lib/gitlab/github_import/sequential_importer.rb b/lib/gitlab/github_import/sequential_importer.rb new file mode 100644 index 00000000000..4f7324536a0 --- /dev/null +++ b/lib/gitlab/github_import/sequential_importer.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # The SequentialImporter imports a GitHub project in a single thread, + # without using Sidekiq. This makes it useful for testing purposes as well + # as Rake tasks, but it should be avoided for anything else in favour of the + # parallel importer. + class SequentialImporter + attr_reader :project, :client + + SEQUENTIAL_IMPORTERS = [ + Importer::LabelsImporter, + Importer::MilestonesImporter, + Importer::ReleasesImporter + ].freeze + + PARALLEL_IMPORTERS = [ + Importer::PullRequestsImporter, + Importer::IssuesImporter, + Importer::DiffNotesImporter, + Importer::NotesImporter + ].freeze + + # project - The project to import the data into. + # token - The token to use for the GitHub API. + def initialize(project, token: nil) + @project = project + @client = GithubImport + .new_client_for(project, token: token, parallel: false) + end + + def execute + Importer::RepositoryImporter.new(project, client).execute + + SEQUENTIAL_IMPORTERS.each do |klass| + klass.new(project, client).execute + end + + PARALLEL_IMPORTERS.each do |klass| + klass.new(project, client, parallel: false).execute + end + + project.repository.after_import + + true + end + end + end +end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb new file mode 100644 index 00000000000..be1259662a7 --- /dev/null +++ b/lib/gitlab/github_import/user_finder.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # Class that can be used for finding a GitLab user ID based on a GitHub user + # ID or username. + # + # Any found user IDs are cached in Redis to reduce the number of SQL queries + # executed over time. Valid keys are refreshed upon access so frequently + # used keys stick around. + # + # Lookups are cached even if no ID was found to remove the need for querying + # the database when most queries are not going to return results anyway. + class UserFinder + attr_reader :project, :client + + # The base cache key to use for caching user IDs for a given GitHub user + # ID. + ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'.freeze + + # The base cache key to use for caching user IDs for a given GitHub email + # address. + ID_FOR_EMAIL_CACHE_KEY = + 'github-import/user-finder/id-for-email/%s'.freeze + + # The base cache key to use for caching the Email addresses of GitHub + # usernames. + EMAIL_FOR_USERNAME_CACHE_KEY = + 'github-import/user-finder/email-for-username/%s'.freeze + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + end + + # Returns the GitLab user ID of an object's author. + # + # If the object has no author ID we'll use the ID of the GitLab ghost + # user. + def author_id_for(object) + id = + if object&.author + user_id_for(object.author) + else + GithubImport.ghost_user_id + end + + if id + [id, true] + else + [project.creator_id, false] + end + end + + # Returns the GitLab user ID of an issuable's assignee. + def assignee_id_for(issuable) + user_id_for(issuable.assignee) if issuable.assignee + end + + # Returns the GitLab user ID for a GitHub user. + # + # user - An instance of `Gitlab::GithubImport::Representation::User`. + def user_id_for(user) + find(user.id, user.login) + end + + # Returns the GitLab ID for the given GitHub ID or username. + # + # id - The ID of the GitHub user. + # username - The username of the GitHub user. + def find(id, username) + email = email_for_github_username(username) + cached, found_id = find_from_cache(id, email) + + return found_id if found_id + + # We only want to query the database if necessary. If previous lookups + # didn't yield a user ID we won't query the database again until the + # keys expire. + find_id_from_database(id, email) unless cached + end + + # Finds a user ID from the cache for a given GitHub ID or Email. + def find_from_cache(id, email = nil) + id_exists, id_for_github_id = cached_id_for_github_id(id) + + return [id_exists, id_for_github_id] if id_for_github_id + + # Just in case no Email address could be retrieved (for whatever reason) + return [false] unless email + + cached_id_for_github_email(email) + end + + # Finds a GitLab user ID from the database for a given GitHub user ID or + # Email. + def find_id_from_database(id, email) + id_for_github_id(id) || id_for_github_email(email) + end + + def email_for_github_username(username) + cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username + email = Caching.read(cache_key) + + unless email + user = client.user(username) + email = Caching.write(cache_key, user.email) if user + end + + email + end + + def cached_id_for_github_id(id) + read_id_from_cache(ID_CACHE_KEY % id) + end + + def cached_id_for_github_email(email) + read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email) + end + + # Queries and caches the GitLab user ID for a GitHub user ID, if one was + # found. + def id_for_github_id(id) + gitlab_id = query_id_for_github_id(id) || nil + + Caching.write(ID_CACHE_KEY % id, gitlab_id) + end + + # Queries and caches the GitLab user ID for a GitHub email, if one was + # found. + def id_for_github_email(email) + gitlab_id = query_id_for_github_email(email) || nil + + Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id) + end + + def query_id_for_github_id(id) + User.for_github_id(id).pluck(:id).first + end + + def query_id_for_github_email(email) + User.by_any_email(email).pluck(:id).first + end + + # Reads an ID from the cache. + # + # The return value is an Array with two values: + # + # 1. A boolean indicating if the key was present or not. + # 2. The ID as an Integer, or nil in case no ID could be found. + def read_id_from_cache(key) + value = Caching.read(key) + exists = !value.nil? + number = value.to_i + + # The cache key may be empty to indicate a previously looked up user for + # which we couldn't find an ID. + [exists, number.positive? ? number : nil] + end + end + end +end diff --git a/lib/gitlab/import_sources.rb b/lib/gitlab/import_sources.rb index 5404dc11a87..eeb03625479 100644 --- a/lib/gitlab/import_sources.rb +++ b/lib/gitlab/import_sources.rb @@ -8,14 +8,14 @@ module Gitlab ImportSource = Struct.new(:name, :title, :importer) ImportTable = [ - ImportSource.new('github', 'GitHub', Github::Import), + ImportSource.new('github', 'GitHub', Gitlab::GithubImport::ParallelImporter), ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer), ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer), ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer), ImportSource.new('fogbugz', 'FogBugz', Gitlab::FogbugzImport::Importer), ImportSource.new('git', 'Repo by URL', nil), ImportSource.new('gitlab_project', 'GitLab export', Gitlab::ImportExport::Importer), - ImportSource.new('gitea', 'Gitea', Gitlab::GithubImport::Importer) + ImportSource.new('gitea', 'Gitea', Gitlab::LegacyGithubImport::Importer) ].freeze class << self diff --git a/lib/gitlab/job_waiter.rb b/lib/gitlab/job_waiter.rb index 4d6bbda15f3..f654508c391 100644 --- a/lib/gitlab/job_waiter.rb +++ b/lib/gitlab/job_waiter.rb @@ -19,11 +19,13 @@ module Gitlab Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) } end - attr_reader :key, :jobs_remaining, :finished + attr_reader :key, :finished + attr_accessor :jobs_remaining # jobs_remaining - the number of jobs left to wait for - def initialize(jobs_remaining) - @key = "gitlab:job_waiter:#{SecureRandom.uuid}" + # key - The key of this waiter. + def initialize(jobs_remaining = 0, key = "gitlab:job_waiter:#{SecureRandom.uuid}") + @key = key @jobs_remaining = jobs_remaining @finished = [] end diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/legacy_github_import/base_formatter.rb index f330041cc00..2f07fde406c 100644 --- a/lib/gitlab/github_import/base_formatter.rb +++ b/lib/gitlab/legacy_github_import/base_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class BaseFormatter attr_reader :client, :formatter, :project, :raw_data diff --git a/lib/gitlab/github_import/branch_formatter.rb b/lib/gitlab/legacy_github_import/branch_formatter.rb index 8aa885fb811..80fe1d67209 100644 --- a/lib/gitlab/github_import/branch_formatter.rb +++ b/lib/gitlab/legacy_github_import/branch_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class BranchFormatter < BaseFormatter delegate :repo, :sha, :ref, to: :raw_data diff --git a/lib/gitlab/legacy_github_import/client.rb b/lib/gitlab/legacy_github_import/client.rb new file mode 100644 index 00000000000..53c910d44bd --- /dev/null +++ b/lib/gitlab/legacy_github_import/client.rb @@ -0,0 +1,148 @@ +module Gitlab + module LegacyGithubImport + class Client + GITHUB_SAFE_REMAINING_REQUESTS = 100 + GITHUB_SAFE_SLEEP_TIME = 500 + + attr_reader :access_token, :host, :api_version + + def initialize(access_token, host: nil, api_version: 'v3') + @access_token = access_token + @host = host.to_s.sub(%r{/+\z}, '') + @api_version = api_version + @users = {} + + if access_token + ::Octokit.auto_paginate = false + end + end + + def api + @api ||= ::Octokit::Client.new( + access_token: access_token, + api_endpoint: api_endpoint, + # If there is no config, we're connecting to github.com and we + # should verify ssl. + connection_options: { + ssl: { verify: config ? config['verify_ssl'] : true } + } + ) + end + + def client + unless config + raise Projects::ImportService::Error, + 'OAuth configuration for GitHub missing.' + end + + @client ||= ::OAuth2::Client.new( + config.app_id, + config.app_secret, + github_options.merge(ssl: { verify: config['verify_ssl'] }) + ) + end + + def authorize_url(redirect_uri) + client.auth_code.authorize_url({ + redirect_uri: redirect_uri, + scope: "repo, user, user:email" + }) + end + + def get_token(code) + client.auth_code.get_token(code).token + end + + def method_missing(method, *args, &block) + if api.respond_to?(method) + request(method, *args, &block) + else + super(method, *args, &block) + end + end + + def respond_to?(method) + api.respond_to?(method) || super + end + + def user(login) + return nil unless login.present? + return @users[login] if @users.key?(login) + + @users[login] = api.user(login) + end + + private + + def api_endpoint + if host.present? && api_version.present? + "#{host}/api/#{api_version}" + else + github_options[:site] + end + end + + def config + Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" } + end + + def github_options + if config + config["args"]["client_options"].deep_symbolize_keys + else + OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys + end + end + + def rate_limit + api.rate_limit! + # GitHub Rate Limit API returns 404 when the rate limit is + # disabled. In this case we just want to return gracefully + # instead of spitting out an error. + rescue Octokit::NotFound + nil + end + + def has_rate_limit? + return @has_rate_limit if defined?(@has_rate_limit) + + @has_rate_limit = rate_limit.present? + end + + def rate_limit_exceed? + has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS + end + + def rate_limit_sleep_time + rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME + end + + def request(method, *args, &block) + sleep rate_limit_sleep_time if rate_limit_exceed? + + data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend + return data unless data.is_a?(Array) + + last_response = api.last_response + + if block_given? + yield data + # api.last_response could change while we're yielding (e.g. fetching labels for each PR) + # so we cache our own last response + each_response_page(last_response, &block) + else + each_response_page(last_response) { |page| data.concat(page) } + data + end + end + + def each_response_page(last_response) + while last_response.rels[:next] + sleep rate_limit_sleep_time if rate_limit_exceed? + last_response = last_response.rels[:next].get + yield last_response.data if last_response.data.is_a?(Array) + end + end + end + end +end diff --git a/lib/gitlab/github_import/comment_formatter.rb b/lib/gitlab/legacy_github_import/comment_formatter.rb index 8911b81ec9a..d2c7a8ae9f4 100644 --- a/lib/gitlab/github_import/comment_formatter.rb +++ b/lib/gitlab/legacy_github_import/comment_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class CommentFormatter < BaseFormatter attr_writer :author_id diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/legacy_github_import/importer.rb index b8c07460ebb..12c968805f5 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/legacy_github_import/importer.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class Importer include Gitlab::ShellAdapter diff --git a/lib/gitlab/github_import/issuable_formatter.rb b/lib/gitlab/legacy_github_import/issuable_formatter.rb index 27b171d6ddb..de55382d3ad 100644 --- a/lib/gitlab/github_import/issuable_formatter.rb +++ b/lib/gitlab/legacy_github_import/issuable_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class IssuableFormatter < BaseFormatter attr_writer :assignee_id, :author_id diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/legacy_github_import/issue_formatter.rb index 977cd0423ba..4c8825ccf19 100644 --- a/lib/gitlab/github_import/issue_formatter.rb +++ b/lib/gitlab/legacy_github_import/issue_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class IssueFormatter < IssuableFormatter def attributes { diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/legacy_github_import/label_formatter.rb index 211ccdc51bb..c3eed12e739 100644 --- a/lib/gitlab/github_import/label_formatter.rb +++ b/lib/gitlab/legacy_github_import/label_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class LabelFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/legacy_github_import/milestone_formatter.rb index dd782eff059..a565294384d 100644 --- a/lib/gitlab/github_import/milestone_formatter.rb +++ b/lib/gitlab/legacy_github_import/milestone_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class MilestoneFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/project_creator.rb b/lib/gitlab/legacy_github_import/project_creator.rb index a55adc9b1c8..41e7eac4d08 100644 --- a/lib/gitlab/github_import/project_creator.rb +++ b/lib/gitlab/legacy_github_import/project_creator.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class ProjectCreator include Gitlab::CurrentSettings diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/legacy_github_import/pull_request_formatter.rb index 150afa31432..94c2e99066a 100644 --- a/lib/gitlab/github_import/pull_request_formatter.rb +++ b/lib/gitlab/legacy_github_import/pull_request_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class PullRequestFormatter < IssuableFormatter delegate :user, :project, :ref, :repo, :sha, to: :source_branch, prefix: true delegate :user, :exists?, :project, :ref, :repo, :sha, :short_sha, to: :target_branch, prefix: true diff --git a/lib/gitlab/github_import/release_formatter.rb b/lib/gitlab/legacy_github_import/release_formatter.rb index 1ad702a6058..3ed9d4f76da 100644 --- a/lib/gitlab/github_import/release_formatter.rb +++ b/lib/gitlab/legacy_github_import/release_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class ReleaseFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/user_formatter.rb b/lib/gitlab/legacy_github_import/user_formatter.rb index 04c2964da20..6d8055622f1 100644 --- a/lib/gitlab/github_import/user_formatter.rb +++ b/lib/gitlab/legacy_github_import/user_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class UserFormatter attr_reader :client, :raw diff --git a/lib/gitlab/github_import/wiki_formatter.rb b/lib/gitlab/legacy_github_import/wiki_formatter.rb index ca8d96f5650..27f45875c7c 100644 --- a/lib/gitlab/github_import/wiki_formatter.rb +++ b/lib/gitlab/legacy_github_import/wiki_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class WikiFormatter attr_reader :project diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 7f86fd7b45e..8e9aef49e89 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -7,7 +7,7 @@ class GithubImport end def initialize(token, gitlab_username, project_path, extras) - @options = { token: token, verbose: true } + @options = { token: token } @project_path = project_path @current_user = User.find_by_username(gitlab_username) @github_repo = extras.empty? ? nil : extras.first @@ -42,7 +42,9 @@ class GithubImport import_success = false timings = Benchmark.measure do - import_success = Github::Import.new(@project, @options).execute + import_success = Gitlab::GithubImport::SequentialImporter + .new(@project, token: @options[:token]) + .execute end if import_success |