diff options
Diffstat (limited to 'lib/github')
-rw-r--r-- | lib/github/client.rb | 23 | ||||
-rw-r--r-- | lib/github/collection.rb | 29 | ||||
-rw-r--r-- | lib/github/error.rb | 3 | ||||
-rw-r--r-- | lib/github/import.rb | 409 | ||||
-rw-r--r-- | lib/github/rate_limit.rb | 27 | ||||
-rw-r--r-- | lib/github/repositories.rb | 19 | ||||
-rw-r--r-- | lib/github/representation/base.rb | 30 | ||||
-rw-r--r-- | lib/github/representation/branch.rb | 51 | ||||
-rw-r--r-- | lib/github/representation/comment.rb | 42 | ||||
-rw-r--r-- | lib/github/representation/issuable.rb | 37 | ||||
-rw-r--r-- | lib/github/representation/issue.rb | 25 | ||||
-rw-r--r-- | lib/github/representation/label.rb | 13 | ||||
-rw-r--r-- | lib/github/representation/milestone.rb | 25 | ||||
-rw-r--r-- | lib/github/representation/pull_request.rb | 78 | ||||
-rw-r--r-- | lib/github/representation/release.rb | 17 | ||||
-rw-r--r-- | lib/github/representation/repo.rb | 6 | ||||
-rw-r--r-- | lib/github/representation/user.rb | 15 | ||||
-rw-r--r-- | lib/github/response.rb | 25 | ||||
-rw-r--r-- | lib/github/user.rb | 24 |
19 files changed, 898 insertions, 0 deletions
# File: lib/github/client.rb (new file)
module Github
  # HTTP client for the GitHub API built on a Faraday connection.
  class Client
    attr_reader :connection, :rate_limit

    # @param options [Hash] must contain :url and :token; :timeout is
    #   optional and defaults to 60 (applied to both open and read timeouts).
    def initialize(options)
      timeout = options.fetch(:timeout, 60)

      @connection = Faraday.new(url: options.fetch(:url)) do |faraday|
        faraday.options.open_timeout = timeout
        faraday.options.timeout = timeout
        faraday.authorization 'token', options.fetch(:token)
        faraday.adapter :net_http
      end

      @rate_limit = RateLimit.new(connection)
    end

    # Performs a GET request, first sleeping when the rate limit check
    # reports that the remaining request budget is too low.
    #
    # @param url [String] path or absolute URL
    # @param query [Hash] query string parameters
    # @return [Github::Response]
    def get(url, query = {})
      exceed, reset_in = rate_limit.get
      sleep reset_in if exceed

      Github::Response.new(connection.get(url, query))
    end
  end
end

# File: lib/github/collection.rb (new file)
module Github
  # Lazily walks a paginated endpoint, following the "next" link relation.
  class Collection
    attr_reader :options

    def initialize(options)
      @options = options
    end

    # @param url [String] first page to request
    # @param query [Hash] query parameters sent with every request
    # @return [Array, Enumerator::Lazy] an empty array when url is blank,
    #   otherwise a lazy enumerator yielding every item of every page
    def fetch(url, query = {})
      return [] if url.blank?

      Enumerator.new do |yielder|
        loop do
          response = client.get(url, query)
          response.body.each { |item| yielder << item }

          # Stop once the response carries no "next" link relation.
          next_url = response.rels[:next]
          break unless next_url

          url = next_url
        end
      end.lazy
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end
  end
end

# File: lib/github/error.rb (new file)
module Github
  # Raised by Github::Import when the repository cannot be fetched.
  RepositoryFetchError = Class.new(StandardError)
end

# File: lib/github/import.rb (new file)
require_relative 'error'

module Github
  # Imports a GitHub repository (git data, labels, milestones, pull requests,
  # issues, comments and wiki) into a project.
  class Import
    include Gitlab::ShellAdapter

    # The nested classes below reuse the application models' tables with
    # the listed callback chains reset.
    class MergeRequest < ::MergeRequest
      self.table_name = 'merge_requests'

      self.reset_callbacks :save
      self.reset_callbacks :commit
      self.reset_callbacks :update
      self.reset_callbacks :validate
    end

    class Issue < ::Issue
      self.table_name = 'issues'

      self.reset_callbacks :save
      self.reset_callbacks :commit
      self.reset_callbacks :update
      self.reset_callbacks :validate
    end

    class Note < ::Note
      self.table_name = 'notes'

      self.reset_callbacks :save
      self.reset_callbacks :commit
      self.reset_callbacks :update
      self.reset_callbacks :validate
    end

    # NOTE(review): unlike the classes above, :save callbacks are not reset
    # here -- confirm this is intentional.
    class LegacyDiffNote < ::LegacyDiffNote
      self.table_name = 'notes'

      self.reset_callbacks :commit
      self.reset_callbacks :update
      self.reset_callbacks :validate
    end

    attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose

    # @param project [Project] destination project; its import_source is
    #   used as the GitHub "owner/name" path
    # @param options [Hash] client options (:url, :token, ...); :verbose
    #   enables progress output
    def initialize(project, options)
      @project = project
      @repository = project.repository
      @repo = project.import_source
      @options = options
      @verbose = options.fetch(:verbose, false)
      # Two-level cache: cached[:label_ids], cached[:user_ids], ...
      @cached = Hash.new { |hash, key| hash[key] = Hash.new }
      @errors = []
    end

    # Runs every import step in order.
    #
    # @return [Boolean] true on completion, false when the repository could
    #   not be fetched. Collected errors are persisted in both cases.
    # rubocop: disable Rails/Output
    def execute
      puts 'Fetching repository...'.color(:aqua) if verbose
      fetch_repository
      puts 'Fetching labels...'.color(:aqua) if verbose
      fetch_labels
      puts 'Fetching milestones...'.color(:aqua) if verbose
      fetch_milestones
      puts 'Fetching pull requests...'.color(:aqua) if verbose
      fetch_pull_requests
      puts 'Fetching issues...'.color(:aqua) if verbose
      fetch_issues
      puts 'Cloning wiki repository...'.color(:aqua) if verbose
      fetch_wiki_repository
      puts 'Expiring repository cache...'.color(:aqua) if verbose
      expire_repository_cache

      true
    rescue Github::RepositoryFetchError
      false
    ensure
      keep_track_of_errors
    end
    # rubocop: enable Rails/Output

    private

    # Client shared by all requests made during this import.
    def client
      @client ||= Github::Client.new(options)
    end

    # Yields the response of every page starting at url, following the
    # "next" link relation until there is none.
    def each_page(url, query = {})
      while url
        response = client.get(url, query)
        yield response
        url = response.rels[:next]
      end
    end

    # Adds GitHub as a mirror remote and fetches it.
    #
    # @raise [Github::RepositoryFetchError] when the shell operation fails
    def fetch_repository
      project.create_repository unless project.repository.exists?
      project.repository.add_remote('github', "https://#{options.fetch(:token)}@github.com/#{repo}.git")
      project.repository.set_remote_as_mirror('github')
      project.repository.fetch_remote('github', forced: true)
    rescue Gitlab::Shell::Error => e
      error(:project, "https://github.com/#{repo}.git", e.message)
      raise Github::RepositoryFetchError
    end

    # Imports the wiki repository unless the project already has one.
    def fetch_wiki_repository
      wiki_url = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git"
      wiki_path = "#{project.path_with_namespace}.wiki"

      unless project.wiki.repository_exists?
        gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
      end
    rescue Gitlab::Shell::Error => e
      # GitHub error message when the wiki repo has not been created,
      # this means that repo has wiki enabled, but have no pages. So,
      # we can skip the import.
      error(:wiki, wiki_url, e.message) if e.message !~ /repository not exported/
    end

    # Creates missing labels and caches label ids by title.
    def fetch_labels
      each_page("/repos/#{repo}/labels") do |response|
        response.body.each do |raw|
          representation = Github::Representation::Label.new(raw)

          begin
            label = project.labels.find_or_create_by!(title: representation.title) do |new_label|
              new_label.color = representation.color
            end

            cached[:label_ids][label.title] = label.id
          rescue => e
            error(:label, representation.url, e.message)
          end
        end
      end
    end

    # Creates every milestone whose iid does not exist in the project yet.
    def fetch_milestones
      each_page("/repos/#{repo}/milestones", state: :all) do |response|
        response.body.each do |raw|
          milestone = Github::Representation::Milestone.new(raw)

          begin
            next if project.milestones.where(iid: milestone.iid).exists?

            project.milestones.create!(
              iid: milestone.iid,
              title: milestone.title,
              description: milestone.description,
              due_date: milestone.due_date,
              state: milestone.state,
              created_at: milestone.created_at,
              updated_at: milestone.updated_at
            )
          rescue => e
            error(:milestone, milestone.url, e.message)
          end
        end
      end
    end

    # Imports pull requests as merge requests, together with their review
    # comments and regular comments.
    def fetch_pull_requests
      each_page("/repos/#{repo}/pulls", state: :all, sort: :created, direction: :asc) do |response|
        response.body.each do |raw|
          pull_request = Github::Representation::PullRequest.new(raw, options.merge(project: project))
          merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id)
          next unless merge_request.new_record? && pull_request.valid?

          begin
            restore_branches(pull_request)

            # user_id must run before format_description: it fills the
            # cache format_description reads.
            author_id = user_id(pull_request.author, project.creator_id)
            description = format_description(pull_request.description, pull_request.author)

            merge_request.attributes = {
              iid: pull_request.iid,
              title: pull_request.title,
              description: description,
              source_project: pull_request.source_project,
              source_branch: pull_request.source_branch_name,
              source_branch_sha: pull_request.source_branch_sha,
              target_project: pull_request.target_project,
              target_branch: pull_request.target_branch_name,
              target_branch_sha: pull_request.target_branch_sha,
              state: pull_request.state,
              milestone_id: milestone_id(pull_request.milestone),
              author_id: author_id,
              assignee_id: user_id(pull_request.assignee),
              created_at: pull_request.created_at,
              updated_at: pull_request.updated_at
            }

            merge_request.save!(validate: false)
            merge_request.merge_request_diffs.create

            # Fetch review comments
            review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments"
            fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)

            # Fetch comments
            comments_url = "/repos/#{repo}/issues/#{pull_request.iid}/comments"
            fetch_comments(merge_request, :comment, comments_url)
          rescue => e
            error(:pull_request, pull_request.url, e.message)
          ensure
            clean_up_restored_branches(pull_request)
          end
        end
      end
    end

    # Imports issues, and applies labels to already imported merge requests.
    def fetch_issues
      each_page("/repos/#{repo}/issues", state: :all, sort: :created, direction: :asc) do |response|
        response.body.each do |raw|
          representation = Github::Representation::Issue.new(raw, options)

          begin
            # Every pull request is an issue, but not every issue
            # is a pull request. For this reason, "shared" actions
            # for both features, like manipulating assignees, labels
            # and milestones, are provided within the Issues API.
            if representation.pull_request?
              next unless representation.has_labels?

              merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
              merge_request.update_attribute(:label_ids, label_ids(representation.labels))
            else
              next if Issue.where(iid: representation.iid, project_id: project.id).exists?

              author_id = user_id(representation.author, project.creator_id)
              issue = Issue.new
              issue.iid = representation.iid
              issue.project_id = project.id
              issue.title = representation.title
              issue.description = format_description(representation.description, representation.author)
              issue.state = representation.state
              issue.label_ids = label_ids(representation.labels)
              issue.milestone_id = milestone_id(representation.milestone)
              issue.author_id = author_id
              issue.assignee_id = user_id(representation.assignee)
              issue.created_at = representation.created_at
              issue.updated_at = representation.updated_at
              issue.save!(validate: false)

              # Fetch comments
              if representation.has_comments?
                comments_url = "/repos/#{repo}/issues/#{issue.iid}/comments"
                fetch_comments(issue, :comment, comments_url)
              end
            end
          rescue => e
            error(:issue, representation.url, e.message)
          end
        end
      end
    end

    # Creates one note per comment found at url.
    #
    # @param noteable [Object] record the notes are attached to
    # @param type [Symbol] error category used when a comment fails
    # @param url [String] first page of the comments endpoint
    # @param klass [Class] note class to instantiate
    def fetch_comments(noteable, type, url, klass = Note)
      each_page(url) do |comments|
        ActiveRecord::Base.no_touching do
          comments.body.each do |raw|
            representation = Github::Representation::Comment.new(raw, options)

            begin
              author_id = user_id(representation.author, project.creator_id)

              note = klass.new
              note.project_id = project.id
              note.noteable = noteable
              note.note = format_description(representation.note, representation.author)
              note.commit_id = representation.commit_id
              note.line_code = representation.line_code
              note.author_id = author_id
              note.created_at = representation.created_at
              note.updated_at = representation.updated_at
              note.save!(validate: false)
            rescue => e
              error(type, representation.url, e.message)
            end
          end
        end
      end
    end

    # Creates a release for every non-draft GitHub release not yet present.
    # NOTE(review): nothing in this class calls this method -- confirm
    # whether #execute is expected to invoke it.
    def fetch_releases
      each_page("/repos/#{repo}/releases") do |response|
        response.body.each do |raw|
          representation = Github::Representation::Release.new(raw)
          next unless representation.valid?

          release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
          next unless release.new_record?

          begin
            release.description = representation.description
            release.created_at = representation.created_at
            release.updated_at = representation.updated_at
            release.save!(validate: false)
          rescue => e
            error(:release, representation.url, e.message)
          end
        end
      end
    end

    def restore_branches(pull_request)
      restore_source_branch(pull_request) unless pull_request.source_branch_exists?
      restore_target_branch(pull_request) unless pull_request.target_branch_exists?
    end

    def restore_source_branch(pull_request)
      repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
    end

    def restore_target_branch(pull_request)
      repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
    end

    def remove_branch(name)
      repository.delete_branch(name)
    rescue Rugged::ReferenceError
      errors << { type: :branch, url: nil, error: "Could not clean up restored branch: #{name}" }
    end

    # Branches of pull requests that are still open are kept.
    def clean_up_restored_branches(pull_request)
      return if pull_request.opened?

      remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
      remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
    end

    # Maps raw label hashes to cached label ids, dropping unknown names.
    def label_ids(labels)
      labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact
    end

    def milestone_id(milestone)
      return unless milestone.present?

      project.milestones.select(:id).find_by(iid: milestone.iid)&.id
    end

    # Resolves a GitHub user to a local user id, by GitHub identity first
    # and by email second. Results are cached per GitHub user id.
    #
    # @param fallback_id [Integer, nil] id returned when no user matches
    def user_id(user, fallback_id = nil)
      return unless user.present?
      return cached[:user_ids][user.id] if cached[:user_ids].key?(user.id)

      gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)

      # Remembers whether a real match was found; read by format_description.
      cached[:gitlab_user_ids][user.id] = gitlab_user_id.present?
      cached[:user_ids][user.id] = gitlab_user_id || fallback_id
    end

    def user_id_by_email(email)
      return nil unless email

      ::User.find_by_any_email(email)&.id
    end

    def user_id_by_external_uid(id)
      return nil unless id

      ::User.select(:id)
            .joins(:identities)
            .merge(::Identity.where(provider: :github, extern_uid: id))
            .first&.id
    end

    # Prefixes the body with the GitHub author's username unless the author
    # was matched to a local user by a previous user_id call.
    def format_description(body, author)
      return body if cached[:gitlab_user_ids][author.id]

      "*Created by: #{author.username}*\n\n#{body}"
    end

    def expire_repository_cache
      repository.expire_content_cache
    end

    # Persists the collected errors as JSON in the project's import_error.
    def keep_track_of_errors
      return unless errors.any?

      project.update_column(:import_error, {
        message: 'The remote data could not be fully imported.',
        errors: errors
      }.to_json)
    end

    # Records an error; the URL is passed through Gitlab::UrlSanitizer.
    def error(type, url, message)
      errors << { type: type, url: Gitlab::UrlSanitizer.sanitize(url), error: message }
    end
  end
end

# File: lib/github/rate_limit.rb (new file)
module Github
  # Queries the rate limit endpoint to decide whether requests must pause.
  class RateLimit
    SAFE_REMAINING_REQUESTS = 100
    SAFE_RESET_TIME = 500
    RATE_LIMIT_URL = '/rate_limit'.freeze

    attr_reader :connection

    def initialize(connection)
      @connection = connection
    end

    # @return [false, Array(Boolean, Integer)] false when the endpoint
    #   answers 404; otherwise [exceed, reset_in], where exceed is true when
    #   the remaining requests are at or below SAFE_REMAINING_REQUESTS and
    #   reset_in is the number of seconds to wait (never negative).
    def get
      response = connection.get(RATE_LIMIT_URL)

      # GitHub Rate Limit API returns 404 when the rate limit is disabled
      return false if response.status == 404

      body = Oj.load(response.body, class_cache: false, mode: :compat)
      remaining = body.dig('rate', 'remaining').to_i
      # GitHub documents "reset" as UTC epoch seconds, so convert it to a
      # duration before handing it to a caller that sleeps on it.
      reset_at = body.dig('rate', 'reset').to_i
      reset_in = [reset_at - Time.now.to_i, 0].max
      exceed = remaining <= SAFE_REMAINING_REQUESTS

      [exceed, reset_in]
    end
  end
end

# File: lib/github/repositories.rb (new file)
module Github
  # Lists the repositories returned by the /user/repos endpoint.
  class Repositories
    attr_reader :options

    def initialize(options)
      @options = options
    end

    # @return [Array, Enumerator::Lazy] see Github::Collection#fetch
    def fetch
      Collection.new(options).fetch(repos_url)
    end

    private

    def repos_url
      '/user/repos'
    end
  end
end

# File: lib/github/representation/base.rb (new file)
module Github
  module Representation
    # Wraps a raw API hash and exposes the fields shared by all resources.
    class Base
      # @param raw [Hash] parsed JSON object
      # @param options [Hash] extra context used by subclasses
      def initialize(raw, options = {})
        @raw = raw
        @options = options
      end

      def id
        raw['id']
      end

      def url
        raw['url']
      end

      def created_at
        raw['created_at']
      end

      def updated_at
        raw['updated_at']
      end

      private

      attr_reader :raw, :options
    end
  end
end

# File: lib/github/representation/branch.rb (new file)
module Github
  module Representation
    # One side ("head" or "base") of a pull request.
    class Branch < Representation::Base
      def user
        raw.dig('user', 'login') || 'unknown'
      end

      # @return [Github::Representation::Repo, nil] nil when the payload has
      #   no repo; the result (including nil) is memoized
      def repo
        return @repo if defined?(@repo)

        @repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present?
      end

      def ref
        raw['ref']
      end

      def sha
        raw['sha']
      end

      def short_sha
        Commit.truncate_sha(sha)
      end

      # True when the branch exists locally and contains the commit.
      def exists?
        branch_exists? && commit_exists?
      end

      def valid?
        sha.present? && ref.present?
      end

      private

      def branch_exists?
        repository.branch_exists?(ref)
      end

      def commit_exists?
        repository.branch_names_contains(sha).include?(ref)
      end

      def repository
        @repository ||= options.fetch(:repository)
      end
    end
  end
end

# File: lib/github/representation/comment.rb (new file)
module Github
  module Representation
    # An issue comment or a pull request review comment.
    class Comment < Representation::Base
      def note
        raw['body'] || ''
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      def commit_id
        raw['commit_id']
      end

      # @return [String, nil] line code built from the last line of the
      #   diff hunk, or nil when the comment has no diff hunk
      def line_code
        return unless on_diff?

        parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines)
        generate_line_code(parsed_lines.to_a.last)
      end

      private

      def generate_line_code(line)
        Gitlab::Diff::LineCode.generate(file_path, line.new_pos, line.old_pos)
      end

      def on_diff?
        diff_hunk.present?
      end

      def diff_hunk
        raw['diff_hunk']
      end

      def file_path
        raw['path']
      end
    end
  end
end

# File: lib/github/representation/issuable.rb (new file)
module Github
  module Representation
    # Fields shared by issues and pull requests.
    class Issuable < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      def description
        raw['body'] || ''
      end

      def milestone
        return unless raw['milestone'].present?

        @milestone ||= Github::Representation::Milestone.new(raw['milestone'])
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      def assignee
        return unless assigned?

        @assignee ||= Github::Representation::User.new(raw['assignee'], options)
      end

      def assigned?
        raw['assignee'].present?
      end
    end
  end
end

# File: lib/github/representation/issue.rb (new file)
module Github
  module Representation
    class Issue < Representation::Issuable
      # @return [Array<Hash>] raw label hashes; empty when the payload has
      #   no labels
      def labels
        raw['labels'] || []
      end

      def state
        raw['state'] == 'closed' ? 'closed' : 'opened'
      end

      # A missing comment count is treated as zero.
      def has_comments?
        raw['comments'].to_i > 0
      end

      def has_labels?
        labels.count > 0
      end

      def pull_request?
        raw['pull_request'].present?
      end
    end
  end
end

# File: lib/github/representation/label.rb (new file)
module Github
  module Representation
    class Label < Representation::Base
      # The raw color value prefixed with "#".
      def color
        "##{raw['color']}"
      end

      def title
        raw['name']
      end
    end
  end
end

# File: lib/github/representation/milestone.rb (new file)
module Github
  module Representation
    class Milestone < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      def description
        raw['description']
      end

      def due_date
        raw['due_on']
      end

      def state
        raw['state'] == 'closed' ? 'closed' : 'active'
      end
    end
  end
end

# File: lib/github/representation/pull_request.rb (new file)
module Github
  module Representation
    class PullRequest < Representation::Issuable
      delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true
      delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true

      def source_project
        project
      end

      def source_branch_exists?
        !cross_project? && source_branch.exists?
      end

      # The original ref when the source branch exists locally, otherwise a
      # prefixed name.
      def source_branch_name
        @source_branch_name ||=
          if cross_project? || !source_branch_exists?
            source_branch_name_prefixed
          else
            source_branch_ref
          end
      end

      def target_project
        project
      end

      def target_branch_name
        @target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
      end

      # @return [String] 'merged', 'closed' or 'opened'
      def state
        return 'merged' if raw['state'] == 'closed' && raw['merged_at'].present?
        return 'closed' if raw['state'] == 'closed'

        'opened'
      end

      def opened?
        state == 'opened'
      end

      def valid?
        source_branch.valid? && target_branch.valid?
      end

      private

      def project
        @project ||= options.fetch(:project)
      end

      def source_branch
        @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
      end

      def source_branch_name_prefixed
        "gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
      end

      def target_branch
        @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
      end

      def target_branch_name_prefixed
        "gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
      end

      # True when the head has no repo or its repo differs from the base's.
      def cross_project?
        return true if source_branch_repo.nil?

        source_branch_repo.id != target_branch_repo.id
      end
    end
  end
end

# File: lib/github/representation/release.rb (new file)
module Github
  module Representation
    class Release < Representation::Base
      def description
        raw['body']
      end

      def tag
        raw['tag_name']
      end

      # Draft releases are not valid.
      def valid?
        !raw['draft']
      end
    end
  end
end

# File: lib/github/representation/repo.rb (new file)
module Github
  module Representation
    class Repo < Representation::Base
    end
  end
end

# File: lib/github/representation/user.rb (new file)
module Github
  module Representation
    class User < Representation::Base
      # Fetches the user through Github::User on first use; the result
      # (including nil) is memoized.
      def email
        return @email if defined?(@email)

        @email = Github::User.new(username, options).get.fetch('email', nil)
      end

      def username
        raw['login']
      end
    end
  end
end

# File: lib/github/response.rb (new file)
module Github
  # Wraps a raw HTTP response and exposes its parsed body and link relations.
  class Response
    attr_reader :raw, :headers, :status

    def initialize(response)
      @raw = response
      @headers = response.headers
      @status = response.status
    end

    # @return [Object] the response body parsed as JSON
    def body
      Oj.load(raw.body, class_cache: false, mode: :compat)
    end

    # Parses the Link header into a relation => URL hash.
    #
    # @return [Hash{Symbol => String}] e.g. { next: '...', last: '...' };
    #   empty when the header is missing. Segments that do not look like
    #   `<url>; rel="name"` are ignored.
    def rels
      headers['Link'].to_s.split(', ').each_with_object({}) do |link, relations|
        match = link.match(/<(.*?)>; rel="(\w+)"/)
        next unless match

        href, name = match.captures
        relations[name.to_sym] = href
      end
    end
  end
end

# File: lib/github/user.rb (new file)
module Github
  # Fetches a single user from the /users/:username endpoint.
  class User
    attr_reader :username, :options

    def initialize(username, options)
      @username = username
      @options = options
    end

    # @return [Object] the parsed response body
    def get
      client.get(user_url).body
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end

    def user_url
      "/users/#{username}"
    end
  end
end