diff options
Diffstat (limited to 'lib/gitlab/phabricator_import')
-rw-r--r-- | lib/gitlab/phabricator_import/base_worker.rb | 80 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/cache/map.rb | 65 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit.rb | 9 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit/client.rb | 41 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit/maniphest.rb | 28 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit/pagination.rb | 24 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit/response.rb | 60 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/conduit/tasks_response.rb | 24 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/import_tasks_worker.rb | 10 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/importer.rb | 44 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/issues/importer.rb | 42 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/issues/task_importer.rb | 54 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/project_creator.rb | 78 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/representation/task.rb | 60 | ||||
-rw-r--r-- | lib/gitlab/phabricator_import/worker_state.rb | 47 |
15 files changed, 666 insertions, 0 deletions
diff --git a/lib/gitlab/phabricator_import/base_worker.rb b/lib/gitlab/phabricator_import/base_worker.rb new file mode 100644 index 00000000000..b69c65e78f8 --- /dev/null +++ b/lib/gitlab/phabricator_import/base_worker.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +# All workers within a Phabricator import should inherit from this worker and +# implement the `#import` method. The jobs should then be scheduled using the +# `.schedule` class method instead of `.perform_async` +# +# Doing this makes sure that only one job of that type is running at the same time +# for a certain project. This will avoid deadlocks. When a job is already running +# we'll wait for it for 10 times 5 seconds to restart. If the running job hasn't +# finished, by then, we'll retry in 30 seconds. +# +# It also makes sure that we keep the import state of the project up to date: +# - It keeps track of the jobs so we know how many jobs are running for the +# project +# - It refreshes the import jid, so it doesn't get cleaned up by the +# `StuckImportJobsWorker` +# - It marks the import as failed if a job failed to many times +# - It marks the import as finished when all remaining jobs are done +module Gitlab + module PhabricatorImport + class BaseWorker + include ApplicationWorker + include ProjectImportOptions # This marks the project as failed after too many tries + include Gitlab::ExclusiveLeaseHelpers + + class << self + def schedule(project_id, *args) + perform_async(project_id, *args) + add_job(project_id) + end + + def add_job(project_id) + worker_state(project_id).add_job + end + + def remove_job(project_id) + worker_state(project_id).remove_job + end + + def worker_state(project_id) + Gitlab::PhabricatorImport::WorkerState.new(project_id) + end + end + + def perform(project_id, *args) + in_lock("#{self.class.name.underscore}/#{project_id}/#{args}", ttl: 2.hours, sleep_sec: 5.seconds) do + project = Project.find_by_id(project_id) + next unless project + + # Bail if the import job already failed + next unless project.import_state&.in_progress? + + project.import_state.refresh_jid_expiration + + import(project, *args) + + # If this is the last running job, finish the import + project.after_import if self.class.worker_state(project_id).running_count < 2 + + self.class.remove_job(project_id) + end + rescue Gitlab::ExclusiveLeaseHelpers::FailedToObtainLockError + # Reschedule a job if there was already a running one + # Running them at the same time could cause a deadlock updating the same + # resource + self.class.perform_in(30.seconds, project_id, *args) + end + + private + + def import(project, *args) + importer_class.new(project, *args).execute + end + + def importer_class + raise NotImplementedError, "Implement `#{__method__}` on #{self.class}" + end + end + end +end diff --git a/lib/gitlab/phabricator_import/cache/map.rb b/lib/gitlab/phabricator_import/cache/map.rb new file mode 100644 index 00000000000..fa8b37b20ca --- /dev/null +++ b/lib/gitlab/phabricator_import/cache/map.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Cache + class Map + def initialize(project) + @project = project + end + + def get_gitlab_model(phabricator_id) + cached_info = get(phabricator_id) + return unless cached_info[:classname] && cached_info[:database_id] + + cached_info[:classname].constantize.find_by_id(cached_info[:database_id]) + end + + def set_gitlab_model(object, phabricator_id) + set(object.class, object.id, phabricator_id) + end + + private + + attr_reader :project + + def set(klass_name, object_id, phabricator_id) + key = cache_key_for_phabricator_id(phabricator_id) + + redis.with do |r| + r.multi do |multi| + multi.mapped_hmset(key, + { classname: klass_name, database_id: object_id }) + multi.expire(key, timeout) + end + end + end + + def get(phabricator_id) + key = cache_key_for_phabricator_id(phabricator_id) + + redis.with do |r| + r.pipelined do |pipe| + # Extend the TTL when a key was + pipe.expire(key, timeout) + pipe.mapped_hmget(key, :classname, :database_id) + end.last + end + end + + def cache_key_for_phabricator_id(phabricator_id) + "#{Redis::Cache::CACHE_NAMESPACE}/phabricator-import/#{project.id}/#{phabricator_id}" + end + + def redis + Gitlab::Redis::Cache + end + + def timeout + # Setting the timeout to the same one as we do for clearing stuck jobs + # this makes sure all cache is available while the import is running. + StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit.rb b/lib/gitlab/phabricator_import/conduit.rb new file mode 100644 index 00000000000..4c64d737389 --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + ApiError = Class.new(Gitlab::PhabricatorImport::BaseError) + ResponseError = Class.new(ApiError) + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit/client.rb b/lib/gitlab/phabricator_import/conduit/client.rb new file mode 100644 index 00000000000..4469a3f5849 --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit/client.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + class Client + def initialize(phabricator_url, api_token) + @phabricator_url = phabricator_url + @api_token = api_token + end + + def get(path, params: {}) + response = Gitlab::HTTP.get(build_url(path), body: build_params(params), headers: headers) + Response.parse!(response) + rescue *Gitlab::HTTP::HTTP_ERRORS => e + # Wrap all errors from the API into an API-error. + raise ApiError.new(e) + end + + private + + attr_reader :phabricator_url, :api_token + + def headers + { "Accept" => 'application/json' } + end + + def build_url(path) + URI.join(phabricator_url, '/api/', path).to_s + end + + def build_params(params) + params = params.dup + params.compact! + params.reverse_merge!("api.token" => api_token) + + CGI.unescape(params.to_query) + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit/maniphest.rb b/lib/gitlab/phabricator_import/conduit/maniphest.rb new file mode 100644 index 00000000000..848b71e49e7 --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit/maniphest.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + class Maniphest + def initialize(phabricator_url:, api_token:) + @client = Client.new(phabricator_url, api_token) + end + + def tasks(after: nil) + TasksResponse.new(get_tasks(after)) + end + + private + + def get_tasks(after) + client.get('maniphest.search', + params: { + after: after, + attachments: { projects: 1, subscribers: 1, columns: 1 } + }) + end + + attr_reader :client + end + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit/pagination.rb b/lib/gitlab/phabricator_import/conduit/pagination.rb new file mode 100644 index 00000000000..5f54cccdbc8 --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit/pagination.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + class Pagination + def initialize(cursor_json) + @cursor_json = cursor_json + end + + def has_next_page? + next_page.present? + end + + def next_page + cursor_json["after"] + end + + private + + attr_reader :cursor_json + end + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit/response.rb b/lib/gitlab/phabricator_import/conduit/response.rb new file mode 100644 index 00000000000..6053ecfbd5e --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit/response.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + class Response + def self.parse!(http_response) + unless http_response.success? + raise Gitlab::PhabricatorImport::Conduit::ResponseError, + "Phabricator responded with #{http_response.status}" + end + + response = new(JSON.parse(http_response.body)) + + unless response.success? + raise ResponseError, + "Phabricator Error: #{response.error_code}: #{response.error_info}" + end + + response + rescue JSON::JSONError => e + raise ResponseError.new(e) + end + + def initialize(json) + @json = json + end + + def success? + error_code.nil? + end + + def error_code + json['error_code'] + end + + def error_info + json['error_info'] + end + + def data + json_result&.fetch('data') + end + + def pagination + return unless cursor_info = json_result&.fetch('cursor') + + @pagination ||= Pagination.new(cursor_info) + end + + private + + attr_reader :json + + def json_result + json['result'] + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/conduit/tasks_response.rb b/lib/gitlab/phabricator_import/conduit/tasks_response.rb new file mode 100644 index 00000000000..cbcf7259fb2 --- /dev/null +++ b/lib/gitlab/phabricator_import/conduit/tasks_response.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Conduit + class TasksResponse + def initialize(conduit_response) + @conduit_response = conduit_response + end + + delegate :pagination, to: :conduit_response + + def tasks + @tasks ||= conduit_response.data.map do |task_json| + Gitlab::PhabricatorImport::Representation::Task.new(task_json) + end + end + + private + + attr_reader :conduit_response + end + end + end +end diff --git a/lib/gitlab/phabricator_import/import_tasks_worker.rb b/lib/gitlab/phabricator_import/import_tasks_worker.rb new file mode 100644 index 00000000000..c36954a8d41 --- /dev/null +++ b/lib/gitlab/phabricator_import/import_tasks_worker.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + class ImportTasksWorker < BaseWorker + def importer_class + Gitlab::PhabricatorImport::Issues::Importer + end + end + end +end diff --git a/lib/gitlab/phabricator_import/importer.rb b/lib/gitlab/phabricator_import/importer.rb new file mode 100644 index 00000000000..c1797f4027e --- /dev/null +++ b/lib/gitlab/phabricator_import/importer.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Gitlab + module PhabricatorImport + class Importer + def self.async? + true + end + + def self.imports_repository? + # This does not really import a repository, but we want to skip all + # repository related tasks in the `Projects::ImportService` + true + end + + def initialize(project) + @project = project + end + + def execute + Gitlab::Import::SetAsyncJid.set_jid(project) + schedule_first_tasks_page + + true + rescue => e + fail_import(e.message) + + false + end + + private + + attr_reader :project + + def schedule_first_tasks_page + ImportTasksWorker.schedule(project.id) + end + + def fail_import(message) + project.import_state.mark_as_failed(message) + end + end + end +end diff --git a/lib/gitlab/phabricator_import/issues/importer.rb b/lib/gitlab/phabricator_import/issues/importer.rb new file mode 100644 index 00000000000..a58438452ff --- /dev/null +++ b/lib/gitlab/phabricator_import/issues/importer.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Issues + class Importer + def initialize(project, after = nil) + @project, @after = project, after + end + + def execute + schedule_next_batch + + tasks_response.tasks.each do |task| + TaskImporter.new(project, task).execute + end + end + + private + + attr_reader :project, :after + + def schedule_next_batch + return unless tasks_response.pagination.has_next_page? + + Gitlab::PhabricatorImport::ImportTasksWorker + .schedule(project.id, tasks_response.pagination.next_page) + end + + def tasks_response + @tasks_response ||= client.tasks(after: after) + end + + def client + @client ||= + Gitlab::PhabricatorImport::Conduit::Maniphest + .new(phabricator_url: project.import_data.data['phabricator_url'], + api_token: project.import_data.credentials[:api_token]) + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/issues/task_importer.rb b/lib/gitlab/phabricator_import/issues/task_importer.rb new file mode 100644 index 00000000000..40d4392cbc1 --- /dev/null +++ b/lib/gitlab/phabricator_import/issues/task_importer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Issues + class TaskImporter + def initialize(project, task) + @project, @task = project, task + end + + def execute + # TODO: get the user from the project namespace from the username loaded by Phab-id + # https://gitlab.com/gitlab-org/gitlab-ce/issues/60565 + issue.author = User.ghost + + # TODO: Reformat the description with attachments, escaping accidental + # links and add attachments + # https://gitlab.com/gitlab-org/gitlab-ce/issues/60603 + issue.assign_attributes(task.issue_attributes) + + save! + + issue + end + + private + + attr_reader :project, :task + + def save! + # Just avoiding an extra redis call, we've already updated the expiry + # when reading the id from the map + was_persisted = issue.persisted? + + issue.save! if issue.changed? + + object_map.set_gitlab_model(issue, task.phabricator_id) unless was_persisted + end + + def issue + @issue ||= find_issue_by_phabricator_id(task.phabricator_id) || + project.issues.new + end + + def find_issue_by_phabricator_id(phabricator_id) + object_map.get_gitlab_model(phabricator_id) + end + + def object_map + Gitlab::PhabricatorImport::Cache::Map.new(project) + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/project_creator.rb b/lib/gitlab/phabricator_import/project_creator.rb new file mode 100644 index 00000000000..b37a5b44980 --- /dev/null +++ b/lib/gitlab/phabricator_import/project_creator.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module Gitlab + module PhabricatorImport + class ProjectCreator + def initialize(current_user, params) + @current_user = current_user + @params = params.dup + end + + def execute + return unless import_url.present? && api_token.present? + + project = Projects::CreateService.new(current_user, create_params).execute + return project unless project.persisted? + + project.project_feature.update!(project_feature_attributes) + + project + end + + private + + attr_reader :current_user, :params + + def create_params + { + name: project_name, + path: project_path, + namespace_id: namespace_id, + import_type: 'phabricator', + import_url: Project::UNKNOWN_IMPORT_URL, + import_data: import_data + } + end + + def project_name + params[:name] + end + + def project_path + params[:path] + end + + def namespace_id + params[:namespace_id] || current_user.namespace_id + end + + def import_url + params[:phabricator_server_url] + end + + def api_token + params[:api_token] + end + + def project_feature_attributes + @project_features_attributes ||= begin + # everything disabled except for issues + ProjectFeature::FEATURES.map do |feature| + [ProjectFeature.access_level_attribute(feature), ProjectFeature::DISABLED] + end.to_h.merge(ProjectFeature.access_level_attribute(:issues) => ProjectFeature::ENABLED) + end + end + + def import_data + { + data: { + phabricator_url: import_url + }, + credentials: { + api_token: params.fetch(:api_token) + } + } + end + end + end +end diff --git a/lib/gitlab/phabricator_import/representation/task.rb b/lib/gitlab/phabricator_import/representation/task.rb new file mode 100644 index 00000000000..6aedc71b626 --- /dev/null +++ b/lib/gitlab/phabricator_import/representation/task.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + module Representation + class Task + def initialize(json) + @json = json + end + + def phabricator_id + json['phid'] + end + + def issue_attributes + @issue_attributes ||= { + title: issue_title, + description: issue_description, + state: issue_state, + created_at: issue_created_at, + closed_at: issue_closed_at + } + end + + private + + attr_reader :json + + def issue_title + # The 255 limit is the validation we impose on the Issue title in + # Issuable + @issue_title ||= json['fields']['name'].truncate(255) + end + + def issue_description + json['fields']['description']['raw'] + end + + def issue_state + issue_closed_at.present? ? :closed : :opened + end + + def issue_created_at + return unless json['fields']['dateCreated'] + + @issue_created_at ||= cast_datetime(json['fields']['dateCreated']) + end + + def issue_closed_at + return unless json['fields']['dateClosed'] + + @issue_closed_at ||= cast_datetime(json['fields']['dateClosed']) + end + + def cast_datetime(value) + Time.at(value.to_i) + end + end + end + end +end diff --git a/lib/gitlab/phabricator_import/worker_state.rb b/lib/gitlab/phabricator_import/worker_state.rb new file mode 100644 index 00000000000..38829e34509 --- /dev/null +++ b/lib/gitlab/phabricator_import/worker_state.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true +module Gitlab + module PhabricatorImport + class WorkerState + def initialize(project_id) + @project_id = project_id + end + + def add_job + redis.with do |r| + r.pipelined do |pipe| + pipe.incr(all_jobs_key) + pipe.expire(all_jobs_key, timeout) + end + end + end + + def remove_job + redis.with do |r| + r.decr(all_jobs_key) + end + end + + def running_count + redis.with { |r| r.get(all_jobs_key) }.to_i + end + + private + + attr_reader :project_id + + def redis + Gitlab::Redis::SharedState + end + + def all_jobs_key + @all_jobs_key ||= "phabricator-import/jobs/project-#{project_id}/job-count" + end + + def timeout + # Make sure we get rid of all the information after a job is marked + # as failed/succeeded + StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION + end + end + end +end |