summaryrefslogtreecommitdiff
path: root/app/workers/gitlab/github_import/stage/import_repository_worker.rb
blob: 8c1a2cd2677f8b68b5953a7775ce0c4ce6f2ea7a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# frozen_string_literal: true

module Gitlab
  module GithubImport
    module Stage
      # Import stage worker that runs Importer::RepositoryImporter for a project
      # and, on success, enqueues ImportBaseDataWorker for the same project.
      class ImportRepositoryWorker # rubocop:disable Scalability/IdempotentWorker
        include ApplicationWorker

        data_consistency :always

        sidekiq_options retry: 3
        include GithubImport::Queue
        include StageMethods

        # Memory killer thresholds, overridable through the environment.
        # technical debt: https://gitlab.com/gitlab-org/gitlab/issues/33991
        sidekiq_options memory_killer_memory_growth_kb: ENV.fetch('MEMORY_KILLER_IMPORT_REPOSITORY_WORKER_MEMORY_GROWTH_KB', 50).to_i
        sidekiq_options memory_killer_max_memory_growth_kb: ENV.fetch('MEMORY_KILLER_IMPORT_REPOSITORY_WORKER_MAX_MEMORY_GROWTH_KB', 300_000).to_i

        # Runs the repository import and then enqueues ImportBaseDataWorker.
        #
        # Any StandardError is reported to ImportFailureService and re-raised.
        #
        # client - An instance of Gitlab::GithubImport::Client.
        # project - An instance of Project.
        def import(client, project)
          # A clone can, in extreme cases, outlive the import job expiration
          # time. A separate job is scheduled that periodically refreshes the
          # expiration time to work around this.
          RefreshImportJidWorker.perform_in_the_future(project.id, jid)

          info(project.id, message: "starting importer", importer: 'Importer::RepositoryImporter')

          # An issue created by a user while the import is running can make the
          # import fail, so IIDs are allocated before the importer starts.
          allocate_issues_internal_id!(project, client)

          Importer::RepositoryImporter.new(project, client).execute

          counter.increment

          ImportBaseDataWorker.perform_async(project.id)
        rescue StandardError => error
          Gitlab::Import::ImportFailureService.track(
            project_id: project.id,
            error_source: self.class.name,
            exception: error,
            fail_import: abort_on_failure,
            metrics: true
          )

          raise error
        end

        # Counter incremented once per successful repository import.
        def counter
          metric_name = :github_importer_imported_repositories
          docstring = 'The number of imported GitHub repositories'

          Gitlab::Metrics.counter(metric_name, docstring)
        end

        # Passed as `fail_import:` when a failure in #import is tracked.
        def abort_on_failure
          true
        end

        private

        # Tracks the project's issue IID from the number of the first issue
        # GitHub returns, unless an issues InternalId record already exists.
        #
        # project - An instance of Project.
        # client - An instance of Gitlab::GithubImport::Client.
        def allocate_issues_internal_id!(project, client)
          already_tracked = InternalId.exists?(project: project, usage: :issues) # rubocop: disable CodeReuse/ActiveRecord
          return if already_tracked

          # Issues in any state, sorted by number descending, one per page:
          # only the first object yielded is needed.
          query = { state: 'all', sort: 'number', direction: 'desc', per_page: '1' }
          newest_issue = client.each_object(:issues, project.import_source, query).first

          Issue.track_project_iid!(project, newest_issue[:number]) if newest_issue
        end
      end
    end
  end
end