path: root/app/workers/bulk_imports/pipeline_worker.rb
blob: 1a98705c151c425e30c59dd8b09cf6376816e939
# frozen_string_literal: true

module BulkImports
  class PipelineWorker # rubocop:disable Scalability/IdempotentWorker
    include ApplicationWorker

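    # Delay before re-enqueuing an NDJSON pipeline whose relation export on the
    # source instance has not finished yet.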
    NDJSON_PIPELINE_PERFORM_DELAY = 10.seconds

    data_consistency :always
    feature_category :importers
    sidekiq_options retry: false, dead: false
    worker_has_external_dependencies!

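    # Entry point: looks up the enqueued tracker and runs its pipeline, then
    # re-enqueues EntityWorker for the same entity and stage once this pipeline
    # finishes (or fails).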
    def perform(pipeline_tracker_id, stage, entity_id)
      pipeline_tracker = ::BulkImports::Tracker
        .with_status(:enqueued)
        .find_by_id(pipeline_tracker_id)

      if pipeline_tracker.present?
        logger.info(
          structured_payload(
            entity_id: pipeline_tracker.entity.id,
            pipeline_name: pipeline_tracker.pipeline_name
          )
        )

        run(pipeline_tracker)
      else
        logger.error(
          structured_payload(
            entity_id: entity_id,
            pipeline_tracker_id: pipeline_tracker_id,
            message: 'Unstarted pipeline not found'
          )
        )
      end

    ensure
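      # Notify the EntityWorker for this entity and stage regardless of the
      # outcome, including when the tracker was not found.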
      ::BulkImports::EntityWorker.perform_async(entity_id, stage)
    end

    private

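    # Runs the tracked pipeline. NDJSON pipelines wait for the source export to
    # complete first; retriable network errors are re-enqueued with a delay,
    # and any other error fails the tracker.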
    def run(pipeline_tracker)
      if pipeline_tracker.entity.failed?
        raise(Entity::FailedError, 'Failed entity status')
      end

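      # NDJSON pipelines depend on a relation export on the source instance:
      # give up once the export timeout elapses, fail fast on export errors,
      # and re-enqueue while the export is still running.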
      if ndjson_pipeline?(pipeline_tracker)
        status = ExportStatus.new(pipeline_tracker, pipeline_tracker.pipeline_class.relation)

        raise(Pipeline::ExpiredError, 'Pipeline timeout') if job_timeout?(pipeline_tracker)
        raise(Pipeline::FailedError, status.error) if status.failed?

        return reenqueue(pipeline_tracker) if status.started?
      end

      pipeline_tracker.update!(status_event: 'start', jid: jid)

      context = ::BulkImports::Pipeline::Context.new(pipeline_tracker)

      pipeline_tracker.pipeline_class.new(context).run

      pipeline_tracker.finish!
    rescue BulkImports::NetworkError => e
      if e.retriable?(pipeline_tracker)
        logger.error(
          structured_payload(
            entity_id: pipeline_tracker.entity.id,
            pipeline_name: pipeline_tracker.pipeline_name,
            message: "Retrying error: #{e.message}"
          )
        )

        pipeline_tracker.update!(status_event: 'retry', jid: jid)

        reenqueue(pipeline_tracker, delay: e.retry_delay)
      else
        fail_tracker(pipeline_tracker, e)
      end
    rescue StandardError => e
      fail_tracker(pipeline_tracker, e)
    end

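    # Marks the tracker as failed, logs the exception, and reports it to error tracking.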
    def fail_tracker(pipeline_tracker, exception)
      pipeline_tracker.update!(status_event: 'fail_op', jid: jid)

      logger.error(
        structured_payload(
          entity_id: pipeline_tracker.entity.id,
          pipeline_name: pipeline_tracker.pipeline_name,
          message: exception.message
        )
      )

      Gitlab::ErrorTracking.track_exception(
        exception,
        entity_id: pipeline_tracker.entity.id,
        pipeline_name: pipeline_tracker.pipeline_name
      )
    end

    def logger
      @logger ||= Gitlab::Import::Logger.build
    end

    def ndjson_pipeline?(pipeline_tracker)
      pipeline_tracker.pipeline_class.ndjson_pipeline?
    end

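    # True when more than NDJSON_EXPORT_TIMEOUT has passed since the entity was created.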
    def job_timeout?(pipeline_tracker)
      (Time.zone.now - pipeline_tracker.entity.created_at) > Pipeline::NDJSON_EXPORT_TIMEOUT
    end

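    # Schedules this worker again for the same tracker after the given delay.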
    def reenqueue(pipeline_tracker, delay: NDJSON_PIPELINE_PERFORM_DELAY)
      self.class.perform_in(
        delay,
        pipeline_tracker.id,
        pipeline_tracker.stage,
        pipeline_tracker.entity.id
      )
    end
  end
end