summaryrefslogtreecommitdiff
path: root/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
blob: 543c29a21a05ac28fbca8abe7e1b5de3f806af48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# frozen_string_literal: true

module Gitlab
  module GithubImport
    module Importer
      # Walks the merge requests of a project and hands every not-yet-imported
      # pull request review to the caller of `each_object_to_import`.
      #
      # Resumability relies on two caches:
      # - a `PageCounter` per merge request, remembering the current reviews page;
      # - a set (stored under `merge_requests_already_imported_cache_key`) holding
      #   the ids of merge requests whose reviews were fully walked.
      #
      # NOTE(review): the small hook methods below are presumably consumed by
      # `ParallelScheduling`, which is defined outside this file - confirm there.
      class PullRequestsReviewsImporter
        include ParallelScheduling

        # Forwards all arguments to the parent initializer, then builds the
        # project-scoped key of the "merge requests already imported" set.
        def initialize(...)
          super

          @merge_requests_already_imported_cache_key =
            "github-importer/merge_request/already-imported/#{project.id}"
        end

        # Name under which fetched objects are counted (see `ObjectCounter` usage
        # in `each_object_to_import`).
        def object_type
          :pull_request_review
        end

        # Client collection passed to `client.each_page`; also part of the
        # page counter identifier.
        def collection_method
          :pull_request_reviews
        end

        def representation_class
          Gitlab::GithubImport::Representation::PullRequestReview
        end

        def importer_class
          PullRequestReviewImporter
        end

        def sidekiq_worker_class
          ImportPullRequestReviewWorker
        end

        # Value identifying a review for the "already imported" check.
        def id_for_already_imported_cache(review)
          review[:id]
        end

        # Yields the `to_h` form of each review that is not flagged as already
        # imported, after tagging it with the id of its merge request.
        #
        # The worker may stop midway (a rate limit is one example). Two caches
        # keep a later run from requesting data that was already handled:
        # - stopped while a merge request still has reviews left:
        #   the reviews page is remembered by a `PageCounter` scoped to that
        #   merge request;
        # - stopped while other merge requests are still pending:
        #   merge requests whose reviews were all walked are remembered through
        #   `mark_merge_request_reviews_imported`.
        def each_object_to_import(&block)
          each_review_page do |page, merge_request|
            page.objects.each do |raw_review|
              attributes = raw_review.to_h
              next if already_imported?(attributes)

              Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)

              # Record which merge request this review belongs to before handing it over.
              attributes[:merge_request_id] = merge_request.id
              yield(attributes)

              # Flag the review only once the caller's block has returned.
              mark_as_imported(attributes)
            end
          end
        end

        private

        attr_reader :merge_requests_already_imported_cache_key

        # Yields `(page, merge_request)` for every reviews page of every merge
        # request that is still pending.
        def each_review_page
          merge_requests_to_import.find_each do |merge_request|
            # One counter per merge request: a shared counter would make pages of
            # other merge requests look as if they had been processed already.
            counter = PageCounter.new(project, page_counter_id(merge_request))
            source_repo = project.import_source
            request_options = collection_options.merge(page: counter.current)

            client.each_page(collection_method, source_repo, merge_request.iid, request_options) do |page|
              # Pages the counter refuses to record are not yielded.
              yield(page, merge_request) if counter.set(page.number)
            end

            # All pages walked: drop the now useless counter key, then remember
            # that this merge request is finished.
            counter.expire!
            mark_merge_request_reviews_imported(merge_request)
          end
        end

        # Merge requests of the project minus those recorded as fully handled.
        def merge_requests_to_import
          scope = project.merge_requests
          scope.id_not_in(already_imported_merge_requests)
        end

        # Members of the "already imported" set for this project.
        def already_imported_merge_requests
          Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key)
        end

        # Identifier handed to `PageCounter`, unique per merge request and collection.
        def page_counter_id(merge_request)
          "merge_request/#{merge_request.id}/#{collection_method}"
        end

        # Adds the merge request id to the "already imported" set.
        def mark_merge_request_reviews_imported(merge_request)
          Gitlab::Cache::Import::Caching.set_add(merge_requests_already_imported_cache_key, merge_request.id)
        end
      end
    end
  end
end