summaryrefslogtreecommitdiff
path: root/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb')
-rw-r--r--lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb94
1 files changed, 91 insertions, 3 deletions
diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
index 827027203ff..809a518d13a 100644
--- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb
@@ -6,6 +6,13 @@ module Gitlab
class PullRequestsReviewsImporter
include ParallelScheduling
+ def initialize(...) # accepts any arguments; bare `super` below passes them on unchanged
+ super
+
+ @merge_requests_already_imported_cache_key = # key is namespaced by this project's id
+ "github-importer/merge_request/already-imported/#{project.id}"
+ end
+
def importer_class # returns the PullRequestReviewImporter constant; presumably consumed by ParallelScheduling - confirm
PullRequestReviewImporter
end
@@ -22,11 +29,31 @@ module Gitlab
:pull_request_reviews
end
- def id_for_already_imported_cache(merge_request)
- merge_request.id
+ def id_for_already_imported_cache(review) # parameter renamed to `review`; the method still returns the object's `id`
+ review.id # NOTE(review): this file passes both reviews and merge requests to already_imported?/mark_as_imported - presumably those call this method; confirm
+ end
+
+ def each_object_to_import(&block) # picks one of two iteration strategies and forwards the caller's block to it
+ if use_github_review_importer_query_only_unimported_merge_requests?
+ each_merge_request_to_import(&block) # flag on: page through reviews of merge requests not yet recorded as done
+ else
+ each_merge_request_skipping_imported(&block) # flag off: walk all merge requests, skipping already imported ones
+ end
end
- def each_object_to_import
+ private # methods defined after this marker are private
+
+ attr_reader :merge_requests_already_imported_cache_key # assigned in initialize
+
+ # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108
+ def use_github_review_importer_query_only_unimported_merge_requests? # result of the feature flag check below; no actor argument is passed
+ Feature.enabled?(
+ :github_review_importer_query_only_unimported_merge_requests,
+ default_enabled: :yaml # presumably: the default state is read from the flag's YAML definition - confirm
+ )
+ end
+
+ def each_merge_request_skipping_imported
project.merge_requests.find_each do |merge_request|
next if already_imported?(merge_request)
@@ -40,6 +67,67 @@ module Gitlab
mark_as_imported(merge_request)
end
end
+
+ # The worker can be interrupted at different points (by a rate limit, for
+ # instance). To avoid re-requesting data that was already imported when the
+ # worker is interrupted:
+ # - before all reviews of a merge request are imported:
+ #   the current reviews page is tracked per merge request with `PageCounter`.
+ # - before the reviews of all merge requests are imported:
+ #   merge requests whose reviews were all imported are recorded with
+ #   `mark_merge_request_reviews_imported`.
+ def each_merge_request_to_import # yields each review that is not already imported, with merge_request_id set
+ each_review_page do |page, merge_request|
+ page.objects.each do |review|
+ next if already_imported?(review) # presumably true for reviews previously passed to mark_as_imported - confirm
+
+ review.merge_request_id = merge_request.id # attach the merge request record's id before yielding
+ yield(review)
+
+ mark_as_imported(review) # reached only after the caller's block has returned
+ end
+ end
+ end
+
+ def each_review_page # yields (page, merge_request) for each page from client.each_page that the counter accepts
+ merge_requests_to_import.find_each do |merge_request|
+ # The page counter needs to be scoped by merge request to avoid skipping
+ # pages of reviews from already imported merge requests.
+ page_counter = PageCounter.new(project, page_counter_id(merge_request))
+ repo = project.import_source
+ options = collection_options.merge(page: page_counter.current) # request starts at the counter's current page
+
+ client.each_page(collection_method, repo, merge_request.iid, options) do |page|
+ next unless page_counter.set(page.number) # skip this page when `set` returns a falsy value
+
+ yield(page, merge_request)
+ end
+
+ # Avoid unnecessary Redis cache keys after the work is done.
+ page_counter.expire!
+ mark_merge_request_reviews_imported(merge_request) # runs only after each_page has returned for this merge request
+ end
+ end
+
+ # Returns the project's merge requests whose id is not in the already-imported set (reviews still to be imported).
+ def merge_requests_to_import
+ project.merge_requests.where.not(id: already_imported_merge_requests) # rubocop: disable CodeReuse/ActiveRecord
+ end
+
+ def already_imported_merge_requests # values of the set under the cache key, i.e. ids added by mark_merge_request_reviews_imported
+ Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key)
+ end
+
+ def page_counter_id(merge_request) # builds the per-merge-request identifier handed to PageCounter.new
+ "merge_request/#{merge_request.id}/#{collection_method}"
+ end
+
+ def mark_merge_request_reviews_imported(merge_request) # adds the id to the set that merge_requests_to_import excludes
+ Gitlab::Cache::Import::Caching.set_add(
+ merge_requests_already_imported_cache_key,
+ merge_request.id
+ )
+ end
end
end
end