diff options
Diffstat (limited to 'lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb')
-rw-r--r-- | lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb | 94 |
1 file changed, 91 insertions, 3 deletions
diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb index 827027203ff..809a518d13a 100644 --- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb @@ -6,6 +6,13 @@ module Gitlab class PullRequestsReviewsImporter include ParallelScheduling + def initialize(...) + super + + @merge_requests_already_imported_cache_key = + "github-importer/merge_request/already-imported/#{project.id}" + end + def importer_class PullRequestReviewImporter end @@ -22,11 +29,31 @@ module Gitlab :pull_request_reviews end - def id_for_already_imported_cache(merge_request) - merge_request.id + def id_for_already_imported_cache(review) + review.id + end + + def each_object_to_import(&block) + if use_github_review_importer_query_only_unimported_merge_requests? + each_merge_request_to_import(&block) + else + each_merge_request_skipping_imported(&block) + end end - def each_object_to_import + private + + attr_reader :merge_requests_already_imported_cache_key + + # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108 + def use_github_review_importer_query_only_unimported_merge_requests? + Feature.enabled?( + :github_review_importer_query_only_unimported_merge_requests, + default_enabled: :yaml + ) + end + + def each_merge_request_skipping_imported project.merge_requests.find_each do |merge_request| next if already_imported?(merge_request) @@ -40,6 +67,67 @@ module Gitlab mark_as_imported(merge_request) end end + + # The worker can be interrupted, by rate limit for instance, + # in different situations. To avoid requesting already imported data, + # if the worker is interrupted: + # - before importing all reviews of a merge request + # The reviews page is cached with the `PageCounter`, by merge request. 
+ # - before importing all merge requests reviews + # Merge requests that had all the reviews imported are cached with + # `mark_merge_request_reviews_imported` + def each_merge_request_to_import + each_review_page do |page, merge_request| + page.objects.each do |review| + next if already_imported?(review) + + review.merge_request_id = merge_request.id + yield(review) + + mark_as_imported(review) + end + end + end + + def each_review_page + merge_requests_to_import.find_each do |merge_request| + # The page counter needs to be scoped by merge request to avoid skipping + # pages of reviews from already imported merge requests. + page_counter = PageCounter.new(project, page_counter_id(merge_request)) + repo = project.import_source + options = collection_options.merge(page: page_counter.current) + + client.each_page(collection_method, repo, merge_request.iid, options) do |page| + next unless page_counter.set(page.number) + + yield(page, merge_request) + end + + # Avoid unnecessary Redis cache keys after the work is done. + page_counter.expire! + mark_merge_request_reviews_imported(merge_request) + end + end + + # Returns only the merge requests that still have reviews to be imported. + def merge_requests_to_import + project.merge_requests.where.not(id: already_imported_merge_requests) # rubocop: disable CodeReuse/ActiveRecord + end + + def already_imported_merge_requests + Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key) + end + + def page_counter_id(merge_request) + "merge_request/#{merge_request.id}/#{collection_method}" + end + + def mark_merge_request_reviews_imported(merge_request) + Gitlab::Cache::Import::Caching.set_add( + merge_requests_already_imported_cache_key, + merge_request.id + ) + end end end end |