summaryrefslogtreecommitdiff
path: root/qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb
diff options
context:
space:
mode:
Diffstat (limited to 'qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb')
-rw-r--r--qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb620
1 files changed, 620 insertions, 0 deletions
diff --git a/qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb b/qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb
new file mode 100644
index 00000000000..5acf15dd2b4
--- /dev/null
+++ b/qa/qa/specs/features/api/1_manage/import/import_large_github_repo_spec.rb
@@ -0,0 +1,620 @@
+# frozen_string_literal: true
+
+require "etc"
+
+# Lifesize project import test executed from https://gitlab.com/gitlab-org/manage/import/import-metrics
+
+# rubocop:disable Rails/Pluck
+module QA
+ RSpec.describe 'Manage', :github, requires_admin: 'creates users', only: { job: 'large-github-import' } do
+ describe 'Project import', product_group: :import do # rubocop:disable RSpec/MultipleMemoizedHelpers
+ let(:github_repo) { ENV['QA_LARGE_IMPORT_REPO'] || 'rspec/rspec-core' }
+ let(:import_max_duration) { ENV['QA_LARGE_IMPORT_DURATION']&.to_i || 7200 }
+ let(:logger) { Runtime::Logger.logger }
+ let(:differ) { RSpec::Support::Differ.new(color: true) }
+ let(:gitlab_address) { QA::Runtime::Scenario.gitlab_address.chomp("/") }
+ let(:dummy_url) { "https://example.com" }
+ let(:api_request_params) { { auto_paginate: true, attempts: 2 } }
+
+ let(:created_by_pattern) { /\*Created by: \S+\*\n\n/ }
+ let(:suggestion_pattern) { /suggestion:-\d+\+\d+/ }
+ let(:gh_link_pattern) { %r{https://github.com/#{github_repo}/(issues|pull)} }
+ let(:gl_link_pattern) { %r{#{gitlab_address}/#{imported_project.path_with_namespace}/-/(issues|merge_requests)} }
+ # rubocop:disable Lint/MixedRegexpCaptureTypes
+ let(:event_pattern) do
+ Regexp.union(
+ [
+ /(?<event>(un)?assigned)( to)? @\S+/,
+ /(?<event>mentioned) in (issue|merge request) [!#]\d+/,
+ /(?<event>changed title) from \*\*.*\*\* to \*\*.*\*\*/,
+ /(?<event>requested review) from @\w+/,
+ /\*(?<event>Merged) by:/,
+ /\*\*(Review):\*\*/
+ ]
+ )
+ end
+ # rubocop:enable Lint/MixedRegexpCaptureTypes
+
+ # mapping from gitlab to github names
+ let(:event_mapping) do
+ {
+ "label_add" => "labeled",
+ "label_remove" => "unlabeled",
+ "milestone_add" => "milestoned",
+ "milestone_remove" => "demilestoned",
+ "assigned" => "assigned",
+ "unassigned" => "unassigned",
+ "changed title" => "renamed",
+ "requested review" => "review_requested",
+ "Merged" => "merged"
+ }
+ end
+
+ # github events that are not migrated or are not correctly mapable in gitlab
+ let(:unsupported_events) do
+ [
+ "head_ref_deleted",
+ "head_ref_force_pushed",
+ "head_ref_restored",
+ "base_ref_force_pushed",
+ "base_ref_changed",
+ "review_request_removed",
+ "review_dismissed",
+ "auto_squash_enabled",
+ "auto_merge_disabled",
+ "comment_deleted",
+ "convert_to_draft",
+ "ready_for_review",
+ "subscribed",
+ "unsubscribed",
+ "transferred",
+ "locked",
+ "unlocked",
+ # mentions are supported but they can be reported differently on gitlab's side
+ # for example mention of issue creation in pr will be reported in the issue on gitlab side
+ # or referenced in github will still create a 'mentioned in' comment in gitlab
+ "referenced",
+ "mentioned"
+ ]
+ end
+
+ let(:api_client) { Runtime::API::Client.as_admin }
+
+ let(:user) do
+ Resource::User.fabricate_via_api! do |resource|
+ resource.api_client = api_client
+ end
+ end
+
+ let(:github_client) do
+ Octokit::Client.new(
+ access_token: ENV['QA_LARGE_IMPORT_GH_TOKEN'] || Runtime::Env.github_access_token,
+ auto_paginate: true,
+ middleware: Faraday::RackBuilder.new do |builder|
+ builder.use(Faraday::Retry::Middleware, exceptions: [Octokit::InternalServerError, Octokit::ServerError])
+ end
+ )
+ end
+
+ let(:gh_repo) { github_client.repository(github_repo) }
+
+ let(:gh_branches) do
+ logger.info("= Fetching branches =")
+ github_client.branches(github_repo).map(&:name)
+ end
+
+ let(:gh_commits) do
+ logger.info("= Fetching commits =")
+ github_client.commits(github_repo).map(&:sha)
+ end
+
+ let(:gh_labels) do
+ logger.info("= Fetching labels =")
+ github_client.labels(github_repo).map { |label| { name: label.name, color: "##{label.color}" } }
+ end
+
+ let(:gh_milestones) do
+ logger.info("= Fetching milestones =")
+ github_client
+ .list_milestones(github_repo, state: 'all')
+ .map { |ms| { title: ms.title, description: ms.description } }
+ end
+
+ let(:gh_prs) do
+ gh_all_issues.select(&:pull_request).each_with_object({}) do |pr, hash|
+ id = pr.number
+ hash[id] = {
+ url: pr.html_url,
+ title: pr.title,
+ body: pr.body || '',
+ comments: [*gh_pr_comments[id], *gh_issue_comments[id]].compact,
+ events: gh_pr_events[id].reject { |event| unsupported_events.include?(event) }
+ }
+ end
+ end
+
+ let(:gh_issues) do
+ gh_all_issues.reject(&:pull_request).each_with_object({}) do |issue, hash|
+ id = issue.number
+ hash[id] = {
+ url: issue.html_url,
+ title: issue.title,
+ body: issue.body || '',
+ comments: gh_issue_comments[id],
+ events: gh_issue_events[id].reject { |event| unsupported_events.include?(event) }
+ }
+ end
+ end
+
+ let(:gh_all_issues) do
+ logger.info("= Fetching issues and prs =")
+ github_client.list_issues(github_repo, state: 'all')
+ end
+
+ let(:gh_all_events) do
+ logger.info("- Fetching issue and pr events -")
+ github_client.repository_issue_events(github_repo).map do |event|
+ { name: event[:event], **(event[:issue] || {}) } # some events don't have issue object at all
+ end
+ end
+
+ let(:gh_issue_events) do
+ gh_all_events.each_with_object(Hash.new { |h, k| h[k] = [] }) do |event, hash|
+ next if event[:pull_request] || !event[:number]
+
+ hash[event[:number]] << event[:name]
+ end
+ end
+
+ let(:gh_pr_events) do
+ gh_all_events.each_with_object(Hash.new { |h, k| h[k] = [] }) do |event, hash|
+ next unless event[:pull_request]
+
+ hash[event[:number]] << event[:name]
+ end
+ end
+
+ let(:gh_issue_comments) do
+ logger.info("- Fetching issue comments -")
+ github_client.issues_comments(github_repo).each_with_object(Hash.new { |h, k| h[k] = [] }) do |c, hash|
+ hash[id_from_url(c.html_url)] << c.body&.gsub(gh_link_pattern, dummy_url)
+ end
+ end
+
+ let(:gh_pr_comments) do
+ logger.info("- Fetching pr comments -")
+ github_client.pull_requests_comments(github_repo).each_with_object(Hash.new { |h, k| h[k] = [] }) do |c, hash|
+ hash[id_from_url(c.html_url)] << c.body
+ # some suggestions can contain extra whitespaces which gitlab will remove
+ &.gsub(/suggestion\s+\r/, "suggestion\r")
+ &.gsub(gh_link_pattern, dummy_url)
+ end
+ end
+
+ let(:imported_project) do
+ Resource::ProjectImportedFromGithub.fabricate_via_api! do |project|
+ project.add_name_uuid = false
+ project.name = 'imported-project'
+ project.github_personal_access_token = Runtime::Env.github_access_token
+ project.github_repository_path = github_repo
+ project.personal_namespace = user.username
+ project.api_client = Runtime::API::Client.new(user: user)
+ project.issue_events_import = true
+ project.full_notes_import = true
+ end
+ end
+
+ after do |example|
+ next unless defined?(@import_time)
+
+ # save data for comparison notification creation
+ save_json(
+ "data",
+ {
+ importer: :github,
+ import_time: @import_time,
+ errors: imported_project.project_import_status[:failed_relations],
+ reported_stats: @stats,
+ source: {
+ name: "GitHub",
+ project_name: github_repo,
+ address: "https://github.com",
+ data: {
+ branches: gh_branches.length,
+ commits: gh_commits.length,
+ labels: gh_labels.length,
+ milestones: gh_milestones.length,
+ mrs: gh_prs.length,
+ mr_comments: gh_prs.sum { |_k, v| v[:comments].length },
+ mr_events: gh_prs.sum { |_k, v| v[:events].length },
+ issues: gh_issues.length,
+ issue_comments: gh_issues.sum { |_k, v| v[:comments].length },
+ issue_events: gh_issues.sum { |_k, v| v[:events].length }
+ }
+ },
+ target: {
+ name: "GitLab",
+ project_name: imported_project.path_with_namespace,
+ address: gitlab_address,
+ data: {
+ branches: gl_branches.length,
+ commits: gl_commits.length,
+ labels: gl_labels.length,
+ milestones: gl_milestones.length,
+ mrs: mrs.length,
+ mr_comments: mrs.sum { |_k, v| v[:comments].length },
+ mr_events: mrs.sum { |_k, v| v[:events].length },
+ issues: gl_issues.length,
+ issue_comments: gl_issues.sum { |_k, v| v[:comments].length },
+ issue_events: gl_issues.sum { |_k, v| v[:events].length }
+ }
+ },
+ not_imported: {
+ mrs: @mr_diff,
+ issues: @issue_diff
+ }
+ }
+ )
+ end
+
+ it(
+ 'imports large Github repo via api',
+ testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347668'
+ ) do
+ start = Time.now
+
+ # trigger import and log project paths
+ logger.info("== Triggering import of project '#{github_repo}' in to '#{imported_project.reload!.full_path}' ==")
+
+ # fetch all objects right after import has started
+ fetch_github_objects
+
+ import_status = lambda do
+ imported_project.project_import_status.yield_self do |status|
+ @stats = status.dig(:stats, :imported)
+
+ # fail fast if import explicitly failed
+ raise "Import of '#{imported_project.full_path}' failed!" if status[:import_status] == 'failed'
+
+ status[:import_status]
+ end
+ end
+
+ logger.info("== Waiting for import to be finished ==")
+ expect(import_status).to eventually_eq('finished').within(max_duration: import_max_duration, sleep_interval: 30)
+
+ @import_time = Time.now - start
+
+ aggregate_failures do
+ verify_repository_import
+ verify_labels_import
+ verify_milestones_import
+ verify_merge_requests_import
+ verify_issues_import
+ end
+ end
+
+ # Persist all objects from repository being imported
+ #
+ # @return [void]
+ def fetch_github_objects
+ logger.info("== Fetching github repo objects ==")
+
+ gh_repo
+ gh_branches
+ gh_commits
+ gh_labels
+ gh_milestones
+ gh_prs
+ gh_issues
+ end
+
+ # Verify repository imported correctly
+ #
+ # @return [void]
+ def verify_repository_import
+ logger.info("== Verifying repository import ==")
+ expect(imported_project.description).to eq(gh_repo.description)
+ # check via include, importer creates more branches
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/332711
+ expect(gl_branches).to include(*gh_branches)
+ expect(gl_commits).to match_array(gh_commits)
+ end
+
+ # Verify imported labels
+ #
+ # @return [void]
+ def verify_labels_import
+ logger.info("== Verifying label import ==")
+ # check via include, additional labels can be inherited from parent group
+ expect(gl_labels).to include(*gh_labels)
+ end
+
+ # Verify milestones import
+ #
+ # @return [void]
+ def verify_milestones_import
+ logger.info("== Verifying milestones import ==")
+ expect(gl_milestones).to match_array(gh_milestones)
+ end
+
+ # Verify imported merge requests and mr issues
+ #
+ # @return [void]
+ def verify_merge_requests_import
+ logger.info("== Verifying merge request import ==")
+ @mr_diff = verify_mrs_or_issues('mr')
+ end
+
+ # Verify imported issues and issue comments
+ #
+ # @return [void]
+ def verify_issues_import
+ logger.info("== Verifying issue import ==")
+ @issue_diff = verify_mrs_or_issues('issue')
+ end
+
+ private
+
+ # Verify imported mrs or issues and return missing items
+ #
+ # @param [String] type verification object, 'mrs' or 'issues'
+ # @return [Hash]
+ def verify_mrs_or_issues(type)
+ # Compare length to have easy to read overview how many objects are missing
+ #
+ expected = type == 'mr' ? mrs : gl_issues
+ actual = type == 'mr' ? gh_prs : gh_issues
+ count_msg = "Expected to contain same amount of #{type}s. Gitlab: #{expected.length}, Github: #{actual.length}"
+ expect(expected.length).to eq(actual.length), count_msg
+
+ missing_objects = (actual.keys - expected.keys).map { |it| actual[it].slice(:title, :url) }
+ missing_content = verify_comments_and_events(type, actual, expected)
+
+ {
+ "#{type}s": missing_objects.empty? ? nil : missing_objects,
+ "#{type}_content": missing_content.empty? ? nil : missing_content
+ }.compact
+ end
+
+ # Verify imported comments and events
+ #
+ # @param [String] type verification object, 'mrs' or 'issues'
+ # @param [Hash] actual
+ # @param [Hash] expected
+ # @return [Hash]
+ def verify_comments_and_events(type, actual, expected)
+ actual.each_with_object([]) do |(key, actual_item), missing_content|
+ expected_item = expected[key]
+ title = actual_item[:title]
+ msg = "expected #{type} with iid '#{key}' to have"
+
+ # Print title in the error message to see which object is missing
+ #
+ expect(expected_item).to be_truthy, "#{msg} been imported"
+ next unless expected_item
+
+ # Print difference in the description
+ #
+ expected_body = expected_item[:body]
+ actual_body = actual_item[:body]
+ body_msg = <<~MSG
+ #{msg} same description. diff:\n#{differ.diff(expected_body, actual_body)}
+ MSG
+ expect(expected_body).to eq(actual_body), body_msg
+
+ # Print amount difference first
+ #
+ expected_comments = expected_item[:comments]
+ actual_comments = actual_item[:comments]
+ comment_count_msg = <<~MSG
+ #{msg} same amount of comments. Gitlab: #{expected_comments.length}, Github: #{actual_comments.length}
+ MSG
+ expect(expected_comments.length).to eq(actual_comments.length), comment_count_msg
+ expect(expected_comments).to match_array(actual_comments)
+
+ expected_events = expected_item[:events]
+ actual_events = actual_item[:events]
+ event_count_msg = <<~MSG
+ #{msg} same amount of events. Gitlab: #{expected_events.length}, Github: #{actual_events.length}
+ MSG
+ expect(expected_events.length).to eq(actual_events.length), event_count_msg
+ expect(expected_events).to match_array(actual_events)
+
+ # Save missing comments and events
+ #
+ comment_diff = actual_comments - expected_comments
+ event_diff = actual_events - expected_events
+ next if comment_diff.empty? && event_diff.empty?
+
+ missing_content << {
+ title: title,
+ github_url: actual_item[:url],
+ gitlab_url: expected_item[:url],
+ missing_comments: comment_diff.empty? ? nil : comment_diff,
+ missing_events: event_diff.empty? ? nil : event_diff
+ }.compact
+ end
+ end
+
+ # Imported project branches
+ #
+ # @return [Array]
+ def gl_branches
+ @gl_branches ||= begin
+ logger.debug("= Fetching branches =")
+ imported_project.repository_branches(auto_paginate: true).map { |b| b[:name] }
+ end
+ end
+
+ # Imported project commits
+ #
+ # @return [Array]
+ def gl_commits
+ @gl_commits ||= begin
+ logger.debug("= Fetching commits =")
+ imported_project.commits(auto_paginate: true, attempts: 2).map { |c| c[:id] }
+ end
+ end
+
+ # Imported project labels
+ #
+ # @return [Array]
+ def gl_labels
+ @gl_labels ||= begin
+ logger.debug("= Fetching labels =")
+ imported_project.labels(auto_paginate: true).map { |label| label.slice(:name, :color) }
+ end
+ end
+
+ # Imported project milestones
+ #
+ # @return [<Type>] <description>
+ def gl_milestones
+ @gl_milestones ||= begin
+ logger.debug("= Fetching milestones =")
+ imported_project.milestones(auto_paginate: true).map { |ms| ms.slice(:title, :description) }
+ end
+ end
+
+ # Imported project merge requests
+ #
+ # @return [Hash]
+ def mrs
+ @mrs ||= begin
+ logger.debug("= Fetching merge requests =")
+ imported_mrs = imported_project.merge_requests(**api_request_params)
+
+ logger.debug("= Fetching merge request comments =")
+ Parallel.map(imported_mrs, in_threads: Etc.nprocessors) do |mr|
+ resource = Resource::MergeRequest.init do |resource|
+ resource.project = imported_project
+ resource.iid = mr[:iid]
+ resource.api_client = api_client
+ end
+
+ logger.debug("Fetching events and comments for mr '!#{mr[:iid]}'")
+ comments = resource.comments(**api_request_params)
+ label_events = resource.label_events(**api_request_params)
+ state_events = resource.state_events(**api_request_params)
+ milestone_events = resource.milestone_events(**api_request_params)
+
+ [mr[:iid], {
+ url: mr[:web_url],
+ title: mr[:title],
+ body: sanitize_description(mr[:description]) || '',
+ events: events(comments, label_events, state_events, milestone_events),
+ comments: non_event_comments(comments)
+ }]
+ end.to_h
+ end
+ end
+
+ # Imported project issues
+ #
+ # @return [Hash]
+ def gl_issues
+ @gl_issues ||= begin
+ logger.debug("= Fetching issues =")
+ imported_issues = imported_project.issues(**api_request_params)
+
+ logger.debug("= Fetching issue comments =")
+ Parallel.map(imported_issues, in_threads: Etc.nprocessors) do |issue|
+ resource = Resource::Issue.init do |issue_resource|
+ issue_resource.project = imported_project
+ issue_resource.iid = issue[:iid]
+ issue_resource.api_client = api_client
+ end
+
+ logger.debug("Fetching events and comments for issue '!#{issue[:iid]}'")
+ comments = resource.comments(**api_request_params)
+ label_events = resource.label_events(**api_request_params)
+ state_events = resource.state_events(**api_request_params)
+ milestone_events = resource.milestone_events(**api_request_params)
+
+ [issue[:iid], {
+ url: issue[:web_url],
+ title: issue[:title],
+ body: sanitize_description(issue[:description]) || '',
+ events: events(comments, label_events, state_events, milestone_events),
+ comments: non_event_comments(comments)
+ }]
+ end.to_h
+ end
+ end
+
+ # Filter out event comments
+ #
+ # @param [Array] comments
+ # @return [Array]
+ def non_event_comments(comments)
+ comments
+ .reject { |c| c[:system] || c[:body].match?(event_pattern) }
+ .map { |c| sanitize_comment(c[:body]) }
+ end
+
+ # Events
+ #
+ # @param [Array] comments
+ # @param [Array] label_events
+ # @param [Array] state_events
+ # @param [Array] milestone_events
+ # @return [Array]
+ def events(comments, label_events, state_events, milestone_events)
+ mapped_label_events = label_events.map { |event| event_mapping["label_#{event[:action]}"] }
+ mapped_milestone_events = milestone_events.map { |event| event_mapping["milestone_#{event[:action]}"] }
+ mapped_state_event = state_events.map { |event| event[:state] }
+ mapped_comment_events = comments.map do |c|
+ event_mapping[c[:body].match(event_pattern)&.named_captures&.fetch("event", nil)]
+ end
+
+ [*mapped_label_events, *mapped_milestone_events, *mapped_state_event, *mapped_comment_events].compact
+ end
+
+ # Normalize comments and make them directly comparable
+ #
+ # * remove created by prefixes
+ # * unify suggestion format
+ # * replace github and gitlab urls - some of the links to objects get transformed to gitlab entities, some don't,
+ # update all links to example.com for now
+ #
+ # @param [String] body
+ # @return [String]
+ def sanitize_comment(body)
+ body
+ .gsub(created_by_pattern, "")
+ .gsub(suggestion_pattern, "suggestion\r")
+ .gsub(gl_link_pattern, dummy_url)
+ .gsub(gh_link_pattern, dummy_url)
+ end
+
+ # Remove created by prefix from descripion
+ #
+ # @param [String] body
+ # @return [String]
+ def sanitize_description(body)
+ body&.gsub(created_by_pattern, "")
+ end
+
+ # Save json as file
+ #
+ # @param [String] name
+ # @param [Hash] json
+ # @return [void]
+ def save_json(name, json)
+ File.open("tmp/#{name}.json", "w") { |file| file.write(JSON.pretty_generate(json)) }
+ end
+
+ # Extract id number from web url of issue or pull request
+ #
+ # Some endpoints don't return object id as separate parameter so web url can be used as a workaround
+ #
+ # @param [String] url
+ # @return [Integer]
+ def id_from_url(url)
+ url.match(%r{(?<type>issues|pull)/(?<id>\d+)})&.named_captures&.fetch("id", nil).to_i
+ end
+ end
+ end
+end
+# rubocop:enable Rails/Pluck