summaryrefslogtreecommitdiff
path: root/qa/qa/specs/features/api/1_manage/import_large_github_repo_spec.rb
blob: 385908f21764bc9cd27dca3de80b2f86cbfa4591 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# frozen_string_literal: true

require 'octokit'

# rubocop:disable Rails/Pluck
module QA
  # Only executes in custom job/pipeline
  RSpec.describe 'Manage', :github, :requires_admin, only: { job: 'large-github-import' } do
    describe 'Project import' do
      # Logger used for the debug output of this spec
      let(:logger) { Runtime::Logger.logger }
      # Differ used to render description diffs in failure messages
      let(:differ) { RSpec::Support::Differ.new(color: true) }

      # Admin api client, assigned to every resource this spec creates or initializes
      let(:api_client) { Runtime::API::Client.as_admin }

      # Group the project is imported into
      let(:group) do
        Resource::Group.fabricate_via_api! { |new_group| new_group.api_client = api_client }
      end

      # User that is added to the group as maintainer before the example runs
      let(:user) do
        Resource::User.fabricate_via_api! { |new_user| new_user.api_client = api_client }
      end

      # Repository to import, QA_LARGE_GH_IMPORT_REPO overrides the default
      let(:github_repo) { ENV.fetch('QA_LARGE_GH_IMPORT_REPO', 'rspec/rspec-core') }

      # Value passed as max_duration when waiting for the import, QA_LARGE_GH_IMPORT_DURATION overrides the default
      let(:import_max_duration) { ENV.fetch('QA_LARGE_GH_IMPORT_DURATION', 7200).to_i }

      # Auto-paginating github client
      let(:github_client) do
        # NOTE: the middleware is assigned on the Octokit module itself, not only on the client built below
        Octokit.middleware = Faraday::RackBuilder.new do |stack|
          stack.response(:logger, logger, headers: false, bodies: false)
        end

        # dedicated token for this job takes precedence over the generic github token
        token = ENV.fetch('QA_LARGE_GH_IMPORT_GH_TOKEN') { Runtime::Env.github_access_token }
        Octokit::Client.new(access_token: token, auto_paginate: true)
      end

      # Branch names of the github repository
      let(:gh_branches) do
        branches = github_client.branches(github_repo)
        branches.map(&:name)
      end

      # Commit shas of the github repository
      let(:gh_commits) do
        commits = github_client.commits(github_repo)
        commits.map(&:sha)
      end

      # Github repository object, its description is compared with the imported project
      let(:gh_repo) { github_client.repository(github_repo) }

      # Github labels reduced to name and color, color is prefixed with '#'
      let(:gh_labels) do
        labels = github_client.labels(github_repo)
        labels.map do |label|
          { name: label.name, color: "##{label.color}" }
        end
      end

      # Github milestones in any state, reduced to title and description
      let(:gh_milestones) do
        milestones = github_client.list_milestones(github_repo, state: 'all')
        milestones.map do |milestone|
          { title: milestone.title, description: milestone.description }
        end
      end

      # All entries of the github issue list in any state
      # Entries with a truthy `pull_request` attribute are handled as prs, the rest as issues
      let(:gh_all_issues) { github_client.list_issues(github_repo, state: 'all') }

      # Github prs keyed by title, comments combine pr comments and issue comments of the same url
      let(:gh_prs) do
        pull_requests = gh_all_issues.select(&:pull_request)
        pull_requests.to_h do |pr|
          details = {
            body: pr.body || '',
            comments: [*gh_pr_comments[pr.html_url], *gh_issue_comments[pr.html_url]].compact.sort
          }
          [pr.title, details]
        end
      end

      # Github issues (entries without `pull_request`) keyed by title
      let(:gh_issues) do
        plain_issues = gh_all_issues.reject(&:pull_request)
        plain_issues.to_h do |issue|
          details = {
            body: issue.body || '',
            comments: gh_issue_comments[issue.html_url]
          }
          [issue.title, details]
        end
      end

      # Issue comment bodies grouped by the url of the object they belong to
      # Unknown urls return (and store) an empty array
      let(:gh_issue_comments) do
        by_url = Hash.new { |index, url| index[url] = [] }
        github_client.issues_comments(github_repo).each do |comment|
          by_url[comment.html_url.gsub(/\#\S+/, "")] << comment.body # use base html url as key
        end
        by_url
      end

      # Pull request comment bodies grouped by the url of the pr they belong to
      # Unknown urls return (and store) an empty array
      let(:gh_pr_comments) do
        by_url = Hash.new { |index, url| index[url] = [] }
        github_client.pull_requests_comments(github_repo).each do |comment|
          by_url[comment.html_url.gsub(/\#\S+/, "")] << comment.body # use base html url as key
        end
        by_url
      end

      # Project imported from github; first reference fabricates it via api
      let(:imported_project) do
        Resource::ProjectImportedFromGithub.fabricate_via_api! do |import|
          # target project
          import.add_name_uuid = false
          import.name = 'imported-project'
          import.group = group
          import.api_client = api_client
          # import source
          import.github_personal_access_token = Runtime::Env.github_access_token
          import.github_repository_path = github_repo
        end
      end

      # Add the created user to the group as maintainer
      before { group.add_member(user, Resource::Members::AccessLevel::MAINTAINER) }

      # Cleanup and result reporting
      #
      # The user is always removed. Comparison data is only written when @import_time is defined,
      # which the example sets once the import reached 'finished' status.
      after do
        user.remove_via_api!
        next unless defined?(@import_time)

        # save data for comparison after run finished
        save_json(
          "data",
          {
            import_time: @import_time,
            github: {
              project_name: github_repo,
              branches: gh_branches,
              commits: gh_commits,
              labels: gh_labels,
              milestones: gh_milestones,
              prs: gh_prs,
              issues: gh_issues
            },
            gitlab: {
              project_name: imported_project.path_with_namespace,
              branches: gl_branches,
              commits: gl_commits,
              labels: gl_labels,
              milestones: gl_milestones,
              mrs: mrs,
              issues: gl_issues
            }
          }.to_json
        )
      end

      it 'imports large Github repo via api' do
        # Measure with the monotonic clock: the wait below can take up to import_max_duration seconds
        # and wall clock adjustments during that time must not skew the recorded duration
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        imported_project # import the project
        fetch_github_objects # fetch all objects right after import has started

        # Returns current import status, raises right away when the import failed instead of waiting for timeout
        import_status = lambda do
          imported_project.reload!.import_status.tap do |status|
            raise "Import of '#{imported_project.name}' failed!" if status == 'failed'
          end
        end
        expect(import_status).to eventually_eq('finished').within(max_duration: import_max_duration, sleep_interval: 30)
        # import duration in seconds (Float), saved with the comparison data in the after hook
        @import_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        # run all verifications even if some of them fail
        aggregate_failures do
          verify_repository_import
          verify_labels_import
          verify_milestones_import
          verify_merge_requests_import
          verify_issues_import
        end
      end

      # Load all objects of the github repository being imported
      #
      # Every call below evaluates the corresponding `let`, so the github data is
      # fetched at this point and reused by later verifications
      #
      # @return [void]
      def fetch_github_objects
        logger.debug("== Fetching objects for github repo: '#{github_repo}' ==")

        # same order as before: repo, branches, commits, prs, issues, labels and milestones last
        %i[gh_repo gh_branches gh_commits gh_prs gh_issues gh_labels].each { |fixture| send(fixture) }
        gh_milestones
      end

      # Check repository data of the imported project against github
      #
      # Compares project description, branches and commits
      #
      # @return [void]
      def verify_repository_import
        logger.debug('== Verifying repository import ==')

        imported_description = imported_project.description
        expect(imported_description).to eq(gh_repo.description)

        # check via include, importer creates more branches
        # https://gitlab.com/gitlab-org/gitlab/-/issues/332711
        expect(gl_branches).to include(*gh_branches)

        # commits have to be the same set on both sides, order is ignored
        expect(gl_commits).to match_array(gh_commits)
      end

      # Check imported merge requests and their comments against github prs
      #
      # @return [void]
      def verify_merge_requests_import
        logger.debug('== Verifying merge request import ==')
        verify_mrs_or_issues('mr')
      end

      # Check imported issues and their comments against github issues
      #
      # @return [void]
      def verify_issues_import
        logger.debug('== Verifying issue import ==')
        verify_mrs_or_issues('issue')
      end

      # Check that every github label exists in the imported project
      #
      # @return [void]
      def verify_labels_import
        logger.debug('== Verifying label import ==')

        # check via include, additional labels can be inherited from parent group
        expect(gl_labels).to include(*gh_labels)
      end

      # Check that imported milestones are the same set as github milestones
      #
      # @return [void]
      def verify_milestones_import
        logger.debug('== Verifying milestones import ==')

        expect(gl_milestones).to match_array(gh_milestones)
      end

      private

      # Verify imported mrs or issues against the objects fetched from github
      #
      # Objects are matched by title. For every github object the imported description has to
      # include the github description and the comments have to match regardless of order.
      #
      # @param [String] type verification object, 'mr' or 'issue'
      # @return [void]
      def verify_mrs_or_issues(type)
        msg = ->(title) { "expected #{type} with title '#{title}' to have" }
        imported = type == 'mr' ? mrs : gl_issues
        source = type == 'mr' ? gh_prs : gh_issues

        # Compare length to have easy to read overview how many objects are missing
        expect(imported.length).to(
          eq(source.length),
          "Expected to contain same amount of #{type}s. Gitlab: #{imported.length}, Github: #{source.length}"
        )
        logger.debug("= Comparing #{type}s =")
        source.each do |title, source_item|
          print "." # indicate that it is still going but don't spam the output with newlines

          imported_item = imported[title]

          expect(imported_item).to be_truthy, "#{msg.call(title)} been imported"
          # a failed expectation does not necessarily raise when failures are aggregated,
          # so skip the remaining checks for a missing object explicitly
          next unless imported_item

          # include instead of eq: the imported description may carry additional text
          expect(imported_item[:body]).to(
            include(source_item[:body]),
            "#{msg.call(title)} same description. diff:\n#{differ.diff(imported_item[:body], source_item[:body])}"
          )
          expect(imported_item[:comments].length).to(
            eq(source_item[:comments].length),
            "#{msg.call(title)} same amount of comments"
          )
          expect(imported_item[:comments]).to match_array(source_item[:comments])
        end
        puts # print newline after last print to make output pretty
      end

      # Branch names of the imported project
      #
      # Fetched on first call and cached in @gl_branches
      #
      # @return [Array]
      def gl_branches
        return @gl_branches if @gl_branches

        logger.debug('= Fetching branches =')
        branches = imported_project.repository_branches(auto_paginate: true)
        @gl_branches = branches.map { |branch| branch[:name] }
      end

      # Commit ids of the imported project
      #
      # Fetched on first call and cached in @gl_commits
      #
      # @return [Array]
      def gl_commits
        return @gl_commits if @gl_commits

        logger.debug('= Fetching commits =')
        commits = imported_project.commits(auto_paginate: true, attempts: 2)
        @gl_commits = commits.map { |commit| commit[:id] }
      end

      # Labels of the imported project, reduced to :name and :color
      #
      # Fetched on first call and cached in @gl_labels
      #
      # @return [Array]
      def gl_labels
        return @gl_labels if @gl_labels

        logger.debug('= Fetching labels =')
        labels = imported_project.labels(auto_paginate: true)
        @gl_labels = labels.map { |label| label.slice(:name, :color) }
      end

      # Milestones of the imported project, reduced to :title and :description
      #
      # Fetched on first call and cached in @gl_milestones
      #
      # @return [Array]
      def gl_milestones
        return @gl_milestones if @gl_milestones

        logger.debug('= Fetching milestones =')
        milestones = imported_project.milestones(auto_paginate: true)
        @gl_milestones = milestones.map { |milestone| milestone.slice(:title, :description) }
      end

      # Imported project merge requests
      #
      # Fetched and transformed on first call, cached in @mrs. Comments are fetched
      # separately for every merge request.
      #
      # @return [Hash] merge request title => { body: description, comments: array of comment bodies }
      def mrs
        @mrs ||= begin
          logger.debug("= Fetching merge requests =")
          imported_mrs = imported_project.merge_requests(auto_paginate: true, attempts: 2)
          logger.debug("= Transforming merge request objects for comparison =")
          imported_mrs.each_with_object({}) do |mr, hash|
            # block param is named differently than the local it is assigned to, avoids shadowing
            resource = Resource::MergeRequest.init do |mr_resource|
              mr_resource.project = imported_project
              mr_resource.iid = mr[:iid]
              mr_resource.api_client = api_client
            end

            hash[mr[:title]] = {
              body: mr[:description],
              comments: resource.comments(auto_paginate: true, attempts: 2)
                # remove system notes
                # NOTE(review): `^` binds only to the '**Review:**' alternative, '*Merged by:' matches
                # anywhere in the body - confirm this is intended before tightening the pattern
                .reject { |c| c[:system] || c[:body].match?(/^(\*\*Review:\*\*)|(\*Merged by:).*/) }
                .map { |c| sanitize(c[:body]) }
            }
          end
        end
      end

      # Issues of the imported project
      #
      # Fetched and transformed on first call, cached in @gl_issues. Comments are fetched
      # separately for every issue and stripped of the importer prefix.
      #
      # @return [Hash] issue title => { body: description, comments: array of comment bodies }
      def gl_issues
        return @gl_issues if @gl_issues

        logger.debug('= Fetching issues =')
        fetched_issues = imported_project.issues(auto_paginate: true, attempts: 2)
        logger.debug('= Transforming issue objects for comparison =')

        @gl_issues = fetched_issues.to_h do |issue|
          resource = Resource::Issue.init do |issue_resource|
            issue_resource.project = imported_project
            issue_resource.iid = issue[:iid]
            issue_resource.api_client = api_client
          end
          comment_bodies = resource.comments(auto_paginate: true, attempts: 2).map { |note| sanitize(note[:body]) }

          [issue[:title], { body: issue[:description], comments: comment_bodies }]
        end
      end

      # Strip the '*Created by: <name>*' prefix (including the blank line after it) from a comment body
      #
      # @param [String] body
      # @return [String] new string with every occurrence of the prefix removed
      def sanitize(body)
        created_by_prefix = /\*Created by: \S+\*\n\n/
        body.gsub(created_by_prefix, '')
      end

      # Save json as file tmp/<name>.json, relative to the current working directory
      #
      # The tmp directory is created when missing so data collected during the run
      # is not lost to a missing directory
      #
      # @param [String] name file name without extension
      # @param [String] json content to write
      # @return [void]
      def save_json(name, json)
        Dir.mkdir("tmp") unless Dir.exist?("tmp")
        File.write("tmp/#{name}.json", json)
      end
    end
  end
end
# rubocop:enable Rails/Pluck