summaryrefslogtreecommitdiff
path: root/lib/bulk_imports/projects/pipelines/references_pipeline.rb
blob: 8f44f3ffe6a54e8b7e58158454d9e1525c147981 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# frozen_string_literal: true

module BulkImports
  module Projects
    module Pipelines
      class ReferencesPipeline
        include Pipeline

        BATCH_SIZE = 100

        def extract(_context)
          data = Enumerator.new do |enum|
            add_matching_objects(portable.issues, enum)
            add_matching_objects(portable.merge_requests, enum)
            add_notes(portable.issues, enum)
            add_notes(portable.merge_requests, enum)
          end

          BulkImports::Pipeline::ExtractedData.new(data: data)
        end

        def transform(_context, object)
          body = object_body(object).dup

          matching_urls(object).each do |old_url, new_url|
            body.gsub!(old_url, new_url)
          end

          object.assign_attributes(body_field(object) => body)

          object
        end

        def load(_context, object)
          object.save! if object_body_changed?(object)
        end

        private

        def add_matching_objects(collection, enum)
          collection.each_batch(of: BATCH_SIZE, column: :iid) do |batch|
            batch.each do |object|
              enum << object if object_has_reference?(object)
            end
          end
        end

        def add_notes(collection, enum)
          collection.each_batch(of: BATCH_SIZE, column: :iid) do |batch|
            batch.each do |object|
              object.notes.each_batch(of: BATCH_SIZE) do |notes_batch|
                notes_batch.each do |note|
                  note.refresh_markdown_cache!
                  enum << note if object_has_reference?(note)
                end
              end
            end
          end
        end

        def object_has_reference?(object)
          object_body(object).include?(source_full_path)
        end

        def object_body(object)
          call_object_method(object)
        end

        def object_body_changed?(object)
          call_object_method(object, suffix: '_changed?')
        end

        def call_object_method(object, suffix: nil)
          method = body_field(object)
          method = "#{method}#{suffix}" if suffix.present?

          object.public_send(method) # rubocop:disable GitlabSecurity/PublicSend
        end

        def body_field(object)
          object.is_a?(Note) ? 'note' : 'description'
        end

        def matching_urls(object)
          URI.extract(object_body(object), %w[http https]).each_with_object([]) do |url, array|
            parsed_url = URI.parse(url)

            next unless source_host == parsed_url.host
            next unless parsed_url.path&.start_with?("/#{source_full_path}")

            array << [url, new_url(parsed_url)]
          end
        end

        def new_url(parsed_old_url)
          parsed_old_url.host = ::Gitlab.config.gitlab.host
          parsed_old_url.port = ::Gitlab.config.gitlab.port
          parsed_old_url.scheme = ::Gitlab.config.gitlab.https ? 'https' : 'http'
          parsed_old_url.to_s.gsub!(source_full_path, portable.full_path)
        end

        def source_host
          @source_host ||= URI.parse(context.configuration.url).host
        end

        def source_full_path
          context.entity.source_full_path
        end
      end
    end
  end
end