summaryrefslogtreecommitdiff
path: root/lib/gitlab/import_export/base/relation_object_saver.rb
blob: ed3858d0bf4003a5320813c23daaec1da02c0627 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# frozen_string_literal: true

# RelationObjectSaver provides an alternative approach to persisting
# objects during Project/Group Import: it persists an object's
# nested collection subrelations separately, in batches.
#
# Instead of the regular `relation_object.save!`, which opens one DB
# transaction for the object itself and all of its subrelations, we
# separate collection subrelations from the object and save them
# in batches, in smaller, more frequent DB transactions.
module Gitlab
  module ImportExport
    module Base
      class RelationObjectSaver
        include Gitlab::Utils::StrongMemoize

        # Maximum number of subrelation records appended to the relation object at once.
        BATCH_SIZE = 100

        # A collection subrelation is detached and saved in batches only when it
        # holds more than this many records; otherwise it stays on the relation
        # object and is saved together with it by `relation_object.save!`.
        MIN_RECORDS_SIZE = 1

        # @param relation_object [Object] Object of a project/group, e.g. an issue
        # @param relation_key [String] Name of the object association to group/project, e.g. 'issues'
        # @param relation_definition [Hash] Object subrelations as defined in import_export.yml
        # @param importable [Project|Group] Project or group where relation object is getting saved to
        #
        # @example
        #   Gitlab::ImportExport::Base::RelationObjectSaver.new(
        #     relation_key: 'merge_requests',
        #     relation_object: #<MergeRequest id: root/mrs!1, notes: [#<Note id: nil, note: 'test', ...>, #<Note id: nil, note: 'another note'>]>,
        #     relation_definition: {"metrics"=>{}, "award_emoji"=>{}, "notes"=>{"author"=>{}, ... }},
        #     importable: @importable
        #   ).execute
        def initialize(relation_object:, relation_key:, relation_definition:, importable:)
          @relation_object = relation_object
          @relation_key = relation_key
          @relation_definition = relation_definition
          @importable = importable
          @invalid_subrelations = []
        end

        # Detaches the batchable collection subrelations from the relation object,
        # saves the object itself with `save!`, then appends the detached records
        # back in batches.
        #
        # Nothing is rescued here: an exception raised by any step propagates to
        # the caller. The `ensure` clause still reports the invalid subrelations
        # collected up to that point.
        def execute
          move_subrelations

          relation_object.save!

          save_subrelations
        ensure
          log_invalid_subrelations
        end

        private

        attr_reader :relation_object, :relation_key, :relation_definition,
                    :importable, :collection_subrelations, :invalid_subrelations

        # rubocop:disable GitlabSecurity/PublicSend

        # Appends every detached collection back onto the relation object,
        # at most BATCH_SIZE records per batch.
        def save_subrelations
          collection_subrelations.each_pair do |relation_name, records|
            records.each_slice(BATCH_SIZE) do |batch|
              save_batch(relation_name, batch)
            end
          end
        end

        # Appends one batch of records to the `relation_name` association.
        # Records passing `valid?` are appended together; the remaining ones are
        # appended one by one and remembered in `invalid_subrelations` when they
        # still end up not persisted.
        def save_batch(relation_name, batch)
          valid_records, invalid_records = batch.partition(&:valid?)

          relation_object.public_send(relation_name) << valid_records

          # Attempt to save some of the invalid subrelations, as they might be valid after all.
          # For example, a merge request `Approval` validates presence of merge_request_id.
          # It is not present at the time of calling `#valid?` above, since it's indeed missing.
          # However, when saving such a subrelation against an already persisted merge request
          # the validation won't fail (e.g. `merge_request.approvals << Approval.new(user_id: 1)`),
          # as we're operating on a merge request that has `id` present.
          invalid_records.each do |invalid_record|
            relation_object.public_send(relation_name) << invalid_record

            invalid_subrelations << invalid_record unless invalid_record.persisted?
          end
        end

        # Builds (and memoizes as `collection_subrelations`) a hash mapping each
        # batchable subrelation name to its records, clearing those records from
        # the relation object so that `save!` does not persist them.
        #
        # The association reflection is checked first, so the subrelation getter
        # is only invoked for keys that are collection associations.
        def move_subrelations
          strong_memoize(:collection_subrelations) do
            relation_definition.each_key.each_with_object({}) do |definition, detached|
              association = relation_object.class.reflect_on_association(definition)
              next unless association&.collection?

              subrelation = relation_object.public_send(definition)
              next if subrelation.size <= MIN_RECORDS_SIZE

              detached[definition] = subrelation.records

              subrelation.clear
            end
          end
        end
        # rubocop:enable GitlabSecurity/PublicSend

        # Reports every subrelation that could not be persisted: writes an import
        # log entry and creates an ImportFailure record carrying the validation
        # error messages.
        def log_invalid_subrelations
          invalid_subrelations.each do |record|
            # Build the message once; it is used for both the log line and the failure record.
            error_messages = record.errors.full_messages.to_sentence

            Gitlab::Import::Logger.info(
              message: '[Project/Group Import] Invalid subrelation',
              importable_column_name => importable.id,
              relation_key: relation_key,
              error_messages: error_messages
            )

            ImportFailure.create(
              source: 'RelationObjectSaver#save!',
              relation_key: relation_key,
              exception_class: 'RecordInvalid',
              exception_message: error_messages,
              correlation_id_value: Labkit::Correlation::CorrelationId.current_or_new_id,
              importable_column_name => importable.id
            )
          end
        end

        # Foreign key (as a Symbol) of the importable's `import_failures`
        # association; used as the hash key under which `importable.id` is passed.
        def importable_column_name
          @importable_column_name ||= importable.class.reflect_on_association(:import_failures).foreign_key.to_sym
        end
      end
    end
  end
end