summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff.rb
blob: ec87aee9310780329cb018d8d0e807c24ec2e79f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# Stores one version of a merge request's diff: the commit SHAs it was
# computed from (start/head/base), the commits, and the per-file diffs.
# Older records keep commits/diffs serialized in `st_commits` / `st_diffs`;
# the methods below fall back to the `merge_request_diff_commits` /
# `merge_request_diff_files` associations when those columns are blank.
class MergeRequestDiff < ActiveRecord::Base
  include Sortable
  include Importable
  include Gitlab::EncodingHelper

  # Intended to prevent storing the diff when the number of commits exceeds
  # this threshold.
  # NOTE(review): an earlier comment quoted 500 while the value is 100, and
  # nothing in this class references the constant - confirm against callers
  # before changing it.
  COMMITS_SAFE_SIZE = 100

  # Valid types of serialized diffs allowed by Gitlab::Git::Diff
  VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze

  belongs_to :merge_request
  has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
  has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }

  serialize :st_commits # rubocop:disable Cop/ActiveRecordSerialize
  serialize :st_diffs # rubocop:disable Cop/ActiveRecordSerialize

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  # Every diff whose state is anything other than :empty.
  scope :viewable, -> { without_state(:empty) }

  # All diff information is collected from repository after object is created.
  # It allows you to override variables like head_commit_sha before getting diff.
  after_create :save_git_content, unless: :importing?

  # Looks up the diff whose start/head/base SHAs all match the given diff refs.
  def self.find_by_diff_refs(diff_refs)
    find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
  end

  # Selects every column except the serialized `st_diffs` column.
  def self.select_without_diff
    select(column_names - ['st_diffs'])
  end

  # Serialized commits, defaulting to an empty array when the column is nil.
  def st_commits
    super || []
  end

  # Collect information about commits and diff from repository
  # and save it to the database as serialized data
  def save_git_content
    ensure_commit_shas
    save_commits
    save_diffs
    keep_around_commits
  end

  # Fetches the merge request ref, fills in any of the three SHAs that are
  # still unset (values assigned beforehand are kept) and saves the record.
  def ensure_commit_shas
    merge_request.fetch_ref
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha  ||= merge_request.source_branch_sha
    self.base_commit_sha  ||= find_base_sha
    save
  end

  # Override head_commit_sha to keep compatibility with merge request diff
  # created before version 8.4 that does not store head_commit_sha in separate db field.
  def head_commit_sha
    if persisted? && super.nil?
      last_commit_sha
    else
      super
    end
  end

  # This method will rely on the repository branch SHA
  # in case start_commit_sha is nil. It's necessary for old merge request diffs
  # created before version 8.4 to work.
  def safe_start_commit_sha
    start_commit_sha || merge_request.target_branch_sha
  end

  # Stored `real_size` when present, otherwise the size of the raw diffs.
  def size
    real_size.presence || raw_diffs.size
  end

  # Returns the diffs for this version, memoized per options hash.
  #
  # With options[:ignore_whitespace_change] the diff is recomputed from the
  # repository between safe_start_commit_sha and head_commit_sha; otherwise it
  # is loaded from the database via #load_diffs.
  def raw_diffs(options = {})
    if options[:ignore_whitespace_change]
      # Keyed by the full options hash (like the branch below) so that a later
      # call with different options, e.g. other :paths, is not served the
      # collection that was computed for the first call.
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||=
        Gitlab::Git::Compare.new(
          repository.raw_repository,
          safe_start_commit_sha,
          head_commit_sha).diffs(options)
    else
      @raw_diffs ||= {}
      @raw_diffs[options] ||= load_diffs(options)
    end
  end

  # Commit objects for this diff, memoized (see #load_commits).
  def commits
    @commits ||= load_commits
  end

  # The first entry of #commit_shas; used by #head_commit_sha as the fallback
  # for records that have no stored head SHA.
  def last_commit_sha
    commit_shas.first
  end

  # The last entry of #commits.
  def first_commit
    commits.last
  end

  # Commit for base_commit_sha, or nil when no base SHA is stored.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

  # Commit for start_commit_sha, or nil when no start SHA is stored.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Commit for head_commit_sha, or nil when no head SHA is available.
  def head_commit
    return unless head_commit_sha

    project.commit(head_commit_sha)
  end

  # SHAs of the commits in this diff, read from the serialized `st_commits`
  # hashes (their :id key) when present, otherwise from the
  # merge_request_diff_commits association.
  def commit_shas
    if st_commits.present?
      st_commits.map { |commit| commit[:id] }
    else
      merge_request_diff_commits.map(&:sha)
    end
  end

  # Assigns the three SHAs from a diff-refs object; passing nil clears them.
  def diff_refs=(new_diff_refs)
    self.base_commit_sha = new_diff_refs&.base_sha
    self.start_commit_sha = new_diff_refs&.start_sha
    self.head_commit_sha = new_diff_refs&.head_sha
  end

  # Diff refs built from the stored SHAs, or nil when neither a start nor a
  # base SHA is stored.
  def diff_refs
    return unless start_commit_sha || base_commit_sha

    Gitlab::Diff::DiffRefs.new(
      base_sha:  base_commit_sha,
      start_sha: start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

  # MRs created before 8.4 don't store their true diff refs (start and base),
  # but we need to get a commit SHA for the "View file @ ..." link by a file,
  # so we use an approximation of the diff refs if we can't get the actual one.
  #
  # These will not be the actual diff refs if the target branch was merged into
  # the source branch after the merge request was created, but it is good enough
  # for the specific purpose of linking to a commit.
  #
  # It is not good enough for highlighting diffs, so we can't simply pass
  # these as `diff_refs.`
  def fallback_diff_refs
    real_refs = diff_refs
    return real_refs if real_refs

    # Parent of the first commit when there is one, else the first commit itself.
    likely_base_commit_sha = (first_commit&.parent || first_commit)&.sha

    Gitlab::Diff::DiffRefs.new(
      base_sha:  likely_base_commit_sha,
      start_sha: safe_start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

  # True only when base, head and start SHAs are all present.
  def diff_refs_by_sha?
    base_commit_sha? && head_commit_sha? && start_commit_sha?
  end

  # File collection wrapper around this diff.
  def diffs(diff_options = nil)
    Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options)
  end

  # The merge request's target project.
  def project
    merge_request.target_project
  end

  # Memoized comparison between safe_start_commit_sha and head_commit_sha in
  # the target project's repository.
  def compare
    @compare ||=
      Gitlab::Git::Compare.new(
        repository.raw_repository,
        safe_start_commit_sha,
        head_commit_sha
      )
  end

  # Whether this is the merge request's current diff.
  def latest?
    self == merge_request.merge_request_diff
  end

  def compare_with(sha)
    # When comparing merge request versions we want diff A..B instead of A...B
    # so we handle cases when the user squashes and rebases the commits between
    # versions. For this reason we set straight to true by default.
    CompareService.new(project, head_commit_sha).execute(project, sha, straight: true)
  end

  # Number of commits, from `st_commits` when present, otherwise from the
  # merge_request_diff_commits association.
  def commits_count
    if st_commits.present?
      st_commits.size
    else
      merge_request_diff_commits.size
    end
  end

  # Returns the `st_diffs` entries with every value that responds to
  # #encoding passed through encode_utf8 (presumably provided by
  # Gitlab::EncodingHelper - confirm). The entries are updated in place.
  # Returns [] when `st_diffs` is blank.
  def utf8_st_diffs
    return [] if st_diffs.blank?

    st_diffs.map do |diff|
      diff.each do |k, v|
        diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
      end
    end
  end

  private

  # Old GitLab implementations may have generated diffs as ["--broken-diff"].
  # Avoid an error 500 by ignoring bad elements. See:
  # https://gitlab.com/gitlab-org/gitlab-ce/issues/20776
  #
  # Returns true when `raw` is enumerable and at least one of its elements is
  # an instance of one of VALID_CLASSES.
  def valid_raw_diff?(raw)
    return false unless raw.respond_to?(:each)

    raw.any? { |element| VALID_CLASSES.include?(element.class) }
  end

  # Bulk-inserts one merge_request_diff_files row per diff, numbering them by
  # their position in `diffs`.
  def create_merge_request_diff_files(diffs)
    rows = diffs.map.with_index do |diff, index|
      diff_hash = diff.to_hash.merge(
        binary: false,
        merge_request_diff_id: self.id,
        relative_order: index
      )

      # Compatibility with old diffs created with Psych.
      diff_hash.tap do |hash|
        diff_text = hash[:diff]

        # Binary-encoded text with non-ASCII bytes is stored Base64-encoded
        # (pack 'm0' emits no line feeds) and flagged as binary.
        if diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
          hash[:binary] = true
          hash[:diff] = [diff_text].pack('m0')
        end
      end
    end

    Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
  end

  # Builds a diff collection from the stored diffs, optionally restricted to
  # entries whose old or new path is listed in options[:paths]. Returns an
  # empty collection when nothing usable is stored.
  def load_diffs(options)
    raw = diffs_from_database
    return Gitlab::Git::DiffCollection.new([]) unless raw

    paths = options[:paths]

    if paths
      raw = raw.select do |diff|
        paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
      end
    end

    Gitlab::Git::DiffCollection.new(raw, options)
  end

  # Stored diffs, memoized: the serialized `st_diffs` when present and valid,
  # otherwise the merge_request_diff_files rows as hashes, otherwise nil.
  # `defined?` is used so that a nil result is memoized as well.
  def diffs_from_database
    return @diffs_from_database if defined?(@diffs_from_database)

    @diffs_from_database =
      if st_diffs.present?
        # Present but invalid st_diffs yields nil; it does not fall through to
        # the diff files association.
        st_diffs if valid_raw_diff?(st_diffs)
      elsif merge_request_diff_files.present?
        merge_request_diff_files.map(&:to_hash)
      end
  end

  # Wraps each stored commit (serialized or from the association) in a Commit
  # bound to the merge request's source project.
  def load_commits
    commits = st_commits.presence || merge_request_diff_commits

    commits.map do |commit|
      Commit.new(Gitlab::Git::Commit.new(commit.to_hash), merge_request.source_project)
    end
  end

  # Computes the diff from the repository, stores the per-file rows and
  # updates `state` / `real_size` accordingly.
  def save_diffs
    new_attributes = {}

    if compare.commits.size.zero?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)
      new_attributes[:real_size] = diff_collection.real_size

      if diff_collection.any?
        new_attributes[:state] = :collected

        create_merge_request_diff_files(diff_collection)
      end

      # Set our state to 'overflow' to make the #empty? and #collected?
      # methods (generated by StateMachine) return false.
      #
      # This attribution has to come at the end of the method so 'overflow'
      # state does not get overridden by 'collected'.
      new_attributes[:state] = :overflow if diff_collection.overflow?
    end

    update(new_attributes)
  end

  # Stores the compared commits (in reversed order) and reloads the
  # association so it reflects the bulk-created rows.
  def save_commits
    MergeRequestDiffCommit.create_bulk(self.id, compare.commits.reverse)

    merge_request_diff_commits.reload
  end

  # Repository of the target project.
  def repository
    project.repository
  end

  # SHA of the merge base of head and start, or nil when either SHA is
  # missing or no merge base commit is returned.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  end

  # Asks both the target and the source repository to keep the start, head
  # and base commits around.
  def keep_around_commits
    [repository, merge_request.source_project.repository].each do |repo|
      repo.keep_around(start_commit_sha)
      repo.keep_around(head_commit_sha)
      repo.keep_around(base_commit_sha)
    end
  end
end