summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff.rb
blob: feaba925bad1197818dd18d623d0a2013b9e48c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# Stores the commits and diffs of a merge request as serialized columns
# (+st_commits+ / +st_diffs+) together with the start, head and base commit
# SHAs the content was computed from.
class MergeRequestDiff < ActiveRecord::Base
  include Sortable
  include Importable

  # Prevent storing the diff when the number of commits exceeds this limit.
  # NOTE(review): nothing in this class reads the constant; it is kept
  # because code outside this file may still reference it.
  COMMITS_SAFE_SIZE = 100

  belongs_to :merge_request

  delegate :source_branch_sha, :target_branch_sha, :target_branch, :source_branch, to: :merge_request, prefix: nil

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  serialize :st_commits
  serialize :st_diffs

  after_create :reload_content, unless: :importing?
  after_save :keep_around_commits, unless: :importing?

  # Recompute the stored commits first, then the stored diffs (the diff
  # reload looks at the commits to decide whether the diff is empty).
  def reload_content
    reload_commits
    reload_diffs
  end

  # Number of diffs: the stored +real_size+ when present, otherwise the size
  # of the loaded diff collection.
  def size
    real_size.presence || diffs.size
  end

  # Returns the diff collection for the given options.
  #
  # With +options[:ignore_whitespace_change]+ the diff is computed from the
  # repository (between the stored start/head SHAs, falling back to the
  # current branch SHAs); otherwise it is loaded from +st_diffs+.
  #
  # Results are memoized per +options+ hash in both branches, so calls with
  # different options (e.g. different +:paths+) never share a cached result.
  def diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||= begin
        whitespace_compare = Gitlab::Git::Compare.new(
          repository.raw_repository,
          start_commit_sha || target_branch_sha,
          head_commit_sha || source_branch_sha
        )
        whitespace_compare.diffs(options)
      end
    else
      @diffs ||= {}
      @diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

  # Commit objects rebuilt from +st_commits+ (memoized). Empty array when
  # nothing is stored.
  def commits
    @commits ||= load_commits(st_commits || [])
  end

  # First element of #commits.
  # NOTE(review): presumably the newest commit, since #unmerged_commits
  # reverses the compare result before storing - confirm the compare order.
  def last_commit
    commits.first
  end

  # Last element of #commits (see the ordering note on #last_commit).
  def first_commit
    commits.last
  end

  # Commit for +base_commit_sha+, or nil when no SHA is stored.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

  # Commit for +start_commit_sha+, or nil when no SHA is stored.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Commit for +head_commit_sha+; falls back to #last_commit when no SHA is
  # stored.
  def head_commit
    return last_commit unless head_commit_sha

    project.commit(head_commit_sha)
  end

  # Memoized comparison between the merge request's target and source branch
  # SHAs. The merge request ref is fetched before the comparison is built.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          target_branch_sha,
          source_branch_sha
        )
      end
  end

  private

  # Collect array of Git::Commit objects
  # between target and source branches.
  # A blank compare result is returned untouched; otherwise the commits are
  # decorated for the source project and reversed.
  def unmerged_commits
    raw_commits = compare.commits
    return raw_commits unless raw_commits.present?

    Commit.decorate(raw_commits, merge_request.source_project).reverse
  end

  # Convert commit objects into hashes suitable for the +st_commits+ column.
  def dump_commits(commits)
    commits.map(&:to_hash)
  end

  # Rebuild Commit objects (bound to the source project) from stored hashes.
  def load_commits(array)
    array.map { |hash| Commit.new(Gitlab::Git::Commit.new(hash), merge_request.source_project) }
  end

  # Reload all commits related to current merge request from repo
  # and save it as array of hashes in st_commits db field.
  # When the comparison yields no commits the stored value is left as is.
  def reload_commits
    commit_objects = unmerged_commits

    new_attributes = {}
    new_attributes[:st_commits] = dump_commits(commit_objects) if commit_objects.present?

    update_columns_serialized(new_attributes)
  end

  # Collect array of Git::Diff objects
  # between target and source branches
  def unmerged_diffs
    compare.diffs(Commit.max_diff_options)
  end

  # Convert diffs into hashes suitable for the +st_diffs+ column. Returns nil
  # when the argument does not respond to +map+.
  def dump_diffs(diffs)
    diffs.map(&:to_hash) if diffs.respond_to?(:map)
  end

  # Build a DiffCollection from the stored raw diffs.
  #
  # raw     - the deserialized +st_diffs+ value; anything that does not
  #           respond to +each+ yields an empty collection.
  # options - passed on to the collection; +options[:paths]+, when given,
  #           keeps only diffs whose old or new path is in the list.
  def load_diffs(raw, options)
    return Gitlab::Git::DiffCollection.new([]) unless raw.respond_to?(:each)

    paths = options[:paths]

    if paths
      raw = raw.select do |diff|
        paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
      end
    end

    Gitlab::Git::DiffCollection.new(raw, options)
  end

  # Reload diffs between branches related to current merge request from repo
  # and save it as array of hashes in st_diffs db field
  def reload_diffs
    new_attributes = {}
    new_diffs = []

    if commits.empty?
      new_attributes[:state] = :empty
    else
      diff_collection = unmerged_diffs

      if diff_collection.overflow?
        # Set our state to 'overflow' to make the #empty? and #collected?
        # methods (generated by StateMachine) return false.
        new_attributes[:state] = :overflow
      end

      new_attributes[:real_size] = diff_collection.real_size

      # Any collected diff takes precedence over the overflow state set above.
      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs

    new_attributes[:start_commit_sha] = target_branch_sha
    new_attributes[:head_commit_sha] = source_branch_sha
    new_attributes[:base_commit_sha] = branch_base_sha

    update_columns_serialized(new_attributes)

    # #update_columns skips callbacks, so the after_save hook does not run
    # for the write above; keep the referenced commits around explicitly.
    keep_around_commits
  end

  # Target project of the merge request.
  def project
    merge_request.target_project
  end

  # Repository of the target project.
  def repository
    project.repository
  end

  # Merge base of the source and target branch SHAs, or nil when either SHA
  # is missing.
  def branch_base_commit
    return unless source_branch_sha && target_branch_sha

    project.merge_base_commit(source_branch_sha, target_branch_sha)
  end

  # SHA of the merge base commit, or nil when there is none.
  def branch_base_sha
    branch_base_commit.try(:sha)
  end

  #
  # #save or #update_attributes providing changes on serialized attributes do a lot of
  # serialization and deserialization calls resulting in bad performance.
  # Using #update_columns solves the problem with just one YAML.dump per serialized attribute that we provide.
  # As a tradeoff we need to reload the current instance to properly manage time objects on those serialized
  # attributes. So to keep the same behaviour as the attribute assignment we reload the instance.
  # The difference is in the usage of
  # #write_attribute= (#update_attributes) and #raw_write_attribute= (#update_columns)
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  # Does nothing (and returns nil) when +new_attributes+ is empty; otherwise
  # returns the reloaded record.
  def update_columns_serialized(new_attributes)
    return unless new_attributes.any?

    update_columns(new_attributes.merge(updated_at: current_time_from_proper_timezone))
    reset_memoized_content
    reload
  end

  # Drop the values memoized by #commits and #diffs. They are derived from
  # the columns rewritten in #update_columns_serialized and are not cleared
  # by #reload, so without this the readers would keep returning
  # pre-update content.
  def reset_memoized_content
    @commits = nil
    @diffs = nil
    @diffs_no_whitespace = nil
  end

  # Ask the repository to keep the target, source and merge-base commits
  # around.
  # NOTE(review): +branch_base_sha+ may be nil here; presumably
  # Repository#keep_around tolerates that - confirm.
  def keep_around_commits
    repository.keep_around(target_branch_sha)
    repository.keep_around(source_branch_sha)
    repository.keep_around(branch_base_sha)
  end
end