summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff.rb
blob: a75fcb4c4f605775ea0a46faf09357e751ac8dc8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# Stores the commits and diffs of a merge request as serialized columns
# (+st_commits+ / +st_diffs+) so they can be read back without asking the
# repository again.
class MergeRequestDiff < ActiveRecord::Base
  include Sortable
  include Importable

  # Intended upper bound on the number of commits above which diffs are not
  # stored.
  # NOTE(review): nothing in this class reads the constant; confirm whether
  # external callers still rely on it before changing or removing it.
  COMMITS_SAFE_SIZE = 100

  belongs_to :merge_request

  delegate :head_source_sha, :target_branch, :source_branch, to: :merge_request, prefix: nil

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  serialize :st_commits
  serialize :st_diffs

  after_create :reload_content, unless: :importing?

  # Refreshes the stored commits first and then the stored diffs.
  # The order matters: #reload_diffs inspects #commits.
  def reload_content
    reload_commits
    reload_diffs
  end

  # Returns the stored +real_size+ when present, otherwise the size of the
  # loaded diff collection.
  def size
    real_size.presence || diffs.size
  end

  # Returns the diffs of this merge request.
  #
  # With +options[:ignore_whitespace_change]+ set, the diffs are computed
  # from the repository (the stored diffs cannot be reused) and memoized
  # once. Otherwise the stored +st_diffs+ are wrapped in a DiffCollection,
  # memoized per +options+ hash so that a later call with different options
  # does not get back a collection built for the earlier ones.
  def diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= begin
        whitespace_compare = Gitlab::Git::Compare.new(
          repository.raw_repository,
          base,
          head
        )
        whitespace_compare.diffs(options)
      end
    else
      @diffs ||= {}
      @diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

  # Commit objects rebuilt from +st_commits+ (memoized). Empty when nothing
  # has been stored.
  def commits
    @commits ||= load_commits(st_commits || [])
  end

  # First element of #commits (nil when there are no commits).
  def last_commit
    commits.first
  end

  # Last element of #commits (nil when there are no commits).
  def first_commit
    commits.last
  end

  # Commit in the target project identified by +base_commit_sha+, or nil
  # when no base sha has been recorded.
  def base_commit
    return nil unless base_commit_sha

    merge_request.target_project.commit(base_commit_sha)
  end

  # Short id of #last_commit, or nil when there are no commits.
  def last_commit_short_sha
    @last_commit_short_sha ||= last_commit.try(:short_id)
  end

  # Converts commit objects into an array of hashes suitable for +st_commits+.
  def dump_commits(commits)
    commits.map(&:to_hash)
  end

  # Rebuilds Commit objects (bound to the source project) from an array of
  # hashes as produced by #dump_commits.
  def load_commits(array)
    array.map { |hash| Commit.new(Gitlab::Git::Commit.new(hash), merge_request.source_project) }
  end

  # Converts a diff collection into an array of hashes suitable for
  # +st_diffs+. Returns nil when +diffs+ does not respond to #map.
  def dump_diffs(diffs)
    if diffs.respond_to?(:map)
      diffs.map(&:to_hash)
    end
  end

  # Wraps +raw+ in a DiffCollection. Anything that is not enumerable
  # (for example nil) yields an empty collection built without +options+.
  def load_diffs(raw, options)
    if raw.respond_to?(:each)
      Gitlab::Git::DiffCollection.new(raw, options)
    else
      Gitlab::Git::DiffCollection.new([])
    end
  end

  # Collect array of Git::Commit objects
  # between target and source branches.
  # When present, the commits are decorated for the source project and
  # returned in reversed order; otherwise the raw (blank) result of the
  # comparison is returned unchanged.
  def unmerged_commits
    commits = compare.commits

    if commits.present?
      commits = Commit.decorate(commits, merge_request.source_project).reverse
    end

    commits
  end

  # Reload all commits related to current merge request from repo
  # and save it as array of hashes in st_commits db field.
  # Nothing is written when the comparison yields no commits.
  def reload_commits
    new_attributes = {}

    commit_objects = unmerged_commits

    if commit_objects.present?
      new_attributes[:st_commits] = dump_commits(commit_objects)
    end

    update_columns_serialized(new_attributes)
  end

  # Reload diffs between branches related to current merge request from repo
  # and save it as array of hashes in st_diffs db field.
  # Also records +state+, +real_size+ (only when there are commits) and
  # +base_commit_sha+.
  def reload_diffs
    new_attributes = {}
    new_diffs = []

    if commits.size.zero?
      new_attributes[:state] = :empty
    else
      diff_collection = unmerged_diffs

      if diff_collection.overflow?
        # Set our state to 'overflow' to make the #empty? and #collected?
        # methods (generated by StateMachine) return false.
        new_attributes[:state] = :overflow
      end

      new_attributes[:real_size] = diff_collection.real_size

      # Assigned after the overflow check, so :collected wins over :overflow
      # whenever the collection contains at least one diff.
      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs
    new_attributes[:base_commit_sha] = repository.merge_base(head, base)

    update_columns_serialized(new_attributes)
  end

  # Collect array of Git::Diff objects
  # between target and source branches
  def unmerged_diffs
    compare.diffs(Commit.max_diff_options)
  end

  # Repository of the merge request's target project.
  def repository
    merge_request.target_project.repository
  end

  # Sha of the source side: the merge request's +head_source_sha+ when
  # present, otherwise the sha of the source branch's commit in the source
  # project (nil when that commit cannot be found).
  def source_sha
    return head_source_sha if head_source_sha.present?

    source_commit = merge_request.source_project.commit(source_branch)
    source_commit.try(:sha)
  end

  # Sha of the target side, as reported by the merge request.
  def target_sha
    merge_request.target_sha
  end

  # Base ref of the comparison: the target sha, falling back to the target
  # branch name.
  def base
    target_sha || target_branch
  end

  # Head ref of the comparison.
  def head
    source_sha
  end

  # Memoized comparison between #base and #head in the target repository.
  # The merge request ref is fetched once, before the comparison is built.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          base,
          head
        )
      end
  end

  private

  #
  # #save or #update_attributes with changes to serialized attributes perform a lot of
  # serialization and deserialization calls, with bad performance.
  # Using #update_columns solves the problem with just one YAML.dump per serialized attribute that we provide.
  # As a tradeoff we need to reload the current instance to properly manage time objects in those serialized
  # attributes. So to keep the same behaviour as attribute assignment we reload the instance.
  # The difference is in the usage of
  # #write_attribute= (#update_attributes) and #raw_write_attribute= (#update_columns)
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  # Does nothing (and returns nil) when +new_attributes+ is empty.
  def update_columns_serialized(new_attributes)
    return unless new_attributes.any?

    update_columns(new_attributes)
    reload
  end
end