summaryrefslogtreecommitdiff
path: root/lib/gitlab/git/diff_collection.rb
blob: bcbad8ec829d76270e90d1d06b1c19bf0e93bb4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
module Gitlab
  module Git
    # Enumerable collection of Gitlab::Git::Diff objects built lazily from an
    # iterator of raw diffs.
    #
    # Diff instances are created on demand while iterating and cached in an
    # internal array, so later iterations reuse them. Unless +all_diffs+ is
    # set, iteration stops (and the collection is flagged as overflowing)
    # once the file, line or byte limits are reached.
    class DiffCollection
      include Enumerable

      DEFAULT_LIMITS = { max_files: 100, max_lines: 5000 }.freeze

      # Byte budget per file used to derive the byte limits from the file
      # limits (an average of 5 KB per file).
      AVERAGE_BYTES_PER_FILE = 5120

      # iterator - object responding to +each_with_index+ and +any?+ that
      #            yields raw diffs accepted by Gitlab::Git::Diff.new.
      # options  - Hash of optional settings:
      #            :max_files   - hard limit on the number of diffs
      #                           (default DEFAULT_LIMITS[:max_files]).
      #            :max_lines   - hard limit on the cumulative line count
      #                           (default DEFAULT_LIMITS[:max_lines]).
      #            :all_diffs   - when truthy, hard limits and collapsing are
      #                           disabled (default false).
      #            :no_collapse - when truthy, diffs are never collapsed
      #                           (default true).
      def initialize(iterator, options = {})
        @iterator = iterator
        @max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files])
        @max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines])
        @max_bytes = @max_files * AVERAGE_BYTES_PER_FILE
        # The "safe" limits can never exceed the defaults, whatever the
        # caller asked for; they decide when collapsed diffs get pruned.
        @safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min
        @safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min
        @safe_max_bytes = @safe_max_files * AVERAGE_BYTES_PER_FILE
        @all_diffs = !!options.fetch(:all_diffs, false)
        @no_collapse = !!options.fetch(:no_collapse, true)

        @line_count = 0
        @byte_count = 0
        @overflow = false
        @populated = false
        @array = []
      end

      # Yields every Diff in the collection, building and caching them on
      # first traversal. Returns an Enumerator when called without a block,
      # instead of failing with LocalJumpError halfway through the iterator.
      def each(&block)
        return enum_for(:each) unless block

        if @populated
          # @iterator.each is slower than just iterating the array in place
          @array.each(&block)
        else
          # NOTE(review): migrate is defined outside this file; it is assumed
          # to simply run the given block for the :commit_raw_diffs feature.
          Gitlab::GitalyClient.migrate(:commit_raw_diffs) do
            each_patch(&block)
          end
        end
      end

      # True when the underlying iterator reports no (truthy) elements.
      def empty?
        !@iterator.any?
      end

      # True when iteration was cut short by one of the hard limits.
      # Forces the collection to be fully populated first.
      def overflow?
        populate!
        @overflow
      end

      # Number of diffs that are actually yielded (memoized).
      def size
        @size ||= count # forces a loop using each method
      end

      # Size as a String, suffixed with "+" when the collection overflowed.
      def real_size
        populate!

        if @overflow
          "#{size}+"
        else
          size.to_s
        end
      end

      # Replaces every cached element with the value returned by the given
      # block and marks the collection as populated, so later iterations
      # yield the decorated values. Returns the result of each_with_index.
      def decorate!
        collection = each_with_index do |element, i|
          @array[i] = yield(element)
        end
        @populated = true
        collection
      end

      private

      # Walks the whole collection once so that @array and @overflow are
      # final, then switches #each to the cached array.
      def populate!
        return if @populated

        each { nil } # force a loop through all diffs
        @populated = true
        nil
      end

      # files - number of diffs processed before the current one.
      #
      # True when any of the safe limits has been reached.
      def over_safe_limits?(files)
        files >= @safe_max_files || @line_count > @safe_max_lines || @byte_count >= @safe_max_bytes
      end

      # Iterates the raw iterator, yielding cached Diff instances where they
      # exist and creating (and caching) new ones until a limit is hit.
      def each_patch
        @iterator.each_with_index do |raw, i|
          # First yield cached Diff instances from @array
          if @array[i]
            yield @array[i]
            next
          end

          # We have exhausted @array, time to create new Diff instances or stop.
          break if @overflow

          if !@all_diffs && i >= @max_files
            @overflow = true
            break
          end

          collapse = !@all_diffs && !@no_collapse

          diff = Gitlab::Git::Diff.new(raw, collapse: collapse)

          if collapse && over_safe_limits?(i)
            diff.prune_collapsed_diff!
          end

          # Counters are updated after pruning, so a pruned diff contributes
          # only whatever it still reports.
          @line_count += diff.line_count
          @byte_count += diff.diff.bytesize

          if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes)
            # This last Diff instance pushes us over the lines or bytes
            # limit. We stop and discard it.
            @overflow = true
            break
          end

          yield @array[i] = diff
        end
      end
    end
  end
end