summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/diff.rb
blob: 74a6c27a9015def6643524b421040bfa3ef9cc83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
module CodeRay
module Scanners
  
  # Scanner for output of the diff command.
  # 
  # Alias: +patch+
  class Diff < Scanner
    
    register_for :diff
    title 'diff output'
    
    DEFAULT_OPTIONS = {
      :highlight_code => true,
      :inline_diff    => true,
    }
    
  protected
    
    def scan_tokens encoder, options
      
      line_kind = nil
      state = :initial
      deleted_lines_count = 0
      scanners = Hash.new do |h, lang|
        h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true
      end
      content_scanner = scanners[:plain]
      content_scanner_entry_state = nil
      
      until eos?
        
        if match = scan(/\n/)
          deleted_lines_count = 0 unless line_kind == :delete
          if line_kind
            encoder.end_line line_kind
            line_kind = nil
          end
          encoder.text_token match, :space
          next
        end
        
        case state
        
        when :initial
          if match = scan(/--- |\+\+\+ |=+|_+/)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            if match = scan(/[^\x00\n]+?(?=$|[\t\n]|  \(revision)/)
              encoder.text_token match, :filename
              if options[:highlight_code] && match != '/dev/null'
                file_type = CodeRay::FileType.fetch(match, :text)
                file_type = :text if file_type == :diff
                content_scanner = scanners[file_type]
                content_scanner_entry_state = nil
              end
            end
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          elsif match = scan(/Index: |Property changes on: /)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          elsif match = scan(/Added: /)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
            state = :added
          elsif match = scan(/\\ .*/)
            encoder.text_token match, :comment
          elsif match = scan(/@@(?>[^@\n]+)@@/)
            content_scanner.state = :initial unless match?(/\n\+/)
            content_scanner_entry_state = nil
            if check(/\n|$/)
              encoder.begin_line line_kind = :change
            else
              encoder.begin_group :change
            end
            encoder.text_token match[0,2], :change
            encoder.text_token match[2...-2], :plain
            encoder.text_token match[-2,2], :change
            encoder.end_group :change unless line_kind
            next unless match = scan(/.+/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/\+/)
            encoder.begin_line line_kind = :insert
            encoder.text_token match, :insert
            next unless match = scan(/.+/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/-/)
            deleted_lines_count += 1
            if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && changed_lines_count <= 100_000 && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/)
              deleted_lines  = Array.new(changed_lines_count) { |i| skip(/\n\-/) if i > 0; scan(/.*/) }
              inserted_lines = Array.new(changed_lines_count) { |i| skip(/\n\+/)         ; scan(/.*/) }
              
              deleted_lines_tokenized  = []
              inserted_lines_tokenized = []
              for deleted_line, inserted_line in deleted_lines.zip(inserted_lines)
                pre, deleted_part, inserted_part, post = diff deleted_line, inserted_line
                content_scanner_entry_state = content_scanner.state
                deleted_lines_tokenized  << content_scanner.tokenize([pre, deleted_part, post], :tokens => Tokens.new)
                content_scanner.state = content_scanner_entry_state || :initial
                inserted_lines_tokenized << content_scanner.tokenize([pre, inserted_part, post], :tokens => Tokens.new)
              end
              
              for pre, deleted_part, post in deleted_lines_tokenized
                encoder.begin_line :delete
                encoder.text_token '-', :delete
                encoder.tokens pre
                unless deleted_part.empty?
                  encoder.begin_group :eyecatcher
                  encoder.tokens deleted_part
                  encoder.end_group :eyecatcher
                end
                encoder.tokens post
                encoder.end_line :delete
                encoder.text_token "\n", :space
              end
              
              for pre, inserted_part, post in inserted_lines_tokenized
                encoder.begin_line :insert
                encoder.text_token '+', :insert
                encoder.tokens pre
                unless inserted_part.empty?
                  encoder.begin_group :eyecatcher
                  encoder.tokens inserted_part
                  encoder.end_group :eyecatcher
                end
                encoder.tokens post
                changed_lines_count -= 1
                if changed_lines_count > 0
                  encoder.end_line :insert
                  encoder.text_token "\n", :space
                end
              end
              
              line_kind = :insert
              
            elsif match = scan(/.*/)
              encoder.begin_line line_kind = :delete
              encoder.text_token '-', :delete
              if options[:highlight_code]
                if deleted_lines_count == 1
                  content_scanner_entry_state = content_scanner.state
                end
                content_scanner.tokenize match, :tokens => encoder unless match.empty?
                if !match?(/\n-/)
                  if match?(/\n\+/)
                    content_scanner.state = content_scanner_entry_state || :initial
                  end
                  content_scanner_entry_state = nil
                end
              else
                encoder.text_token match, :plain
              end
            end
            next
          elsif match = scan(/ .*/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/.+/)
            encoder.begin_line line_kind = :comment
            encoder.text_token match, :plain
          else
            raise_inspect 'else case rached'
          end
        
        when :added
          if match = scan(/   \+/)
            encoder.begin_line line_kind = :insert
            encoder.text_token match, :insert
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          else
            state = :initial
            next
          end
        end
        
      end
      
      encoder.end_line line_kind if line_kind
      
      encoder
    end
    
  private
    
    def diff a, b
      # i will be the index of the leftmost difference from the left.
      i_max = [a.size, b.size].min
      i = 0
      i += 1 while i < i_max && a[i] == b[i]
      # j_min will be the index of the leftmost difference from the right.
      j_min = i - i_max
      # j will be the index of the rightmost difference from the right which
      # does not precede the leftmost one from the left.
      j = -1
      j -= 1 while j >= j_min && a[j] == b[j]
      return a[0...i], a[i..j], b[i..j], (j < -1) ? a[j+1..-1] : ''
    end
    
  end
  
end
end