summaryrefslogtreecommitdiff
path: root/etc/todo/scanners/php.rb
blob: 239fbf8b800b46fd96e1e43ec1bb951cbcba7e61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# Core extension: lets regular expressions be combined with operators.
class Regexp
  # Alternation: returns a pattern that matches whatever +self+ or +other+
  # matches (delegates to Regexp.union).
  def |(other)
    Regexp.union([self, other])
  end

  # Concatenation: returns a pattern made of the string form of +self+
  # immediately followed by the string form of +other+.
  def +(other)
    Regexp.new("#{self}#{other}")
  end
end
module CodeRay
module Scanners

  load :html

  class PHP < Scanner

    register_for :php

    # Creates the nested HTML scanner and stores it in @html_scanner.
    # The scanner is handed this scanner's @tokens together with the
    # :keep_tokens and :keep_state options; scan_tokens passes every
    # stretch of text found outside the PHP tags to it.
    def setup
      @html_scanner = CodeRay.scanner(
        :html,
        :tokens => @tokens,
        :keep_tokens => true,
        :keep_state => true
      )
    end

    # Resets this scanner through the superclass implementation and then
    # resets the nested HTML scanner created in #setup as well.
    def reset_instance
      super
      @html_scanner.reset
    end

    # Word lists used to classify PHP identifiers.
    module Words
      # Control-flow keywords; classified as :reserved.
      ControlKeywords = %w[
        if else elseif while do for switch case default declare foreach as
        endif endwhile endfor endforeach endswitch enddeclare return break
        continue exit die try catch throw
      ]

      # Declaration keywords, language constructs and literals; classified
      # as :pre_type.
      OtherKeywords = %w[
        function class extends implements instanceof parent self var const
        private public protected static abstract final global new echo include
        require include_once require_once eval print use unset isset empty
        interface list array clone null true false
      ]

      # Magic constants; classified as :pre_constant.
      SpecialConstants = %w[
        __LINE__ __FILE__ __CLASS__
        __METHOD__ __FUNCTION__
      ]

      # Lookup table from identifier text to token kind. It is built by
      # feeding each word list, in order, to WordList#add, starting from
      # WordList.new(:ident) (presumably :ident is the kind returned for
      # unlisted words -- confirm against WordList).
      IdentKinds = [
        [ControlKeywords, :reserved],
        [OtherKeywords, :pre_type],
        [SpecialConstants, :pre_constant]
      ].inject(WordList.new(:ident)) { |list, (words, kind)| list.add(words, kind) }
    end
    # Regular expressions describing the lexical elements of PHP.
    # Everything here is built from plain Regexp / Regexp.union calls, so the
    # module does not rely on any operator extensions of the Regexp class.
    module RE
      # Builds a case-insensitive pattern matching any of the given literal
      # strings. Every string is escaped, and alternatives are tried in the
      # order given, so longer operators must precede their own prefixes.
      #
      # array:: Array of Strings to match literally.
      # Returns a Regexp.
      def self.build_alternatives(array)
        Regexp.new(array.map { |s| Regexp.escape(s) }.join('|'), Regexp::IGNORECASE)
      end

      # Tags that switch from HTML to PHP. The pattern is compiled in
      # extended mode, where literal blanks are ignored, so the blank inside
      # the <script ...> tag has to be written as \s+.
      PHPStart = /
        <script\s+language="php"> |
        <script\s+language='php'> |
        <\?php                   |
        <\?(?!xml)               |
        <%
      /xi

      # Tags that switch from PHP back to HTML.
      PHPEnd = %r!
        </script> |
        \?>        |
        %>
      !xi

      # Identifier characters: ASCII letters, digits, underscore and any
      # non-ASCII character. The non-ASCII part is written as a negated ASCII
      # range because \x80-\xFF is an invalid multibyte escape in a regexp
      # literal when the source encoding is UTF-8.
      IChar = /[a-z0-9_[^\x00-\x7F]]/i
      IStart = /[a-z_[^\x00-\x7F]]/i
      Identifier = /#{IStart}#{IChar}*/
      Variable = /\$#{Identifier}/

      # Literal cast expressions such as "(int)" or "(string)".
      Typecasts = build_alternatives %w!
        float double real int integer bool boolean string array object null
      !.map { |s| "(#{s})" }

      # One-line comments run to the end of the line or up to (but not
      # including) a PHP end tag, whichever comes first.
      OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
      OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
      OneLineComment = Regexp.union(OneLineComment1, OneLineComment2)

      # Heredoc opener: <<< directly followed by the delimiter identifier.
      HereDoc = /<<<#{Identifier}/

      binops = %w!
        + - * / << >> & | ^ . %
      !

      ComparisionOperator = build_alternatives %w$
        === !== == != <= >=
      $
      IncDecOperator = build_alternatives %w! ++ -- !

      BinaryOperator = build_alternatives binops
      # Compound assignment: every binary operator followed by "=".
      AssignOperator = build_alternatives binops.map { |s| "#{s}=" }
      LogicalOperator = build_alternatives %w! and or xor not && || !
      ObjectOperator = build_alternatives %w! -> :: !
      OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$

      # Any operator. Multi-character groups come before the groups holding
      # their single-character prefixes so that e.g. "+=" is one match.
      Operator = Regexp.union(
        ComparisionOperator, IncDecOperator, LogicalOperator,
        ObjectOperator, AssignOperator, BinaryOperator, OtherOperator
      )

      S = /\s+/

      # Hexadecimal (one or more hex digits after 0x) or decimal integer.
      Integer = Regexp.union(/-?0x[0-9a-fA-F]+/, /-?\d+/)
      # Decimal number with a fractional point and optional exponent, or a
      # digit string with an exponent; the exponent sign is optional.
      Float = /-?(?:(?:\d+\.\d*|\d*\.\d+)(?:e[+-]?\d+)?|\d+e[+-]?\d+)/i

    end



    # Splits the scanner's input into [text, kind] pairs and appends them to
    # +tokens+.
    #
    # The scanner is a small state machine driven by a stack of states:
    # :html::          outside PHP tags; text is handed to @html_scanner
    # :php::           PHP code (pushed again for every nested "{")
    # :mlcomment::     inside /* ... */
    # :sqstring::      inside a single-quoted string
    # :dqstring::      inside a double-quoted string
    # :heredocstring:: inside a heredoc body
    #
    # tokens::  token list; must support << and negative indexing.
    # options:: accepted for interface compatibility; not used here.
    #
    # Returns +tokens+. Raises a RuntimeError for an unknown state.
    def scan_tokens(tokens, options)
      states = [:php, :html]
      heredocdelim = nil
      # Zero-width match at the next PHP start tag; built once per call.
      before_php_start = /(?=#{RE::PHPStart})/

      until eos?
        match = nil
        kind = nil

        case states.last
        when :html
          if scan(RE::PHPStart)
            kind = :delimiter
            states.pop
          else
            # Everything up to the next PHP start tag (or the end of the
            # input) goes to the HTML scanner; only an empty placeholder
            # token is emitted from here.
            match = scan_until(before_php_start) || scan_until(/\z/)
            @html_scanner.tokenize match unless match.empty?
            kind = :space
            match = ''
          end

        when :php
          if scan(RE::PHPEnd)
            kind = :delimiter
            states.push :html

          elsif scan(RE::S)
            kind = :space

          elsif scan(%r{/\*})
            kind = :comment
            states.push :mlcomment

          elsif scan(RE::OneLineComment)
            kind = :comment

          elsif match = scan(RE::Identifier)
            kind = Words::IdentKinds[match]
            if kind == :ident
              # The token before the previous one; nil when fewer than two
              # tokens have been produced so far.
              previous = tokens[-2]
              if check(/:(?!:)/) && previous && previous[0] == 'case'
                kind = :label
              elsif match =~ /\A[A-Z]/
                kind = :constant
              end
            end

          # Float must be tried before Integer, otherwise "1.5" is split
          # into the integer "1" and the float ".5".
          elsif scan(RE::Float)
            kind = :float

          elsif scan(RE::Integer)
            kind = :integer

          elsif scan(/'/)
            kind = :delimiter
            states.push :sqstring

          elsif scan(/"/)
            kind = :delimiter
            states.push :dqstring

          elsif match = scan(RE::HereDoc)
            # Remember the delimiter so the heredoc state can find the end.
            heredocdelim = match[RE::Identifier]
            kind = :delimiter
            states.push :heredocstring

          elsif scan(RE::Variable)
            kind = :local_variable

          elsif scan(/\{/)
            kind = :operator
            states.push :php

          elsif scan(/\}/)
            if states.length == 1
              # Closing brace without a matching opening brace.
              kind = :error
            else
              kind = :operator
              states.pop
            end

          elsif scan(RE::Operator)
            kind = :operator

          else
            getch
            kind = :error

          end

        when :mlcomment
          if scan(%r{(?:[^\n\r\f*]|\*(?!/))+})
            kind = :comment

          elsif scan(%r{\*/})
            kind = :comment
            states.pop

          elsif scan(/[\r\n\f]+/)
            kind = :space

          else
            # Not expected to happen; guarantees progress.
            getch
            kind = :error
          end

        when :sqstring
          if scan(/[^\r\n\f'\\]+/)
            kind = :string
          elsif scan(/\\\\|\\'/)
            kind = :char
          elsif scan(/\\/)
            kind = :string
          elsif scan(/[\r\n\f ]+/)
            kind = :space
          elsif scan(/'/)
            kind = :delimiter
            states.pop
          else
            # Not expected to happen; guarantees progress.
            getch
            kind = :error
          end

        when :dqstring
          # TODO: more complex $foo[bar] style interpolation
          if scan(/[^\r\n\f"${\\]+/)
            kind = :string
          elsif scan(/\\x[0-9a-fA-F]{1,2}/)
            # hexadecimal escape: one or two hex digits
            kind = :char
          elsif scan(/\\[0-7]{1,3}/)
            # octal escape: one to three octal digits
            kind = :char
          elsif scan(/\\["\\\$efnrtv]/)
            # named escapes of PHP double-quoted strings
            kind = :char
          elsif scan(/\\/)
            kind = :string
          elsif scan(/[\r\n\f]+/)
            kind = :space
          elsif match = scan(RE::Variable)
            kind = :local_variable
            if check(/\[#{RE::Identifier}\]/)
              match << scan(/\[#{RE::Identifier}\]/)
            elsif check(/\[/)
              # Unterminated or non-identifier index.
              match << scan(/\[#{RE::Identifier}?/)
              kind = :error
            elsif check(/->#{RE::Identifier}/)
              match << scan(/->#{RE::Identifier}/)
            end
          elsif scan(/\{/)
            if check(/\$/)
              # "{$" starts an embedded PHP expression; the matching "}"
              # pops back into this string.
              kind = :operator
              states.push :php
            else
              kind = :string
            end
          elsif scan(/\$\{#{RE::Identifier}\}/)
            kind = :local_variable
          elsif scan(/\$/)
            kind = :string
          elsif scan(/"/)
            kind = :delimiter
            states.pop
          else
            # Not expected to happen; guarantees progress.
            getch
            kind = :error
          end

        when :heredocstring
          # Every content token below consumes the rest of its line, so a
          # non-newline position in this state is either the start of a line
          # or the spot right after the opener (where no identifier
          # character can follow). Variable interpolation inside the body is
          # not highlighted.
          if scan(/[\r\n\f]+/)
            kind = :space
          elsif scan(/#{Regexp.escape(heredocdelim)}(?=;?(?:[\r\n\f]|\z))/)
            kind = :delimiter
            heredocdelim = nil
            states.pop
          else
            scan(/[^\r\n\f]+/)
            kind = :string
          end

        else
          raise "Unknown state!"
        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end
      tokens

    end

  end

end
end