summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/java_script.rb
blob: 5e27813703be598b8e2a6cebdc2d3b545d433656 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
module CodeRay
module Scanners
  
  # Scanner for JavaScript.
  # 
  # Aliases: +ecmascript+, +ecma_script+, +javascript+
  class JavaScript < Scanner
    
    # NOTE(review): register_for and file_extension are not defined in this
    # file; presumably they are plugin-registration helpers provided by
    # Scanner that bind this class to the :java_script name and the 'js'
    # file extension - confirm against the Scanner base class.
    register_for :java_script
    file_extension 'js'
    
    # The actual JavaScript keywords.
    # Identifiers found in this list are tagged :keyword via IDENT_KIND.
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]  # :nodoc:
    # Built-in literal-like names; tagged :predefined_constant via IDENT_KIND.
    PREDEFINED_CONSTANTS = %w[
      false null true undefined NaN Infinity
    ]  # :nodoc:
    
    # Implicitly available variables; tagged :local_variable via IDENT_KIND.
    MAGIC_VARIABLES = %w[ this arguments ]  # :nodoc: arguments was introduced in JavaScript 1.4
    
    # Keywords after which scan_tokens expects a value (so that a following
    # "/" is scanned as the start of a regexp literal, not as division).
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]  # :nodoc:
    
    # Reserved for future use.
    # Tagged :reserved via IDENT_KIND.
    RESERVED_WORDS = %w[
      abstract boolean byte char class debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]  # :nodoc:
    
    # Lookup table from an identifier to its token kind, used by scan_tokens.
    # NOTE(review): WordList is defined elsewhere; the :ident argument is
    # presumably the kind returned for words not in any list - scan_tokens
    # relies on that by comparing the lookup result with :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)  # :nodoc:
    
    # The three patterns below describe what may follow a backslash; the
    # backslash itself is matched separately in scan_tokens.
    # ESCAPE: single-character escapes, \xH / \xHH, and 1-3 octal digits.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # UNICODE_ESCAPE: \uHHHH or \UHHHHHHHH.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    # REGEXP_ESCAPE: character-class / boundary shorthands, only used inside
    # regexp literals.
    REGEXP_ESCAPE =  / [bBdDsSwW] /x  # :nodoc:
    # Keyed by the opening delimiter: matches a run of characters that are
    # neither a backslash nor that delimiter (the plain content of a literal).
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:
    # Keyed by the opening quote: look-ahead applied right after an opening
    # quote to test whether the rest of the string (including backslash
    # escapes) is closed by the same quote and followed by optional
    # whitespace and a colon, i.e. whether the string is an object key.
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }  # :nodoc:
    
  protected
    
    # Initializes @state to :initial. scan_tokens reads @state as its
    # starting state whenever no :state option is passed to it.
    def setup
      start_state = :initial
      @state = start_state
    end
    
    # Tokenizes the input as JavaScript and sends the tokens to +encoder+.
    #
    # The scanner is a small state machine:
    # * :initial                 - ordinary code
    # * :string, :regexp, :key   - inside a delimited literal; +string_delimiter+
    #                              holds the opening delimiter
    # * :open_multi_line_comment - inside a /* comment that is not closed yet
    #
    # Three flags refine the :initial state:
    # * value_expected    - a value may start here, so "/" opens a regexp
    #                       literal (otherwise it is the division operator)
    # * key_expected      - the last operator ended in "{" or ",", so an
    #                       identifier or string followed by ":" is an object key
    # * function_expected - the previous token was the +function+ keyword, so the
    #                       next plain identifier is a function name
    #
    # +options+:
    # * :state      - starting state: a symbol or a [state, string_delimiter]
    #                 pair; defaults to @state
    # * :keep_state - when true, the final [state, string_delimiter] pair is
    #                 stored in @state
    #
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      # A bare symbol destructures to [symbol, nil], so both forms are accepted.
      state, string_delimiter = options[:state] || @state
      if string_delimiter
        # Resuming inside a literal: reopen the group that was closed at the
        # end of the previous run.
        encoder.begin_group state
      end
      
      value_expected = true
      key_expected = false
      function_expected = false
      
      until eos?
        
        case state
          
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            # A line break may end a statement, so a value can follow.
            value_expected = true if !value_expected && match.index(?\n)
            encoder.text_token match, :space
            
          # JavaScript line comments always end at the newline; a trailing
          # backslash does not continue them onto the next line.
          # The empty group () only participates when a /* comment runs to the
          # end of the input without a closing */.
          elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .*() ) !mx)
            value_expected = true
            encoder.text_token match, :comment
            state = :open_multi_line_comment if self[1]
            
          elsif check(/\.?\d/)
            key_expected = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+/)
              encoder.text_token match, :integer
            end
            
          elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
            # Inline XML literal: delegate the whole element to the XML scanner.
            # TODO: scan over nested tags
            xml_scanner.tokenize match, :tokens => encoder
            value_expected = false
            
          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              if match.index(?$)  # $ allowed inside an identifier
                kind = :predefined
              elsif function_expected
                kind = :function
              elsif check(/\s*[=:]\s*function\b/)
                # name = function ... / name: function ...
                kind = :function
              elsif key_expected && check(/\s*:/)
                kind = :key
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false
            encoder.text_token match, kind
            
          elsif match = scan(/["']/)
            # Look ahead to decide whether this string is an object key.
            if key_expected && check(KEY_CHECK_PATTERN[match])
              state = :key
            else
              state = :string
            end
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter
            
          elsif value_expected && (match = scan(/\//))
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter
            
          elsif match = scan(/ \/ /x)
            # No value expected here, so "/" is the division operator.
            value_expected = true
            key_expected = false
            encoder.text_token match, :operator
            
          else
            encoder.text_token getch, :error
            
          end
          
        when :string, :regexp, :key
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # The content pattern consumes every character except a backslash
            # and the current delimiter, so this is the closing delimiter.
            encoder.text_token match, :delimiter
            if state == :regexp
              modifiers = scan(/[gim]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            encoder.end_group state
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted literals only \\ and \' are reported as :char;
            # every other escape is emitted as plain :content.
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            # Unknown escape: keep it as literal content.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # Lone backslash or end of line: close the literal and flag any
            # consumed text as an error.
            encoder.end_group state
            encoder.text_token match, :error unless match.empty?
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
          end
          
        when :open_multi_line_comment
          if match = scan(%r! .*? \*/ !mx)
            state = :initial
          else
            # Still no closing */ : the rest of the input belongs to the comment.
            match = scan(%r! .+ !mx)
          end
          value_expected = true
          encoder.text_token match, :comment if match
          
        else
          #:nocov:
          raise_inspect 'Unknown state: %p' % [state], encoder
          #:nocov:
          
        end
        
      end
      
      if options[:keep_state]
        @state = state, string_delimiter
      end
      
      # Close the group of a literal that is still open at the end of the
      # input. :key is included so that every begin_group (also the one issued
      # when resuming with a [:key, delimiter] state) has a matching end_group.
      if [:string, :regexp, :key].include? state
        encoder.end_group state
      end
      
      encoder
    end
    
  protected
    
    # Runs the inherited reset, then also resets the embedded XML scanner,
    # but only if xml_scanner has already created one.
    def reset_instance
      super
      return unless defined? @xml_scanner
      @xml_scanner.reset
    end
    
    # Returns the XML scanner used for inline XML literals, creating it on
    # first use and reusing the same instance afterwards. It is built with
    # this scanner's @tokens, :keep_tokens => true and :keep_state => false.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner(
        :xml,
        :tokens      => @tokens,
        :keep_tokens => true,
        :keep_state  => false
      )
    end
    
  end
  
end
end