# path: root/lib/coderay/scanners/java_script6.rb
# blob: b745bd4b6f49e6ade70aadd9b14b568a6caeff20 (plain)
# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
module CodeRay
module Scanners
  
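  # Scanner for JavaScript, written with the rule-based scanner DSL of
  # SingleStateRuleBasedScanner and registered as +java_script6+.
  # 
  # NOTE(review): nothing in this file establishes the aliases listed below;
  # only +java_script6+ is registered here -- confirm where they are mapped.
  # Aliases: +ecmascript+, +ecma_script+, +javascript+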
  class JavaScript6 < SingleStateRuleBasedScanner
    
    # Register this scanner class under the +java_script6+ id and declare +js+
    # as its file extension (both helpers are defined outside this file).
    register_for :java_script6
    file_extension 'js'
    
    # JavaScript keywords proper (as opposed to words merely reserved for
    # future use).
    KEYWORDS = %w[
      break case catch continue
      default delete do else
      finally for function if
      in instanceof new return
      switch throw try typeof
      var void while with
    ]  # :nodoc:
    
    # Names of built-in constant values.
    PREDEFINED_CONSTANTS = %w[false null true undefined NaN Infinity]  # :nodoc:
    
    # Implicitly defined variables; +arguments+ was introduced in JavaScript 1.4.
    MAGIC_VARIABLES = %w[this arguments]  # :nodoc:
    
    # Keywords that are followed by an expression. The identifier rule of the
    # :initial state looks every matched word up here to set value_expected.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(
      %w[case delete in instanceof new return throw typeof with]
    )  # :nodoc:
    
    # Words the language reserves for future use; they are not keywords yet.
    RESERVED_WORDS = %w[
      abstract boolean byte char class
      debugger double enum export extends
      final float goto implements import
      int interface long native package
      private protected public short static
      super synchronized throws transient volatile
    ]  # :nodoc:
    
    # Lookup table from an identifier to its token kind, filled from the word
    # lists defined in this class. NOTE(review): :ident is presumably the kind
    # returned for words in none of the lists -- confirm against WordList.
    IDENT_KIND = WordList.new(:ident)
      .add(RESERVED_WORDS, :reserved)
      .add(PREDEFINED_CONSTANTS, :predefined_constant)
      .add(MAGIC_VARIABLES, :local_variable)
      .add(KEYWORDS, :keyword)  # :nodoc:
    
    # The three *_ESCAPE patterns describe what may follow a backslash; the
    # state rules interpolate them after a literal "\\". All use extended mode
    # (/x), so the spaces inside the literals are not part of the pattern.
    #
    # ESCAPE: one of b f n r t v, a newline, a backslash or a quote character;
    # or "x" plus one or two hex digits; or one to three octal digits.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # UNICODE_ESCAPE: "u" plus exactly four hex digits, or "U" plus exactly eight.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    # REGEXP_ESCAPE: the letters b B d D s S w W, used by the :regexp state only.
    REGEXP_ESCAPE =  / [bBdDsSwW] /x  # :nodoc:
    # Keyed by the opening delimiter: a run of characters that are neither a
    # backslash nor that delimiter, i.e. plain literal content.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:
    # Keyed by the opening quote: matches the remainder of a quoted string
    # (backslash-escaped characters included; /m lets an escaped newline count),
    # its closing quote, optional whitespace and a colon. The quote rule of the
    # :initial state passes it to +check+ to choose between :key and :string.
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }  # :nodoc:
    
    # Rules for the :initial state: ordinary JavaScript code outside of string,
    # key and regexp literals.
    #
    # The DSL helpers used below (+on+, +on?+, +set+, +flag_on+, +flag_off+,
    # +check_if+, +kind+, +push+) are defined outside this file. The comments
    # describe what each pattern matches and read the helper names at face
    # value. NOTE(review): rules are presumably tried in the order listed --
    # confirm against SingleStateRuleBasedScanner before reordering anything.
    state :initial do
      # Whitespace, or a backslash directly followed by a newline. The block
      # yields the current value_expected if it is truthy, otherwise the index
      # of a newline in the match (nil when there is none).
      on %r/ \s+ | \\\n /x, :space, set(:value_expected) { |match, value_expected| value_expected || match.index(?\n) }
      # Either a "//" comment up to the end of the line (a backslash escapes the
      # next character, including a newline because of /m), or a "/* ... */"
      # comment. An unterminated "/*" takes the rest of the input; the empty
      # group 1 only participates in that case.
      on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, flag_off(:value_expected)
        # state = :open_multi_line_comment if self[1]
      
      # Numeric literals. NOTE(review): +on?+ presumably only tests that the
      # input continues with an optional dot and a digit before the nested
      # rules are tried -- confirm.
      on? %r/\.?\d/ do
        # "0x"/"0X" followed by hex digits.
        on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:key_expected, :value_expected)
        # A leading 0 plus octal digits, unless an 8, 9, ".", "e", "E", "f" or
        # "F" follows (that would make it a decimal or float literal).
        on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, flag_off(:key_expected, :value_expected)
        # Digits with an f/F suffix, a literal with a fractional part, or
        # digits with an exponent.
        on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, flag_off(:key_expected, :value_expected)
        on %r/\d+/, :integer, flag_off(:key_expected, :value_expected)
      end
      
      # Inline XML, guarded by check_if(:value_expected): an opening tag that is
      # either self-closing ("... />") or extends lazily to the first closing
      # tag with the same name (back-reference \1). The matched text is handed
      # to the XML scanner, which writes into the same encoder.
      on check_if(:value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
        # TODO: scan over nested tags
        xml_scanner.tokenize match, :tokens => encoder
      end, flag_off(:value_expected)
      
      # A run of operator characters that does NOT end in "{" or "," (the run
      # is possessive, so it is never shortened to satisfy the lookbehind), or
      # one or more dots not followed by a digit.
      on %r/ [-+*=<>?:;,!&^|(\[{~%]++ (?<![{,]) | \.+(?!\d) /x, :operator, flag_on(:value_expected), flag_off(:key_expected, :function_expected)
      # The complementary case: a run of operator characters that ends in "{"
      # or ","; this one also turns key_expected on.
      # NOTE(review): "*+" permits an empty run, so this rule could match the
      # empty string if the lookbehind is able to see text before the current
      # scan position -- confirm how the scanner anchors its matches.
      on %r/ [-+*=<>?:;,!&^|(\[{~%]*+ (?<=[{,]) /x, :operator, flag_on(:value_expected, :key_expected), flag_off(:function_expected)
      # One or more closing brackets.
      on %r/ [)\]}]+ /x, :operator, flag_off(:function_expected, :key_expected, :value_expected)
      
      # The word "function" when it is not the prefix of a longer identifier.
      on %r/ function (?![A-Za-z_0-9$]) /x, :keyword, flag_on(:function_expected), flag_off(:key_expected, :value_expected)
      # Any identifier. The block starts from the IDENT_KIND lookup and refines
      # a plain :ident using the flags and the text that follows.
      on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, kind { |match, function_expected, key_expected|
        kind = IDENT_KIND[match]
        # TODO: labels
        if kind == :ident
          if match.index(?$)  # any identifier containing "$" is reported as :predefined
            kind = :predefined
          elsif function_expected
            # The name that follows the "function" keyword.
            kind = :function
          elsif check(/\s*[=:]\s*function\b/)
            # name = function ... / name: function ...
            kind = :function
          elsif key_expected && check(/\s*:/)
            # An unquoted key followed by a colon.
            kind = :key
          end
        end
        
        kind
      }, flag_off(:function_expected, :key_expected), set(:value_expected) { |match| KEYWORDS_EXPECTING_VALUE[match] }
      
      # An opening quote: enters :key when a key is expected and the rest of
      # the literal is followed by a colon, otherwise :string. The quote
      # character itself is kept as string_delimiter.
      on %r/["']/, push { |match, key_expected| key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string }, :delimiter, set(:string_delimiter) { |match| match }
      # A slash where a value is expected opens a regexp literal ...
      on check_if(:value_expected), %r/\//, push(:regexp), :delimiter
      
      # ... any other slash is an operator.
      on %r/\//, :operator, flag_on(:value_expected), flag_off(:key_expected)
    end
    
    # Rules shared by the :string and :key states, i.e. the inside of a quoted
    # literal. The DSL helpers (+on+, +pattern+, +kind+, +unset+, +flag_off+,
    # +pop+) are defined outside this file and are read at face value here.
    state :string, :key do
      # Plain content: the pattern is looked up by the current string_delimiter,
      # so it matches a run without backslashes and without that delimiter.
      on pattern { |string_delimiter| STRING_CONTENT_PATTERN[string_delimiter] }, :content
      # A quote character ends the literal. The content rule above already
      # accepts the other quote character, so presumably only the matching
      # delimiter reaches this rule (this relies on the rule order).
      on %r/["']/, :delimiter, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop
      # A recognized escape sequence. Inside single quotes only an escaped
      # backslash and an escaped single quote are reported as :char; every
      # other escape is reported as :content there.
      on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, kind { |match, string_delimiter|
        string_delimiter == "'" && !(match == "\\\\" || match == "\\'") ? :content : :char
      }
      # A backslash followed by any other character (a newline too, due to /m).
      on %r/ \\. /mx, :content
      # A backslash that none of the rules above matched (nothing follows it):
      # reported as :error, and the literal is left.
      on %r/ \\ /x, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop, :error
    end
    
    # Rules for the inside of a regexp literal. The DSL helpers (+on+,
    # +groups+, +flag_off+, +pop+) are defined outside this file and are read
    # at face value here.
    state :regexp do
      # Plain content: a run of characters without backslash or slash.
      on STRING_CONTENT_PATTERN['/'], :content
      # The closing slash, optionally followed by modifier letters (g, i, m).
      # The two capture groups line up with the two kinds given to +groups+.
      on %r/(\/)([gim]+)?/, groups(:delimiter, :modifier), flag_off(:key_expected, :value_expected), pop
      # A recognized escape: a string escape, a regexp class or anchor letter,
      # or a unicode escape.
      on %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
      # A backslash followed by any other character (a newline too, due to /m).
      on %r/\\./m, :content
      # A backslash that none of the rules above matched (nothing follows it):
      # reported as :error, and the literal is left.
      on %r/ \\ /x, pop, :error, flag_off(:key_expected, :value_expected)
    end
    
    # state :open_multi_line_comment do
    #   on %r! .*? \*/ !mx, :initial  # don't consume!
    #   on %r/ .+ /mx, :comment, -> { value_expected = true }
    #
    #   # if match = scan(%r! .*? \*/ !mx)
    #   #   state = :initial
    #   # else
    #   #   match = scan(%r! .+ !mx)
    #   # end
    #   # value_expected = true
    #   # encoder.text_token match, :comment if match
    # end
    
    protected
    
    # Runs the parent class's setup, then puts the scanner's own variables into
    # their starting values: no string delimiter, a value is expected, and
    # neither a key nor a function name is expected.
    def setup
      super
      
      @string_delimiter = nil
      @value_expected = true
      # Both "expected" flags start out false; the chained assignment keeps
      # +false+ as the value of the method's last expression.
      @key_expected = @function_expected = false
    end
    
    # Tells +encoder+ to end the group named after +state+ when +state+ is one
    # of the literal states (:string, :key or :regexp). Any other state is
    # ignored and +nil+ is returned.
    def close_groups encoder, state
      case state
      when :string, :key, :regexp
        encoder.end_group state
      end
    end
    
    # Runs the parent class's reset, then also resets the nested XML scanner,
    # but only if one has been created already.
    def reset_instance
      super
      return unless instance_variable_defined?(:@xml_scanner)
      
      @xml_scanner.reset
    end
    
    # Returns the scanner used for inline XML, creating it on first use via
    # CodeRay.scanner with the options shown below, and reusing it afterwards.
    def xml_scanner
      return @xml_scanner if @xml_scanner
      
      @xml_scanner = CodeRay.scanner(:xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false)
    end
    
  end
  
end
end