summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/c2.rb
blob: 3103e549351c3866305e48a4eb01f72c9de94ec5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
module CodeRay
module Scanners
  
  # Scanner for C, expressed as declarative rules (+state+ / +on+) on top of
  # RuleBasedScanner.
  #
  # Three states are declared below:
  # * +:initial+          - ordinary C code (operators, identifiers, literals,
  #   comments, preprocessor lines)
  # * +:string+           - the inside of a double-quoted string literal
  # * +:include_expected+ - directly after an <tt>#include</tt> directive
  #
  # Rules inside a state are listed in a deliberate order (e.g. the
  # <tt>case</tt>/<tt>default</tt> rule precedes the generic identifier rule,
  # and <tt>#include</tt> precedes the generic preprocessor rule), so keep the
  # order when editing.
  #
  # NOTE(review): the exact semantics of the DSL helpers used here (+on+,
  # +check_if+, +check_unless+, +flag_on+, +flag_off+, +set+, +push+, +pop+,
  # +push_state+, +pop_state+, +groups+, +kind+) are defined in
  # RuleBasedScanner, which is not part of this file; the comments below
  # describe them only as far as their names and arguments suggest.
  class C2 < RuleBasedScanner
    
    register_for :c2
    file_extension 'c'
    
    # Words classified as :keyword.
    KEYWORDS = [
      'asm', 'break', 'case', 'continue', 'default', 'do',
      'else', 'enum', 'for', 'goto', 'if', 'return',
      'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
      'restrict',  # added in C99
    ]  # :nodoc:
    
    # Words classified as :predefined_type.
    PREDEFINED_TYPES = [
      'int', 'long', 'short', 'char',
      'signed', 'unsigned', 'float', 'double',
      'bool', 'complex',  # added in C99
    ]  # :nodoc:
    
    # Words classified as :predefined_constant.
    PREDEFINED_CONSTANTS = [
      'EOF', 'NULL',
      'true', 'false',  # added in C99
    ]  # :nodoc:

    # Words classified as :directive.
    DIRECTIVES = [
      'auto', 'extern', 'register', 'static', 'void',
      'const', 'volatile',  # added in C89
      'inline',  # added in C99
    ]  # :nodoc:
    
    # Lookup from identifier text to token kind, built from the lists above.
    # Words in none of the lists presumably come back as :ident (the label
    # rule below relies on that by comparing the result with :ident).
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_TYPES, :predefined_type).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :predefined_constant)  # :nodoc:
    
    # What may follow a backslash inside a char or string literal (the
    # backslash itself is matched by the rules that interpolate this pattern):
    # a simple escape character, a literal newline, a hex escape with one or
    # two digits, or an octal escape with one to three digits.
    #
    # The simple escapes include \a (alert) and \? (question mark), which are
    # part of standard C. Without them, "\a" inside a string fell through to
    # the lone-backslash rule of the :string state (pop + :error), and '\a'
    # could not be matched as a single char literal.
    ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    # Universal character names: \uXXXX or \UXXXXXXXX (again without the
    # leading backslash).
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    
  protected
    
    # Ordinary C code.
    state :initial do
      # A newline (with optional surrounding whitespace) while the
      # in_preproc_line flag is set: emitted as :space, clears the flag and
      # sets label_expected from label_expected_before_preproc_line
      # (presumably restoring the value saved when the directive started).
      on check_if(:in_preproc_line), %r/ \s*? \n \s* /x, :space, flag_off(:in_preproc_line), set(:label_expected, :label_expected_before_preproc_line)
      # Any other whitespace run, or a backslash-newline line continuation.
      on %r/ \s+ | \\\n /x, :space
      
      # Operators and punctuation: a run of operator characters, a "/" that
      # does not start a comment (optionally followed by "="), or a "." that
      # does not start a number. label_expected becomes truthy when the match
      # contains ";", "{" or "}", or contains ":" while case_expected is set;
      # case_expected is cleared afterwards.
      on %r/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/(?![\/*])=? | \.(?!\d) /x, :operator, set(:label_expected) { |match, case_expected| match =~ /[;\{\}]/ || case_expected && match =~ /:/ }, flag_off(:case_expected)
      
      # "case" / "default": the ":" that follows belongs to the case clause,
      # so case_expected is raised and label_expected is cleared.
      on %r/ (?: case | default ) \b /x, :keyword, flag_on(:case_expected), flag_off(:label_expected)
      # An identifier immediately followed by a single ":" (not "::"), tried
      # only when label_expected is set and we are not in a preprocessor line.
      # The trailing ":" is chopped off for the lookup; a plain identifier is
      # reported as :label, a known word keeps its own kind.
      on check_if(:label_expected), check_unless(:in_preproc_line), %r/ [A-Za-z_][A-Za-z_0-9]*+ :(?!:) /x, kind { |match|
        kind = IDENT_KIND[match.chop]
        kind == :ident ? :label : kind
      }, set(:label_expected) { |kind| kind == :label }
      # Any other identifier, classified through IDENT_KIND.
      on %r/ [A-Za-z_][A-Za-z_0-9]* /x, kind { |match| IDENT_KIND[match] }, flag_off(:label_expected)
      
      # Start of a string literal, with an optional "L" prefix captured as
      # :modifier and the opening quote as :delimiter; continues in the
      # :string state.
      on %r/(L)?(")/, push(:string), groups(:modifier, :delimiter)
      
      # Literals. The char rule makes both the content and the closing quote
      # optional, so an unterminated "'" still produces a token.
      on %r/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /x,                   :char,    flag_off(:label_expected)
      on %r/0[xX][0-9A-Fa-f]+/,                                           :hex,     flag_off(:label_expected)
      on %r/(?:0[0-7]+)(?![89.eEfF])/,                                    :octal,   flag_off(:label_expected)
      on %r/(?:\d+)(?![.eEfF])L?L?/,                                      :integer, flag_off(:label_expected)
      on %r/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float,   flag_off(:label_expected)
      
      # Comments: "//" up to the end of the line (a backslash followed by any
      # character, including a newline, continues it), or "/* ... */"; an
      # unterminated block comment runs to the end of the input.
      on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx, :comment
      # "#if 0": unless already at the end of the input, the lambda appends
      # everything up to and including the next line that starts with
      # #elif, #else or #endif (or up to the end of the input), and the whole
      # text is emitted as :comment.
      # NOTE(review): the search stops at the first such line, so a nested
      # #if/#endif inside the disabled region ends the comment early; the
      # pattern also has no boundary after the "0".
      on %r/ \# \s* if \s* 0 /x, -> (match) {
        match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /mx) unless eos?
      }, :comment
      # Preprocessor directives. Both rules raise in_preproc_line and store
      # label_expected in label_expected_before_preproc_line; #include
      # additionally switches to :include_expected for the file name.
      on %r/ \# [ \t]* include\b /x, :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected), push_state(:include_expected)
      on %r/ \# [ \t]* \w* /x,       :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected)
      
      # A lone "$" is reported as :ident.
      on %r/\$/, :ident
    end
    
    # Inside a double-quoted string literal.
    state :string do
      # Plain content: anything except backslash, newline and the quote.
      on %r/[^\\\n"]+/, :content
      # A recognised escape sequence.
      on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mx, :char
      # Closing quote: leaves the string.
      on %r/"/, :delimiter, pop,   flag_off(:label_expected)
      # A backslash that did not start a recognised escape: reported as
      # :error, and the string is left.
      on %r/ \\ /x, pop, :error,   flag_off(:label_expected)
      # End of line without a closing quote: the string is left without
      # emitting a token kind.
      on %r/ $ /x,  pop, flag_off(:label_expected)
    end
    
    # Directly after "#include".
    state :include_expected do
      # <file> or "file"; the closing ">" / '"' is optional so an unfinished
      # name is still reported as :include.
      on %r/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/, :include, pop_state
      # A newline ends the directive.
      on %r/ \s*? \n \s* /x, :space, pop_state
      # Other whitespace keeps waiting for the file name.
      on %r/\s+/, :space
      # Fallback: the empty pattern matches at any position, so anything else
      # simply leaves this state without consuming input.
      on %r//, pop_state  # TODO: add otherwise method for this
    end
    
    # Visibility was already switched to protected above; repeated here ahead
    # of the helper methods.
    protected
    
    # Resets the scanner flags before a scan. These instance variables carry
    # the same names as the flag symbols used in the rules above (presumably
    # the DSL reads and writes them): a label may appear at the very start,
    # no "case" is pending, and we are not inside a preprocessor line.
    def setup
      super
      
      @label_expected = true
      @case_expected = false
      @label_expected_before_preproc_line = nil
      @in_preproc_line = false
    end
    
    # Ends the :string group on +encoder+ when the last entry of +states+ is
    # :string, i.e. when the scanner is still inside a string literal.
    # NOTE(review): presumably called by RuleBasedScanner once scanning has
    # finished - confirm against the base class.
    def close_groups encoder, states
      if states.last == :string
        encoder.end_group :string
      end
    end
    
  end

end
end