summaryrefslogtreecommitdiff
path: root/etc/todo/scanners/paste-693 (IO).rb
blob: ee13933e680e96c3d3ce45b27262943f4037bc2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
module CodeRay module Scanners

	# A simple scanner for a simple language: Io.
	#
	# The scanner is a small state machine with three states:
	#
	# * :initial          - ordinary code
	# * :string           - inside a double-quoted string literal
	# * :include_expected - directly after a "#include" directive
	#
	# Every pass of the main loop recognises one piece of text and appends a
	# [text, kind] pair to the token list. String literals are additionally
	# wrapped in [:open, :string] ... [:close, :string] marker pairs.
	class Io < Scanner

		register_for :io

		# Identifiers reported with kind :reserved.
		RESERVED_WORDS = [ 'clone','init', 'method', 'list', 'vector', 'block',  'if','ifTrue','ifFalse','ifTrueIfFalse','then', 'for','loop',
		'reverseForeach','foreach','map','continue','break','while','do','return',
		'self','sender','target','proto','parent','protos']

		# Identifiers reported with kind :pre_type (currently none).
		PREDEFINED_TYPES = []

		# Identifiers reported with kind :pre_constant.
		PREDEFINED_CONSTANTS = ['Object', 'Lobby', 
                'TRUE','true','FALSE','false','NULL','null','Null','Nil','nil','YES','NO']

		# Maps an identifier to its token kind; words that are not listed
		# fall back to :ident.
		IDENT_KIND = WordList.new(:ident).
			add(RESERVED_WORDS, :reserved).
			add(PREDEFINED_TYPES, :pre_type).
			add(PREDEFINED_CONSTANTS, :pre_constant)

		# What may follow a backslash: a single escape character, a hex
		# escape (x + 1-2 hex digits) or an octal escape (1-3 octal digits).
		ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
		# Unicode escapes: u + 4 hex digits or U + 8 hex digits.
		UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

		# Scans the whole input and appends the recognised tokens to +tokens+.
		#
		# tokens::  collection receiving the tokens via <<
		# options:: not used by this scanner
		#
		# Returns +tokens+.
		#
		# Raises as soon as a piece of input cannot be classified (its kind
		# is still :error at the end of a pass) or when an impossible state
		# is reached.
		#
		# NOTE(review): the raise calls below pass a String or Array as first
		# argument and the token list as second. Plain Kernel#raise expects an
		# exception class or object there - confirm that Scanner provides a
		# compatible error helper.
		def scan_tokens tokens, options

			state = :initial

			until eos?

				# :error survives only if no branch below classifies the input.
				kind = :error
				match = nil

				if state == :initial

					# Whitespace, or a backslash-newline line continuation.
					if scan(/ \s+ | \\\n /x)
						kind = :space

					# A // line comment (may be continued with backslashes) or
					# a /* ... */ block comment; an unterminated block comment
					# runs to the end of the input.
					elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
						kind = :comment


					# A run of operator characters, or a dot that does not
					# start a number such as ".5".
					elsif scan(/ [-+*\/\$\@=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
						kind = :operator

					elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
						kind = IDENT_KIND[match]
						# A plain identifier followed by a single colon (not "::")
						# becomes a label, colon included.
						if kind == :ident && check(/:(?!:)/)
							match << scan(/:/)
							kind = :label
						end

					elsif match = scan(/L?"/)
						tokens << [:open, :string]
						# An "L" prefix is emitted as its own :modifier token;
						# only the quote remains as the delimiter.
						if match[0] == ?L
							tokens << ['L', :modifier]
							match = '"'
						end
						state = :string
						kind = :delimiter

					elsif scan(/#\s*(\w*)/)
						kind = :preprocessor  # FIXME multiline preprocs
						state = :include_expected if self[1] == 'include'

					# Character literal; both the content and the closing quote
					# are optional so that incomplete literals still match.
					elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
						kind = :char

					elsif scan(/0[xX][0-9A-Fa-f]+/)
						kind = :hex

					# Digits are part of the lookahead so that the greedy
					# quantifier cannot backtrack to a shorter run of digits:
					# "017.5" must not yield the octal "01".
					elsif scan(/0[0-7]+(?![0-9.eEfF])/)
						kind = :octal

					# Same guard as above: "12.5" must not yield the integer "1".
					elsif scan(/\d+(?![0-9.eEfF])/)
						kind = :integer

					# Fractional form first, then exponent form, then a bare run
					# of digits with an optional f/F suffix. The last alternative
					# always matches at a digit, so numbers never end up in the
					# error branch.
					elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
						kind = :float

					else
						# Unknown character: consume it; kind stays :error.
						getch
					end

				elsif state == :string
					if scan(/[^\\"]+/)
						kind = :content
					elsif scan(/"/)
						# Closing quote: emit delimiter and close marker directly
						# and skip the generic token handling below.
						tokens << ['"', :delimiter]
						tokens << [:close, :string]
						state = :initial
						next
					elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
						kind = :char
					elsif scan(/ \\ | $ /x)
						# A backslash without a recognised escape after it, or an
						# end of line: report an error and leave the string state.
						kind = :error
						state = :initial
					else
						raise "else case \" reached; %p not handled." % peek(1), tokens
					end

				elsif state == :include_expected
					# The include target: <...> or "...", closing character optional.
					if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
						kind = :include
						state = :initial

					elsif match = scan(/\s+/)
						kind = :space
						# A newline ends the directive even without a target.
						state = :initial if match.index ?\n

					else
						# Anything else: consume one character; kind stays :error.
						getch

					end

				else
					raise 'else-case reached', tokens

				end

				# Branches that did not capture the text themselves fall back to
				# the scanner's last match.
				match ||= matched
				raise [match, kind], tokens if kind == :error

				tokens << [match, kind]

			end

			tokens
		end

	end

end end