1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
# encoding: utf-8
module CodeRay
module Scanners
# Clojure scanner by Licenser.
class Clojure < Scanner
register_for :clojure
file_extension 'clj'
SPECIAL_FORMS = %w[
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
new
] # :nodoc:
CORE_FORMS = %w[
+ - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
assert assoc assoc! assoc-in associative? atom await await-for bases bean
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
char-array char-escape-string char-name-string char? chars class class?
clear-agent-errors clojure-version coll? comment commute comp comparator
compare compare-and-set! compile complement concat cond condp conj conj!
cons constantly construct-proxy contains? count counted? create-ns
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
deliver denominator deref derive descendants disj disj! dissoc dissoc!
distinct distinct? doall doc dorun doseq dosync dotimes doto double
double-array doubles drop drop-last drop-while empty empty? ensure
enumeration-seq error-handler error-mode eval even? every? extend
extend-protocol extend-type extenders extends? false? ffirst file-seq
filter find find-doc find-ns find-var first float float-array float?
floats flush fn fn? fnext for force format future future-call future-cancel
future-cancelled? future-done? future? gen-class gen-interface gensym get
get-in get-method get-proxy-class get-thread-bindings get-validator hash
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
inc init-proxy instance? int int-array integer? interleave intern
interpose into into-array ints io! isa? iterate iterator-seq juxt key
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
list? load load-file load-reader load-string loaded-libs locking long
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
map map? mapcat max max-key memfn memoize merge merge-with meta methods
min min-key mod name namespace neg? newline next nfirst nil? nnext not
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
nthnext num number? numerator object-array odd? or parents partial
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
pos? pr pr-str prefer-method prefers print print-namespace-doc
print-str printf println println-str prn prn-str promise proxy
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
rand-int range ratio? rationalize re-find re-groups re-matcher
re-matches re-pattern re-seq read read-line read-string reduce ref
ref-history-count ref-max-history ref-min-history ref-set refer
refer-clojure reify release-pending-sends rem remove remove-all-methods
remove-method remove-ns remove-watch repeat repeatedly replace replicate
require reset! reset-meta! resolve rest restart-agent resultset-seq
reverse reversible? rseq rsubseq satisfies? second select-keys send
send-off seq seq? seque sequence sequential? set set-error-handler!
set-error-mode! set-validator! set? short short-array shorts
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
split-at split-with str string? struct struct-map subs subseq subvec
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
trampoline transient tree-seq true? type unchecked-add unchecked-dec
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
unchecked-remainder unchecked-subtract underive update-in update-proxy
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
when when-first when-let when-not while with-bindings with-bindings*
with-in-str with-local-vars with-meta with-open with-out-str
with-precision xml-seq zero? zipmap
] # :nodoc:
PREDEFINED_CONSTANTS = %w[
true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
*print-readably* *read-eval* *warn-on-reflection*
] # :nodoc:
IDENT_KIND = WordList.new(:ident).
add(SPECIAL_FORMS, :keyword).
add(CORE_FORMS, :keyword).
add(PREDEFINED_CONSTANTS, :predefined_constant)
KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
add(%w[ ns ], :namespace).
add(%w[ defprotocol defrecord ], :class)
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
SYMBOL = /::?#{IDENTIFIER}/o
DIGIT = /\d/
DIGIT10 = DIGIT
DIGIT16 = /[0-9a-f]/i
DIGIT8 = /[0-7]/
DIGIT2 = /[01]/
RADIX16 = /\#x/i
RADIX8 = /\#o/i
RADIX2 = /\#b/i
RADIX10 = /\#d/i
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
UINT10 = /#{DIGIT10}+#*/
UINT16 = /#{DIGIT16}+#*/
UINT8 = /#{DIGIT8}+#*/
UINT2 = /#{DIGIT2}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL10 = /#{SIGN}#{UREAL10}/
REAL16 = /#{SIGN}#{UREAL16}/
REAL8 = /#{SIGN}#{UREAL8}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG10 = /i|#{UREAL10}i/
IMAG16 = /i|#{UREAL16}i/
IMAG8 = /i|#{UREAL8}i/
IMAG2 = /i|#{UREAL2}i/
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
protected
def scan_tokens encoder, options
state = :initial
kind = nil
until eos?
case state
when :initial
if match = scan(/ \s+ | \\\n | , /x)
encoder.text_token match, :space
elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
encoder.text_token match, :operator
elsif match = scan(/;.*/)
encoder.text_token match, :comment # TODO: recognize (comment ...) too
elsif match = scan(/\#?\\(?:newline|space|.?)/)
encoder.text_token match, :char
elsif match = scan(/\#[ft]/)
encoder.text_token match, :predefined_constant
elsif match = scan(/#{IDENTIFIER}/o)
kind = IDENT_KIND[match]
encoder.text_token match, kind
if rest? && kind == :keyword
if kind = KEYWORD_NEXT_TOKEN_KIND[match]
encoder.text_token match, :space if match = scan(/\s+/o)
encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
end
end
elsif match = scan(/#{SYMBOL}/o)
encoder.text_token match, :symbol
elsif match = scan(/\./)
encoder.text_token match, :operator
elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
encoder.text_token match, :type
elsif match = scan(/ (\#)? " /x)
state = self[1] ? :regexp : :string
encoder.begin_group state
encoder.text_token match, :delimiter
elsif match = scan(/#{NUM}/o) and not matched.empty?
encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
else
encoder.text_token getch, :error
end
when :string, :regexp
if match = scan(/[^"\\]+|\\.?/)
encoder.text_token match, :content
elsif match = scan(/"/)
encoder.text_token match, :delimiter
encoder.end_group state
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1),
encoder, state
end
else
raise 'else case reached'
end
end
if [:string, :regexp].include? state
encoder.end_group state
end
encoder
end
end
end
end
|