summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/clojure.rb
blob: f8fbf650600c64705a6840ab90792d9b4b633bb3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# encoding: utf-8
module CodeRay
  module Scanners
    
    # Clojure scanner by Licenser.
    #
    # Splits Clojure source into tokens and hands them to an encoder. The
    # scanner is a small state machine: +:initial+ handles ordinary code, while
    # +:string+ and +:regexp+ are active between the opening and closing +"+ of
    # a string (<tt>"..."</tt>) or regexp (<tt>#"..."</tt>) literal.
    class Clojure < Scanner
      
      register_for :clojure
      file_extension 'clj'
      
      # Names highlighted as keywords because they are special forms.
      SPECIAL_FORMS = %w[
        def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
        new 
      ]  # :nodoc:
      
      # Core names that are highlighted as keywords as well.
      CORE_FORMS = %w[
        + - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
        agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
        alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
        aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
        assert assoc assoc! assoc-in associative? atom await await-for bases bean
        bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
        bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
        booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
        char-array char-escape-string char-name-string char? chars class class?
        clear-agent-errors clojure-version coll? comment commute comp comparator
        compare compare-and-set! compile complement concat cond condp conj conj!
        cons constantly construct-proxy contains? count counted? create-ns
        create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
        defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
        deliver denominator deref derive descendants disj disj! dissoc dissoc!
        distinct distinct? doall doc dorun doseq dosync dotimes doto double
        double-array doubles drop drop-last drop-while empty empty? ensure
        enumeration-seq error-handler error-mode eval even? every? extend
        extend-protocol extend-type extenders extends? false? ffirst file-seq
        filter find find-doc find-ns find-var first float float-array float?
        floats flush fn fn? fnext for force format future future-call future-cancel
        future-cancelled? future-done? future? gen-class gen-interface gensym get
        get-in get-method get-proxy-class get-thread-bindings get-validator hash
        hash-map hash-set identical? identity if-let if-not ifn? import in-ns
        inc init-proxy instance? int int-array integer? interleave intern
        interpose into into-array ints io! isa? iterate iterator-seq juxt key
        keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
        list? load load-file load-reader load-string loaded-libs locking long
        long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
        map map? mapcat max max-key memfn memoize merge merge-with meta methods
        min min-key mod name namespace neg? newline next nfirst nil? nnext not
        not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
        ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
        nthnext num number? numerator object-array odd? or parents partial
        partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
        pos? pr pr-str prefer-method prefers print print-namespace-doc
        print-str printf println println-str prn prn-str promise proxy
        proxy-mappings proxy-super push-thread-bindings pvalues quot rand
        rand-int range ratio? rationalize re-find re-groups re-matcher
        re-matches re-pattern re-seq read read-line read-string reduce ref
        ref-history-count ref-max-history ref-min-history ref-set refer
        refer-clojure reify release-pending-sends rem remove remove-all-methods
        remove-method remove-ns remove-watch repeat repeatedly replace replicate
        require reset! reset-meta! resolve rest restart-agent resultset-seq
        reverse reversible? rseq rsubseq satisfies? second select-keys send
        send-off seq seq? seque sequence sequential? set set-error-handler!
        set-error-mode! set-validator! set? short short-array shorts
        shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
        sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
        split-at split-with str string? struct struct-map subs subseq subvec
        supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
        take-nth take-while test the-ns thread-bound? time to-array to-array-2d
        trampoline transient tree-seq true? type unchecked-add unchecked-dec
        unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
        unchecked-remainder unchecked-subtract underive update-in update-proxy
        use val vals var-get var-set var? vary-meta vec vector vector-of vector?
        when when-first when-let when-not while with-bindings with-bindings*
        with-in-str with-local-vars with-meta with-open with-out-str
        with-precision xml-seq zero? zipmap 
      ]  # :nodoc:
      
      # Literal values and the earmuffed vars highlighted as predefined constants.
      PREDEFINED_CONSTANTS = %w[
        true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
        *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
        *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
        *print-readably* *read-eval* *warn-on-reflection*
      ]  # :nodoc:
      
      # Maps an identifier to its token kind; anything not listed is :ident.
      IDENT_KIND = WordList.new(:ident).
        add(SPECIAL_FORMS, :keyword).
        add(CORE_FORMS, :keyword).
        add(PREDEFINED_CONSTANTS, :predefined_constant)
      
      # For defining forms: the token kind given to the identifier that directly
      # follows the keyword (nil when the keyword does not introduce a name).
      KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
        add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
        add(%w[ ns ], :namespace).
        add(%w[ defprotocol defrecord ], :class)
      
      BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
      # The lookahead keeps signed numbers such as -5 and +5 out of the
      # identifier branch, so that they reach the number branch instead.
      IDENTIFIER = /(?![-+]\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
      SYMBOL = /::?#{IDENTIFIER}/
      # A type hint written as #^Name.
      TYPE_HINT = /\#\^#{IDENTIFIER}/
      # Character literals: the named characters, \uXXXX, \oNNN, or a backslash
      # followed by (at most) one arbitrary character. Longer alternatives come
      # first so that e.g. \tab is not cut short to \t.
      CHARACTER = /\#?\\(?:newline|space|tab|formfeed|backspace|return|u[0-9a-fA-F]{4}|o[0-7]{1,3}|.?)/
      
      # Number grammar, assembled bottom-up from digits to complete literals.
      # NOTE(review): radix and exactness are written with #x / #o / #b / #d and
      # #i / #e prefixes here; Clojure's own 0x.., NNr.. and N / M suffixed
      # literals are not covered by these patterns -- TODO confirm and extend.
      DIGIT = /\d/
      DIGIT10 = DIGIT
      DIGIT16 = /[0-9a-f]/i
      DIGIT8 = /[0-7]/
      DIGIT2 = /[01]/
      RADIX16 = /\#x/i
      RADIX8 = /\#o/i
      RADIX2 = /\#b/i
      RADIX10 = /\#d/i
      EXACTNESS = /#i|#e/i
      SIGN = /[\+-]?/
      EXP_MARK = /[esfdl]/i
      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
      SUFFIX = /#{EXP}?/
      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
      UINT10 = /#{DIGIT10}+#*/
      UINT16 = /#{DIGIT16}+#*/
      UINT8 = /#{DIGIT8}+#*/
      UINT2 = /#{DIGIT2}+#*/
      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
      REAL10 = /#{SIGN}#{UREAL10}/
      REAL16 = /#{SIGN}#{UREAL16}/
      REAL8 = /#{SIGN}#{UREAL8}/
      REAL2 = /#{SIGN}#{UREAL2}/
      IMAG10 = /i|#{UREAL10}i/
      IMAG16 = /i|#{UREAL16}i/
      IMAG8 = /i|#{UREAL8}i/
      IMAG2 = /i|#{UREAL2}i/
      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
      NUM16 = /#{PREFIX16}#{COMPLEX16}/
      NUM8 = /#{PREFIX8}#{COMPLEX8}/
      NUM2 = /#{PREFIX2}#{COMPLEX2}/
      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
      
    protected
      
      # Scans the whole input and emits one token per lexeme to +encoder+.
      #
      # encoder:: receives the +text_token+, +begin_group+ and +end_group+ calls.
      # options:: part of the scanner interface; not read by this method.
      #
      # Returns +encoder+.
      def scan_tokens(encoder, options)
        
        state = :initial
        
        until eos?
          
          case state
          when :initial
            # The order of the branches matters: the first pattern that matches
            # at the current position decides the token kind.
            if (match = scan(/ \s+ | \\\n | , /x))
              # Commas are treated like whitespace.
              encoder.text_token match, :space
            elsif (match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/))
              encoder.text_token match, :operator
            elsif (match = scan(/;.*/))
              encoder.text_token match, :comment  # TODO: recognize (comment ...) too
            elsif (match = scan(CHARACTER))
              encoder.text_token match, :char
            elsif (match = scan(/\#[ft]/))
              encoder.text_token match, :predefined_constant
            elsif (match = scan(IDENTIFIER))
              kind = IDENT_KIND[match]
              encoder.text_token match, kind
              # After a defining keyword, the following identifier is the name
              # being introduced and gets the kind registered for that keyword.
              if kind == :keyword && rest? && (name_kind = KEYWORD_NEXT_TOKEN_KIND[match])
                if (space = scan(/\s+/))
                  encoder.text_token space, :space
                end
                if (name = scan(IDENTIFIER))
                  encoder.text_token name, name_kind
                end
              end
            elsif (match = scan(SYMBOL))
              encoder.text_token match, :symbol
            elsif (match = scan(/\./))
              encoder.text_token match, :operator
            elsif (match = scan(TYPE_HINT))
              encoder.text_token match, :type
            elsif (match = scan(/ (\#)? " /x))
              # A leading # (captured as group 1) opens a regexp, a bare " a string.
              state = self[1] ? :regexp : :string
              encoder.begin_group state
              encoder.text_token match, :delimiter
            elsif (match = scan(NUM)) && !match.empty?
              # A dot, an exponent letter or a ratio slash marks the literal as :float.
              encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
            else
              # Nothing matched: emit the single character as an error and move on.
              encoder.text_token getch, :error
            end
            
          when :string, :regexp
            if (match = scan(/[^"\\]+|\\.?/))
              # Plain content, or a backslash together with the character it escapes.
              encoder.text_token match, :content
            elsif (match = scan(/"/))
              encoder.text_token match, :delimiter
              encoder.end_group state
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1),
                encoder, state
            end
            
          else
            raise 'else case reached'
            
          end
          
        end
        
        # Close a literal that was still open when the input ended.
        if [:string, :regexp].include? state
          encoder.end_group state
        end
        
        encoder
        
      end
    end
  end
end