summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKornelius Kalnbach <murphy@rubychan.de>2016-02-13 00:50:36 +0100
committerKornelius Kalnbach <murphy@rubychan.de>2016-02-13 00:50:36 +0100
commit4da772ba7dd5026e74c24c1e007784ea4203690c (patch)
tree5f449174760dfd6c6737980953a012f5d67ca9c1
parenta1a7b2c871a0b33451292e542073a4aa743c91f2 (diff)
downloadcoderay-4da772ba7dd5026e74c24c1e007784ea4203690c.tar.gz
add generated Lua scanner
-rw-r--r--lib/coderay/scanners/lua2.rb184
1 files changed, 184 insertions, 0 deletions
diff --git a/lib/coderay/scanners/lua2.rb b/lib/coderay/scanners/lua2.rb
new file mode 100644
index 0000000..b047629
--- /dev/null
+++ b/lib/coderay/scanners/lua2.rb
@@ -0,0 +1,184 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua2 < RuleBasedScanner
+
+ register_for :lua2
+ file_extension 'lua'
+ title 'Lua'
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ protected
+
+ # Scanner initialization.
+ def setup
+ @state = :initial
+ @brace_depth = 0
+ @num_equals = nil
+ end
+
+ state :initial, :map do
+ on %r/\-\-\[\=*\[/, push(:long_comment, :comment), :delimiter, #--[[ long (possibly multiline) comment ]]
+ set(:num_equals, -> (match) { match.count('=') }) # Number must match for comment end
+ on %r/--.*$/, :comment # --Lua comment
+ on %r/\[=*\[/, push(:long_string, :string), :delimiter, # [[ long (possibly multiline) string ]]
+ set(:num_equals, -> (match) { match.count('=') }) # Number must match for string end
+ on %r/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/, :label # ::goto_label::
+ on %r/_[A-Z]+/, :predefined # _UPPERCASE are names reserved for Lua
+ on check_if { |brace_depth| brace_depth > 0 }, %r/([a-zA-Z_][a-zA-Z0-9_]*) (\s+)?(=)/x, groups(:key, :space, :operator)
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, kind { |match| IDENT_KIND[match] }, push_state { |match, kind, state| # Normal letters (or letters followed by digits)
+ # Extra highlighting for entities following certain keywords
+ if kind == :keyword && match == 'function'
+ state = :function_expected
+ elsif kind == :keyword && match == 'goto'
+ state = :goto_label_expected
+ elsif kind == :keyword && match == 'local'
+ state = :local_var_expected
+ end
+
+ state
+ }
+
+ on %r/\{/, push(:map), kind { |brace_depth| brace_depth > 0 ? :inline_delimiter : :delimiter }, increment(:brace_depth) # Opening table brace {
+ on check_if { |brace_depth| brace_depth == 1 }, %r/\}/, :delimiter, pop, decrement(:brace_depth) # Closing table brace }
+ on check_if { |brace_depth| brace_depth == 0 }, %r/\}/, :error # Mismatched brace
+ on %r/\}/, :inline_delimiter, pop, decrement(:brace_depth)
+
+ on %r/'/, push(:single_quoted_string, :string), :delimiter, set(:start_delim, :match) # String delimiters " and '
+ on %r/"/, push(:double_quoted_string, :string), :delimiter, set(:start_delim, :match)
+ # ↓Prefix hex number ←|→ decimal number
+ on %r/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix, :float # hexadecimal constants have no E power, decimal ones no P power
+ # ↓Prefix hex number ←|→ decimal number
+ on %r/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix, :integer # hexadecimal constants have no E power, decimal ones no P power
+ on %r/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x, :operator # Operators
+ on %r/\s+/, :space # Space
+ end
+
+ state :function_expected do
+ on %r/\(.*?\)/m, :operator, pop_state # x = function() # "Anonymous" function without explicit name
+ on %r/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x, :ident # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :function, pop_state # function foo()
+ on %r/\s+/, :space # Between the `function' keyword and the ident may be any amount of whitespace
+ end
+
+ state :goto_label_expected do
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :label, pop_state
+ on %r/\s+/, :space # Between the `goto' keyword and the label may be any amount of whitespace
+ end
+
+ state :local_var_expected do
+ on %r/function/, :keyword, pop_state, push_state(:function_expected) # local function ...
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :local_variable
+ on %r/,/, :operator
+ on %r/\=/, :operator, pop_state
+ on %r/\n/, :space, pop_state
+ on %r/\s+/, :space
+ end
+
+ state :long_comment do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:comment)
+ on %r/.*/m, :error, pop(:comment)
+ end
+
+ state :long_string do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:string) # Long strings do not interpret any escape sequences
+ on %r/.*/m, :error, pop(:string)
+ end
+
+ state :single_quoted_string do
+ on %r/[^\\'\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
+ on %r/'/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings
+ end
+
+ state :double_quoted_string do
+ on %r/[^\\"\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
+ on %r/"/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings
+ end
+
+ # CodeRay entry hook. Starts parsing.
+ scan_tokens_code = <<-"RUBY"
+ def scan_tokens encoder, options#{ def_line = __LINE__; nil }
+ state = options[:state] || @state
+ brace_depth = @brace_depth
+ num_equals = nil
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+#{ @code.chomp.gsub(/^/, ' ') }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ encoder.end_group :string if [:string, :single_quoted_string, :double_quoted_string].include? state
+ brace_depth.times { encoder.end_group :map }
+
+ encoder
+ end
+ RUBY
+
+ if ENV['PUTS']
+ puts CodeRay.scan(scan_tokens_code, :ruby).terminal
+ puts "callbacks: #{callbacks.size}"
+ end
+ class_eval scan_tokens_code, __FILE__, def_line
+ end
+
+end
+end