# encoding: utf-8
module CodeRay
module Scanners

  # Scanner for the Lua[http://lua.org] programming language.
  #
  # The language's complete syntax is defined in
  # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
  # which is what this scanner tries to conform to.
  #
  # The scanner is written as a set of named states; each +on+ rule pairs a
  # pattern with the token kind(s) and state/group actions to apply when the
  # pattern matches. Two pieces of per-scan data are tracked alongside the
  # state stack: +brace_depth+ (nesting of table constructors) and
  # +num_equals+ (number of `=' signs in the long bracket currently open).
  class Lua2 < RuleBasedScanner

    register_for :lua2
    file_extension 'lua'
    title 'Lua'

    # Keywords used in Lua.
    KEYWORDS = %w[
      and break do else elseif end
      for function goto if in
      local not or repeat return
      then until while
    ].freeze

    # Constants set by the Lua core.
    PREDEFINED_CONSTANTS = %w[false true nil].freeze

    # The expressions contained in this array are parts of Lua's `basic'
    # library. Although it's not entirely necessary to load that library,
    # it is highly recommended and one would have to provide own implementations
    # of some of these expressions if one does not do so. They however aren't
    # keywords, neither are they constants, but nearly predefined, so they
    # get tagged as `predefined' rather than anything else.
    #
    # This list excludes values of form `_UPPERCASE' because the Lua manual
    # requires such identifiers to be reserved by Lua anyway and they are
    # highlighted directly accordingly, without the need for specific
    # identifiers to be listed here.
    PREDEFINED_EXPRESSIONS = %w[
      assert collectgarbage dofile error getmetatable
      ipairs load loadfile next pairs pcall print
      rawequal rawget rawlen rawset select setmetatable
      tonumber tostring type xpcall
    ].freeze

    # Automatic token kind selection for normal words.
    IDENT_KIND = CodeRay::WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(PREDEFINED_EXPRESSIONS, :predefined)

    # Top-level code and the inside of a table constructor share one rule set.
    state :initial, :map do
      on %r/\-\-\[\=*\[/, push(:long_comment, :comment), :delimiter,  # --[[ long (possibly multiline) comment ]]
        set(:num_equals, ->(match) { match.count('=') })              # Number must match for comment end
      on %r/--.*$/, :comment                                          # --Lua comment
      on %r/\[=*\[/, push(:long_string, :string), :delimiter,         # [[ long (possibly multiline) string ]]
        set(:num_equals, ->(match) { match.count('=') })              # Number must match for string end

      # ::goto_label:: -- a label name may be a single character, hence `*'.
      on %r/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/, :label
      on %r/_[A-Z]+/, :predefined                                     # _UPPERCASE are names reserved for Lua

      # Inside a table, `name =' is a key. The negative lookahead keeps the
      # comparison `name == other' from being mistaken for a key assignment.
      on check_if { |brace_depth| brace_depth > 0 },
        %r/([a-zA-Z_][a-zA-Z0-9_]*) (\s+)?(=)(?!=)/x, groups(:key, :space, :operator)

      # Normal letters (or letters followed by digits).
      on %r/[a-zA-Z_][a-zA-Z0-9_]*/, kind { |match| IDENT_KIND[match] }, push_state { |match, kind|
        # Extra highlighting for entities following certain keywords
        if kind == :keyword && match == 'function'
          :function_expected
        elsif kind == :keyword && match == 'goto'
          :goto_label_expected
        elsif kind == :keyword && match == 'local'
          :local_var_expected
        end
      }

      # Opening table brace {
      on %r/\{/, push(:map),
        kind { |brace_depth| brace_depth > 0 ? :inline_delimiter : :delimiter },
        increment(:brace_depth)
      # Closing table brace } of the outermost table
      on check_if { |brace_depth| brace_depth == 1 }, %r/\}/, :delimiter, pop, decrement(:brace_depth)
      # Mismatched brace: nothing is open
      on check_if { |brace_depth| brace_depth == 0 }, %r/\}/, :error
      # Closing brace of a nested table
      on %r/\}/, :inline_delimiter, pop, decrement(:brace_depth)

      on %r/"/, push(:double_quoted_string, :string), :delimiter      # String delimiters " and '
      on %r/'/, push(:single_quoted_string, :string), :delimiter

      # Optional sign, then hex number | decimal number.
      # Hexadecimal constants have no E power, decimal ones no P power.
      on %r/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix, :float
      on %r/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix, :integer

      on %r/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x, :operator     # Operators
      on %r/\s+/, :space                                              # Space
    end

    # Entered after the `function' keyword.
    state :function_expected do
      # x = function() -- "anonymous" function without explicit name
      on %r/\(.*?\)/m, :operator, pop_state
      # function tbl.subtbl.foo() | function tbl:foo() -- colon only allowed as last separator
      on %r/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x, :ident
      # function foo()
      on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :function, pop_state
      # Between the `function' keyword and the ident may be any amount of whitespace
      on %r/\s+/, :space
    end

    # Entered after the `goto' keyword.
    state :goto_label_expected do
      on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :label, pop_state
      # Between the `goto' keyword and the label may be any amount of whitespace
      on %r/\s+/, :space
    end

    # Entered after the `local' keyword.
    state :local_var_expected do
      # local function ... -- the lookahead makes sure only the whole word
      # `function' is taken as the keyword, not the start of a longer name
      # such as `functions'.
      on %r/function(?![a-zA-Z0-9_])/, :keyword, pop_state, push_state(:function_expected)
      on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :local_variable
      on %r/,/, :operator
      on %r/\=/, :operator, pop_state
      on %r/\n/, :space, pop_state
      on %r/\s+/, :space
    end

    # Body of a --[==[ ... ]==] comment; the closing bracket must carry the
    # same number of `=' signs as the opening one.
    state :long_comment do
      on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:comment)
      # No matching closing bracket: the remainder of the input is an error.
      on %r/.*/m, :error, pop(:comment)
    end

    # Body of a [==[ ... ]==] string. Long strings do not interpret any
    # escape sequences.
    state :long_string do
      on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:string)
      # No matching closing bracket: the remainder of the input is an error.
      on %r/.*/m, :error, pop(:string)
    end

    state :single_quoted_string do
      # Everything except \ and the start delimiter character is string content
      # (newlines are only allowed if preceded by \ or \z)
      on %r/[^\\'\n]+/, :content
      on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
      on %r/'/, :delimiter, pop(:string)
      # Lua forbids unescaped newlines in normal non-long strings
      on %r/\n/, :error, pop(:string)
      # encoder.text_token("\\n\n", :error)  # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
    end

    state :double_quoted_string do
      # Everything except \ and the start delimiter character is string content
      # (newlines are only allowed if preceded by \ or \z)
      on %r/[^\\"\n]+/, :content
      on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
      on %r/"/, :delimiter, pop(:string)
      # Lua forbids unescaped newlines in normal non-long strings
      on %r/\n/, :error, pop(:string)
      # encoder.text_token("\\n\n", :error)  # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
    end

  protected

    # Initialises the scanner's extra bookkeeping after the superclass setup:
    # no table brace is open and no long-bracket level is recorded.
    def setup
      super

      @brace_depth = 0
      @num_equals  = nil
    end

    # Ends the token group associated with each state in +states+, walking
    # the stack in reverse order.
    #
    # encoder:: object receiving the +end_group+ calls
    # states::  the state stack (symbols) to unwind
    #
    # The group names must be the ones the states were entered with: the
    # string states and the long comment are pushed with explicit groups
    # (+:string+ and +:comment+, matching their +pop+ rules), so those are the
    # names ended here.
    def close_groups(encoder, states)
      states.reverse_each do |state|
        case state
        when :long_string, :single_quoted_string, :double_quoted_string
          encoder.end_group :string
        when :long_comment
          encoder.end_group :comment
        when :map
          encoder.end_group :map
        end
      end
    end

  end

end
end